102 Commits
2.0.2 ... 2.0.4

Author SHA1 Message Date
624c5bd316 Create 2.0.4 release. 2008-10-06 20:16:02 +00:00
b2d6122769 Fix linux build. 2008-10-05 19:20:42 +00:00
cd59058fc2 Fix linux includes. 2008-10-05 19:17:59 +00:00
db14e048e1 Fix errors in Green and Alpha optimal compressors. 2008-10-02 07:33:05 +00:00
0c36fcf626 Update changelog. 2008-10-01 22:30:19 +00:00
68be24bf00 Set correct DXT5n swizzle code.
Select swizzle codes in nvtt instead of nvimage.
2008-10-01 22:28:57 +00:00
b284669993 Try some optimizations. 2008-10-01 22:28:01 +00:00
2f6e885ced Add DXT1 compressor that uses texture to avoid CPU swizzling.
Fix errors under emulation.
Experiment with DXT5 compressor.
2008-10-01 22:24:53 +00:00
1957120c26 Reference gnuwin32 libs and include paths correctly. 2008-09-11 07:56:39 +00:00
d7ddcb9263 Set optimal options for release vc9 projects.
Fixes issue 62.
2008-09-11 07:48:02 +00:00
f5f6e88585 Do not use freeimage yet. 2008-08-20 22:34:08 +00:00
13e2d2e447 Fix float support. 2008-08-20 22:32:54 +00:00
0b13b6d0d9 Update version number. 2008-08-20 22:31:14 +00:00
ad85b0fcbe Include gnuwin32 directory properly. 2008-08-20 22:30:41 +00:00
0515d9a0a0 Add Half.{h,cpp} to project. 2008-07-31 10:00:05 +00:00
16adf94635 Add support for floating point output formats.
Images are currently output in linear space, some color transforms not applied.
2008-07-31 09:55:22 +00:00
e9002a7d86 Adding support for floating point input/output. Work in progress. 2008-07-31 02:04:44 +00:00
3161fca9d9 Decompress DDS10 files. Only BC# supported so far. 2008-07-30 02:28:09 +00:00
bb5b02df0e Adding support for floating point images as input. 2008-07-29 08:56:40 +00:00
1941e27148 Fix DDS10 header initialization.
Fix depth initialization.
2008-07-29 08:43:42 +00:00
02c3abb394 Fix color transforms. 2008-07-29 06:05:11 +00:00
86ef67bbfa Fix error in input image transformation. 2008-07-29 05:45:35 +00:00
79529f994f Fix compilation error. 2008-07-29 05:44:31 +00:00
c2508d9eeb Add option to use dds10 headers. 2008-07-29 02:31:57 +00:00
b1cd916105 Change parameters in declaration to match definition. 2008-07-29 02:31:09 +00:00
96655b3e7c Work in progress:
- better support for DDS10 file format.
- support for RGBA pixel formats with more than 32 bits.
- support for pixel types other than UNORM.
2008-07-26 10:03:12 +00:00
529c0075e1 do not compile mpeg tests. 2008-07-26 09:01:00 +00:00
c70e5d6121 Reorg header files. 2008-07-05 09:10:45 +00:00
7394644719 Move ui to separate folder. 2008-07-05 09:10:00 +00:00
d5055300e2 Require cmake 2.6.0 2008-07-05 09:09:23 +00:00
b2e7d717c2 Fix compilation error under gcc/linux. 2008-07-05 08:57:03 +00:00
756f12c994 Fix errors in color transforms.
Add support for color offsets.
Add support for special swizzles that select default const values.
2008-06-30 10:59:57 +00:00
206bfcf0f3 reorg included files 2008-06-28 08:40:32 +00:00
15cfd1c06b fix path to gnulibs 2008-06-28 02:57:31 +00:00
8b26ecc865 fix path to gnulibs 2008-06-28 02:56:57 +00:00
488f3c8f42 fix path to gnu libs. 2008-06-28 02:56:27 +00:00
458b8814a7 fix path to gnulibs. 2008-06-28 02:55:35 +00:00
c08acc8a71 Add single color compressor to optimal green compressor.
Improve quality of DXT1 green compressor increasing search range.
2008-06-28 02:50:09 +00:00
45f7244f20 Check in proposed fix for issue 44. 2008-06-27 18:52:49 +00:00
f412ec8efb Fix assertion. 2008-06-26 07:23:31 +00:00
a1a34f546f Do not compile nvmpegenc. 2008-06-26 07:23:09 +00:00
7ef88c6f7e Fix build on OSX 10.5. Solves issue 44. 2008-06-26 07:22:34 +00:00
3368f9039b Fix embarrasing typo. 2008-06-24 21:47:47 +00:00
870a3fe438 Add references. 2008-06-23 19:24:59 +00:00
82bed4ac9a Eliminate warning. 2008-06-19 10:01:56 +00:00
b8a9395117 Fix end of lines. 2008-06-19 09:53:09 +00:00
65f769160d Fix solution. 2008-06-19 09:52:20 +00:00
524ebbec8c Add PhotoshopExporter template project to solution. 2008-06-19 00:38:15 +00:00
ce85eaff3e Add photoshop exporter template. 2008-06-19 00:37:15 +00:00
6befe3505c Enable Qt ui with win32/msvc. 2008-06-19 00:35:47 +00:00
ff6f7f0506 Add quick and dirty single frame mpeg encoder based on ffmpeg. 2008-06-13 08:12:58 +00:00
55e7d3dec4 Delete outdated comment. 2008-06-03 06:34:18 +00:00
b5e373b734 Compile NVTT under G5/leopard 2008-05-23 22:22:09 +00:00
6ce542b5c0 Fix error in cmake file. 2008-05-23 06:51:02 +00:00
58e5f6534f Print version number in copyright statement. 2008-05-22 21:48:40 +00:00
eda4786ca6 Update version number. 2008-05-22 21:48:19 +00:00
29a720bf82 Do not print text. 2008-05-22 21:32:09 +00:00
1120f83f7d Fix errors. 2008-05-22 21:31:44 +00:00
c38c3dc584 Fix win32 buid errors and warnings due to FreeImage. 2008-05-22 21:31:15 +00:00
7d3d0ede9d Update projects to use FreeImage. 2008-05-22 21:30:34 +00:00
bc66a7ad74 Update cmake files. 2008-05-22 19:47:52 +00:00
ca8d17abf5 Reorganize external libs. 2008-05-22 19:47:23 +00:00
10e79dac9d Update changelog. 2008-05-21 19:22:43 +00:00
e068964423 Fix EXR loading code. Issue 45. Fix provided by alastairpatrick. 2008-05-21 19:17:27 +00:00
ea340443d9 Add -mipfilter command line option, per request of Noel Llopis. 2008-05-19 20:10:05 +00:00
fb2b0cb38c Fix bug reported by Noel Llopis.
Make sure FreeImage declarations are not used when FreeImage is not available.
2008-05-19 18:23:42 +00:00
c01566cd2f Add support for FreeImage in nvimage.
Add support for floating point input images in nvtt.
2008-05-15 09:47:55 +00:00
47bdab8e27 Define FREEIMAGE_LIBRARIES. 2008-05-15 09:46:45 +00:00
70267fda15 Add support for input floating point images. Patch provided by Jim Tilander. See issue 27. 2008-05-15 06:18:24 +00:00
aebcea412c Search for freeimage. 2008-05-15 06:17:46 +00:00
bccdcd49da Use standard gram schmidt 2008-05-15 06:04:17 +00:00
68e9f05794 Add freeimage cmake file. 2008-05-15 05:57:11 +00:00
0f186e688f Remove 'virtual' from non virtual methods. 2008-05-08 21:15:05 +00:00
38e9652d64 Remove executable flag. 2008-05-08 18:20:55 +00:00
f08114c1b5 Whops, check fix for vc9. 2008-05-08 18:18:53 +00:00
7b9f891f92 Update vc9 projects. 2008-05-07 21:58:50 +00:00
787c9bb8fb Add vc9 projects. 2008-05-07 21:55:33 +00:00
42220b981e Add vc9 folder 2008-05-07 21:45:48 +00:00
70331a37fd Refactor compression functions, group them into class methods. 2008-05-06 23:55:19 +00:00
2ffc4cd7ad Ups, checked in the file before saving it. 2008-05-06 23:34:46 +00:00
4ba8e87a38 Ups, checked in file before saving it. 2008-05-06 23:34:13 +00:00
d440d68aa8 Update ChangeLog. 2008-05-06 23:22:04 +00:00
48f61dbfc0 Add support for linear and swizzle transforms. Fixes issue 4. 2008-05-06 23:21:39 +00:00
94c3fa75a8 Add comments indicating where to perform linear color transforms. 2008-05-06 22:01:23 +00:00
c562af6d9b Integrate YCoCg color space conversion by Jim Tilander. 2008-05-06 21:49:10 +00:00
a889f2fda6 Add support for alpha modes in the CUDA compressors. 2008-05-06 20:04:05 +00:00
d855d0461b Add single color checks to CUDA compressors.
Use optimized bitmap table for CTX compressor.
2008-05-06 19:52:27 +00:00
246f2a409a Add cpp file to project so that a library is built. 2008-05-06 19:48:43 +00:00
6a6b3edce1 factorial optimization suggested by pponywong. 2008-05-06 06:37:06 +00:00
52b3bc9437 Update project files. Remove fast compressor, add optimal compressor. 2008-04-29 22:34:09 +00:00
f6ab357b09 Add missing files to project. 2008-04-29 22:33:42 +00:00
ce3a65c03e Fix operator= in String class. 2008-04-29 22:32:12 +00:00
8d9bf5c0b3 Fix after refactoring CPU compressors. Changes were not tested with CUDA enabled. 2008-04-29 22:31:55 +00:00
ab5265e642 Remove declaration of method that was removed. 2008-04-28 08:39:24 +00:00
fd1d5e41c7 Add missing files! 2008-04-28 06:22:26 +00:00
3980d5dc21 Update changelog. 2008-04-26 09:17:16 +00:00
15e7125b4b Check for single color blocks in all compressors. 2008-04-26 09:16:56 +00:00
921ee354c0 Remove legacy compressors.
Add iteration count parameter to iterative alpha compressor.
Add optimal compressors.
2008-04-26 08:02:30 +00:00
e3f7e303e4 Use FLT_MAX instead of INFINITE. The latter not supported in msvc. 2008-04-20 06:01:50 +00:00
1df69495fc Precompute fast cluster fit factors, and store as static const.
nvtt is completely reentrant now. Fixes issue 37.
cleanup interface of cuda compressors.
2008-04-18 08:49:32 +00:00
91eb30667f Add TLS class wrapper.
Fix AutoPtr operator=.
Fix typo.
2008-04-17 18:39:01 +00:00
6db5cffca6 Fix changelog. 2008-04-17 09:28:38 +00:00
84 changed files with 2777 additions and 6223 deletions

View File

@ -1,7 +1,14 @@
NVIDIA Texture Tools version 2.1.0
* CTX1 CUDA compressor.
* DXT1n CUDA compressor.
* Support alpha premultiplication by Charles Nicholson. See issue 30.
NVIDIA Texture Tools version 2.0.4
* Fix error in RGB format output; reported by jonsoh. See issue 49.
* Added support RGB format dithering by jonsoh. Fixes issue 50 and 51.
* Prevent infinite loop in indexMirror when width equal 1. Fixes issue 65.
* Implement general scale filter, including upsampling.
NVIDIA Texture Tools version 2.0.3
* More accurate DXT3 compressor. Fixes issue 38.
* Remove legacy compressors. Fix issue 34.
* Check for single color in all compressors. Fixes issue 43.
* Fix error in fast downsample filter, reported by Noel Llopis.
NVIDIA Texture Tools version 2.0.2
* Fix copy ctor error reported by Richard Sim.
@ -10,7 +17,6 @@ NVIDIA Texture Tools version 2.0.2
* Fix RGBA modes with less than 32 bpp by Viktor Linder.
* Fix alpha decompression by Amorilia. See issue 40.
* Avoid default-initialized constructors for POD types, reported by Jim Tilander.
* Improved decompressor tool submitted by Amorilia. See issue 41.
* Add single color compresor for DXT1a.
* Set swizzle code to ATI2 files. See issue 41.

View File

@ -2,7 +2,7 @@
--------------------------------------------------------------------------------
NVIDIA Texture Tools
README.txt
Version 2.1
Version 2.0
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------

View File

@ -1 +1 @@
2.1.0
2.0.4

BIN
gnuwin32/lib/libpng.a Normal file

Binary file not shown.

BIN
gnuwin32/lib/libz.a Normal file

Binary file not shown.

View File

@ -278,6 +278,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17"
/>
<Tool

View File

@ -277,6 +277,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17"
/>
<Tool

View File

@ -355,6 +355,10 @@
RelativePath="..\..\..\src\nvimage\nvimage.h"
>
</File>
<File
RelativePath="..\..\..\src\nvimage\PixelFormat.h"
>
</File>
<File
RelativePath="..\..\..\src\nvimage\PsdFile.h"
>

View File

@ -277,6 +277,10 @@
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvmath\Plane.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
@ -295,6 +299,10 @@
RelativePath="..\..\..\src\nvmath\Matrix.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Plane.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Vector.h"
>

View File

@ -3,18 +3,18 @@ Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}"
@ -34,16 +34,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
ProjectSection(ProjectDependencies) = postProject
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
@ -55,18 +55,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stress", "stress\stress.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}"
ProjectSection(ProjectDependencies) = postProject
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU
@ -319,22 +314,6 @@ Global
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -53,8 +53,8 @@ END
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,1,0,0
PRODUCTVERSION 2,1,0,0
FILEVERSION 2,0,4,0
PRODUCTVERSION 2,0,4,0
FILEFLAGSMASK 0x17L
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 1, 0, 0"
VALUE "FileVersion", "2, 0, 4, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 1, 0, 0"
VALUE "ProductVersion", "2, 0, 4, 0"
END
END
BLOCK "VarFileInfo"

View File

@ -691,7 +691,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -701,7 +701,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -711,7 +711,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -721,7 +721,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -849,10 +849,6 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
>
@ -865,6 +861,10 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
>
@ -911,10 +911,6 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\InputOptions.h"
>
@ -927,6 +923,10 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OutputOptions.h"
>

View File

@ -1,201 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="stress"
ProjectGUID="{317B694E-B5C1-42A6-956F-FC12B69175A6}"
RootNamespace="stress"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvtt\tests\stress.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -1,11 +1,11 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
SUBDIRS(nvcore)
SUBDIRS(nvmath)
SUBDIRS(nvimage)
SUBDIRS(nvtt)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
# OpenGL
INCLUDE(FindOpenGL)
IF(OPENGL_FOUND)
@ -84,7 +84,6 @@ ELSE(PNG_FOUND)
ENDIF(PNG_FOUND)
# TIFF
SET(TIFF_NAMES libtiff)
INCLUDE(FindTIFF)
IF(TIFF_FOUND)
SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise")

View File

@ -1,147 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_ALGORITHMS_H
#define NV_CORE_ALGORITHMS_H
namespace nv
{
/// Return the maximum of two values.
template <typename T>
inline const T & max(const T & a, const T & b)
{
//return std::max(a, b);
if( a < b ) {
return b;
}
return a;
}
/// Return the minimum of two values.
template <typename T>
inline const T & min(const T & a, const T & b)
{
//return std::min(a, b);
if( b < a ) {
return b;
}
return a;
}
/// Clamp between two values.
template <typename T>
inline const T & clamp(const T & x, const T & a, const T & b)
{
return min(max(x, a), b);
}
/// Delete all the elements of a container.
template <typename T>
void deleteAll(T & container)
{
for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
{
delete container[i];
}
}
// @@ Swap should be implemented here.
#if 0
// This does not use swap, but copies, in some cases swaps are much faster than copies!
// Container should implement operator[], and size()
template <class Container, class T>
void insertionSort(Container<T> & container)
{
const uint n = container.size();
for (uint i=1; i < n; ++i)
{
T value = container[i];
uint j = i;
while (j > 0 && container[j-1] > value)
{
container[j] = container[j-1];
--j;
}
if (i != j)
{
container[j] = value;
}
}
}
template <class Container, class T>
void quickSort(Container<T> & container)
{
quickSort(container, 0, container.count());
}
{
/* threshhold for transitioning to insertion sort */
while (n > 12) {
int c01,c12,c,m,i,j;
/* compute median of three */
m = n >> 1;
c = p[0] > p[m];
c01 = c;
c = &p[m] > &p[n-1];
c12 = c;
/* if 0 >= mid >= end, or 0 < mid < end, then use mid */
if (c01 != c12) {
/* otherwise, we'll need to swap something else to middle */
int z;
c = p[0] < p[n-1];
/* 0>mid && mid<n: 0>n => n; 0<n => 0 */
/* 0<mid && mid>n: 0>n => 0; 0<n => n */
z = (c == c12) ? 0 : n-1;
swap(p[z], p[m]);
}
/* now p[m] is the median-of-three */
/* swap it to the beginning so it won't move around */
swap(p[0], p[m]);
/* partition loop */
i=1;
j=n-1;
for(;;) {
/* handling of equality is crucial here */
/* for sentinels & efficiency with duplicates */
for (;;++i) {
c = p[i] > p[0];
if (!c) break;
}
a = &p[0];
for (;;--j) {
b=&p[j];
c = p[j] > p[0]
if (!c) break;
}
/* make sure we haven't crossed */
if (i >= j) break;
swap(p[i], p[j]);
++i;
--j;
}
/* recurse on smaller side, iterate on larger */
if (j < (n-i)) {
quickSort(p, j);
p = p+i;
n = n-i;
}
else {
quickSort(p+i, n-i);
n = j;
}
}
insertionSort();
}
#endif // 0
} // nv namespace
#endif // NV_CORE_ALGORITHMS_H

View File

@ -80,13 +80,13 @@ public:
/// Clear all the bits.
void clearAll()
{
memset(m_bitArray.mutableBuffer(), 0, m_bitArray.size());
memset(m_bitArray.unsecureBuffer(), 0, m_bitArray.size());
}
/// Set all the bits.
void setAll()
{
memset(m_bitArray.mutableBuffer(), 0xFF, m_bitArray.size());
memset(m_bitArray.unsecureBuffer(), 0xFF, m_bitArray.size());
}
/// Toggle all the bits.

View File

@ -3,13 +3,7 @@ ADD_SUBDIRECTORY(poshlib)
SET(CORE_SRCS
nvcore.h
DefsGnucDarwin.h
DefsGnucLinux.h
DefsGnucWin32.h
DefsVcWin32.h
Ptr.h
RefCounted.h
RefCounted.cpp
BitArray.h
Memory.h
Memory.cpp
@ -25,10 +19,7 @@ SET(CORE_SRCS
TextWriter.h
TextWriter.cpp
Radix.h
Radix.cpp
CpuInfo.h
CpuInfo.cpp
Algorithms.h)
Radix.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -19,7 +19,6 @@ Do not use memmove in insert & remove, use copy ctors instead.
#include <nvcore/nvcore.h>
#include <nvcore/Memory.h>
#include <nvcore/Debug.h>
//#include <nvcore/Stream.h>
#include <string.h> // memmove
#include <new> // for placement new
@ -71,10 +70,40 @@ namespace nv
{
// Templates
/// Return the maximum of two values.
template <typename T>
inline const T & max(const T & a, const T & b)
{
//return std::max(a, b);
if( a < b ) {
return b;
}
return a;
}
/// Return the minimum of two values.
template <typename T>
inline const T & min(const T & a, const T & b)
{
//return std::min(a, b);
if( b < a ) {
return b;
}
return a;
}
/// Clamp between two values.
template <typename T>
inline const T & clamp(const T & x, const T & a, const T & b)
{
return min(max(x, a), b);
}
/// Swap two values.
template <typename T>
inline void swap(T & a, T & b)
{
//return std::swap(a, b);
T temp = a;
a = b;
b = temp;
@ -105,6 +134,16 @@ namespace nv
uint operator()(uint x) const { return x; }
};
/// Delete all the elements of a container.
template <typename T>
void deleteAll(T & container)
{
for(typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
{
delete container[i];
}
}
/** Return the next power of two.
* @see http://graphics.stanford.edu/~seander/bithacks.html
@ -115,7 +154,7 @@ namespace nv
inline uint nextPowerOfTwo( uint x )
{
nvDebugCheck( x != 0 );
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction.
#if 1 // On modern CPUs this is as fast as using the bsr instruction.
x--;
x |= x >> 1;
x |= x >> 2;
@ -138,6 +177,15 @@ namespace nv
return (n & (n-1)) == 0;
}
/// Simple iterator interface.
template <typename T>
struct Iterator
{
virtual void advance();
virtual bool isDone();
virtual T current();
};
/**
* Replacement for std::vector that is easier to debug and provides
@ -204,7 +252,7 @@ namespace nv
const T * buffer() const { return m_buffer; }
/// Get vector pointer.
T * mutableBuffer() { return m_buffer; }
T * unsecureBuffer() { return m_buffer; }
/// Is vector empty.
bool isEmpty() const { return m_size == 0; }

View File

@ -1,88 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/CpuInfo.h>
#include <nvcore/Debug.h>
using namespace nv;
#if NV_OS_WIN32
#define _WIN32_WINNT 0x0501
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow64()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
return false;
}
}
return bIsWow64 == TRUE;
}
#endif // NV_OS_WIN32
uint CpuInfo::processorCount()
{
#if NV_OS_WIN32
SYSTEM_INFO sysInfo;
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
if (isWow64())
{
GetNativeSystemInfo(&sysInfo);
}
else
{
GetSystemInfo(&sysInfo);
}
uint count = (uint)sysInfo.dwNumberOfProcessors;
nvDebugCheck(count >= 1);
return count;
#else
return 1;
#endif
}
uint CpuInfo::coreCount()
{
return 1;
}
bool CpuInfo::hasMMX()
{
return false;
}
bool CpuInfo::hasSSE()
{
return false;
}
bool CpuInfo::hasSSE2()
{
return false;
}
bool CpuInfo::hasSSE3()
{
return false;
}

View File

@ -1,88 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_CPUINFO_H
#define NV_CORE_CPUINFO_H
#include <nvcore/nvcore.h>
#if NV_CC_MSVC
# include <intrin.h> // __rdtsc
#endif
namespace nv
{
// CPU Information.
class CpuInfo
{
static uint processorCount();
static uint coreCount();
static bool hasMMX();
static bool hasSSE();
static bool hasSSE2();
static bool hasSSE3();
};
#if NV_CC_MSVC
#pragma intrinsic(__rdtsc)
inline uint64 rdtsc()
{
return __rdtsc();
}
#endif
#if NV_CC_GNUC
#if defined(__i386__)
inline /*volatile*/ uint64 rdtsc()
{
uint64 x;
//__asm__ volatile ("rdtsc" : "=A" (x));
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}
#elif defined(__x86_64__)
static __inline__ uint64 rdtsc(void)
{
unsigned int hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
}
#elif defined(__powerpc__)
static __inline__ uint64 rdtsc(void)
{
uint64 result=0;
unsigned long int upper, lower, tmp;
__asm__ volatile(
"0: \n"
"\tmftbu %0 \n"
"\tmftb %1 \n"
"\tmftbu %2 \n"
"\tcmpw %2,%0 \n"
"\tbne 0b \n"
: "=r"(upper),"=r"(lower),"=r"(tmp)
);
result = upper;
result = result<<32;
result = result|lower;
return(result);
}
#endif
#endif // NV_CC_GNUC
} // nv namespace
#endif // NV_CORE_CPUINFO_H

View File

@ -70,6 +70,8 @@ typedef uint32 uint;
#pragma warning(disable : 4711) // function selected for automatic inlining
#pragma warning(disable : 4725) // Pentium fdiv bug
#pragma warning(disable : 4345) // behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized
#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged.
#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup

View File

@ -8,11 +8,6 @@
#include <stdio.h> // NULL
#define NV_DECLARE_PTR(Class) \
typedef SmartPtr<class Class> Class ## Ptr; \
typedef SmartPtr<const class Class> ClassConst ## Ptr
namespace nv
{
@ -98,23 +93,125 @@ private:
T * m_ptr;
};
#if 0
/** Reference counted base class to be used with Pointer.
*
* The only requirement of the Pointer class is that the RefCounted class implements the
* addRef and release methods.
*/
class RefCounted
{
NV_FORBID_COPY(RefCounted);
public:
/// Ctor.
RefCounted() : m_count(0), m_weak_proxy(NULL)
{
s_total_obj_count++;
}
/// Virtual dtor.
virtual ~RefCounted()
{
nvCheck( m_count == 0 );
nvCheck( s_total_obj_count > 0 );
s_total_obj_count--;
}
/// Increase reference count.
uint addRef() const
{
s_total_ref_count++;
m_count++;
return m_count;
}
/// Decrease reference count and remove when 0.
uint release() const
{
nvCheck( m_count > 0 );
s_total_ref_count--;
m_count--;
if( m_count == 0 ) {
releaseWeakProxy();
delete this;
return 0;
}
return m_count;
}
/// Get weak proxy.
WeakProxy * getWeakProxy() const
{
if (m_weak_proxy == NULL) {
m_weak_proxy = new WeakProxy;
m_weak_proxy->AddRef();
}
return m_weak_proxy;
}
/// Release the weak proxy.
void releaseWeakProxy() const
{
if (m_weak_proxy != NULL) {
m_weak_proxy->NotifyObjectDied();
m_weak_proxy->Release();
m_weak_proxy = NULL;
}
}
/** @name Debug methods: */
//@{
/// Get reference count.
int refCount() const
{
return m_count;
}
/// Get total number of objects.
static int totalObjectCount()
{
return s_total_obj_count;
}
/// Get total number of references.
static int totalReferenceCount()
{
return s_total_ref_count;
}
//@}
private:
NVCORE_API static int s_total_ref_count;
NVCORE_API static int s_total_obj_count;
mutable int m_count;
mutable WeakProxy * weak_proxy;
};
#endif
/// Smart pointer template class.
template <class BaseClass>
class SmartPtr {
class Pointer {
public:
// BaseClass must implement addRef() and release().
typedef SmartPtr<BaseClass> ThisType;
typedef Pointer<BaseClass> ThisType;
/// Default ctor.
SmartPtr() : m_ptr(NULL)
Pointer() : m_ptr(NULL)
{
}
/** Other type assignment. */
template <class OtherBase>
SmartPtr( const SmartPtr<OtherBase> & tc )
Pointer( const Pointer<OtherBase> & tc )
{
m_ptr = static_cast<BaseClass *>( tc.ptr() );
if( m_ptr ) {
@ -123,7 +220,7 @@ public:
}
/** Copy ctor. */
SmartPtr( const ThisType & bc )
Pointer( const ThisType & bc )
{
m_ptr = bc.ptr();
if( m_ptr ) {
@ -131,8 +228,8 @@ public:
}
}
/** Copy cast ctor. SmartPtr(NULL) is valid. */
explicit SmartPtr( BaseClass * bc )
/** Copy cast ctor. Pointer(NULL) is valid. */
explicit Pointer( BaseClass * bc )
{
m_ptr = bc;
if( m_ptr ) {
@ -141,7 +238,7 @@ public:
}
/** Dtor. */
~SmartPtr()
~Pointer()
{
set(NULL);
}
@ -152,14 +249,14 @@ public:
/** -> operator. */
BaseClass * operator -> () const
{
nvCheck( m_ptr != NULL );
piCheck( m_ptr != NULL );
return m_ptr;
}
/** * operator. */
BaseClass & operator*() const
{
nvCheck( m_ptr != NULL );
piCheck( m_ptr != NULL );
return *m_ptr;
}
@ -175,7 +272,7 @@ public:
//@{
/** Other type assignment. */
template <class OtherBase>
void operator = ( const SmartPtr<OtherBase> & tc )
void operator = ( const Pointer<OtherBase> & tc )
{
set( static_cast<BaseClass *>(tc.ptr()) );
}
@ -198,7 +295,7 @@ public:
//@{
/** Other type equal comparation. */
template <class OtherBase>
bool operator == ( const SmartPtr<OtherBase> & other ) const
bool operator == ( const Pointer<OtherBase> & other ) const
{
return m_ptr == other.ptr();
}
@ -217,7 +314,7 @@ public:
/** Other type not equal comparation. */
template <class OtherBase>
bool operator != ( const SmartPtr<OtherBase> & other ) const
bool operator != ( const Pointer<OtherBase> & other ) const
{
return m_ptr != other.ptr();
}

View File

@ -1,9 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "RefCounted.h"
using namespace nv;
int nv::RefCounted::s_total_ref_count = 0;
int nv::RefCounted::s_total_obj_count = 0;

View File

@ -1,114 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_REFCOUNTED_H
#define NV_CORE_REFCOUNTED_H
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
namespace nv
{
/// Reference counted base class to be used with SmartPtr and WeakPtr.
class RefCounted
{
NV_FORBID_COPY(RefCounted);
public:
/// Ctor.
RefCounted() : m_count(0)/*, m_weak_proxy(NULL)*/
{
s_total_obj_count++;
}
/// Virtual dtor.
virtual ~RefCounted()
{
nvCheck( m_count == 0 );
nvCheck( s_total_obj_count > 0 );
s_total_obj_count--;
}
/// Increase reference count.
uint addRef() const
{
s_total_ref_count++;
m_count++;
return m_count;
}
/// Decrease reference count and remove when 0.
uint release() const
{
nvCheck( m_count > 0 );
s_total_ref_count--;
m_count--;
if( m_count == 0 ) {
// releaseWeakProxy();
delete this;
return 0;
}
return m_count;
}
/*
/// Get weak proxy.
WeakProxy * getWeakProxy() const
{
if (m_weak_proxy == NULL) {
m_weak_proxy = new WeakProxy;
m_weak_proxy->AddRef();
}
return m_weak_proxy;
}
/// Release the weak proxy.
void releaseWeakProxy() const
{
if (m_weak_proxy != NULL) {
m_weak_proxy->NotifyObjectDied();
m_weak_proxy->Release();
m_weak_proxy = NULL;
}
}
*/
/** @name Debug methods: */
//@{
/// Get reference count.
int refCount() const
{
return m_count;
}
/// Get total number of objects.
static int totalObjectCount()
{
return s_total_obj_count;
}
/// Get total number of references.
static int totalReferenceCount()
{
return s_total_ref_count;
}
//@}
private:
NVCORE_API static int s_total_ref_count;
NVCORE_API static int s_total_obj_count;
mutable int m_count;
// mutable WeakProxy * weak_proxy;
};
} // nv namespace
#endif // NV_CORE_REFCOUNTED_H

View File

@ -48,7 +48,7 @@ const char * TextReader::readToEnd()
m_text.reserve(size + 1);
m_text.resize(size);
m_stream->serialize(m_text.mutableBuffer(), size);
m_stream->serialize(m_text.unsecureBuffer(), size);
m_text.pushBack('\0');
return m_text.buffer();

View File

@ -22,7 +22,6 @@
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Stream.h>
#include <nvcore/Containers.h> // swap
#include "ColorBlock.h"
#include "BlockDXT.h"

View File

@ -1,6 +1,5 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Containers.h> // swap
#include <nvmath/Box.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/Image.h>

View File

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (0 << 16) | (9 << 8) | (5); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (4); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;

View File

@ -244,7 +244,7 @@ SincFilter::SincFilter(float w) : Filter(w) {}
float SincFilter::evaluate(float x) const
{
return 0.0f;
return sincf(PI * x);
}
@ -541,12 +541,17 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples/*= 32*/)
{
nvCheck(srcLength >= dstLength); // @@ Upsampling not implemented!
nvDebugCheck(samples > 0);
const float scale = float(dstLength) / float(srcLength);
float scale = float(dstLength) / float(srcLength);
const float iscale = 1.0f / scale;
if (scale > 1) {
// Upsampling.
samples = 1;
scale = 1;
}
m_length = dstLength;
m_width = f.width() * iscale;
m_windowSize = (int)ceilf(m_width * 2) + 1;
@ -577,6 +582,7 @@ PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLengt
m_data[i * m_windowSize + j] /= total;
}
}
}
PolyphaseKernel::~PolyphaseKernel()

View File

@ -11,16 +11,16 @@ namespace nv
class Vector4;
/// Base filter class.
class NVIMAGE_CLASS Filter
class Filter
{
public:
Filter(float width);
virtual ~Filter();
NVIMAGE_API Filter(float width);
NVIMAGE_API virtual ~Filter();
float width() const { return m_width; }
float sampleDelta(float x, float scale) const;
float sampleBox(float x, float scale, int samples) const;
float sampleTriangle(float x, float scale, int samples) const;
NVIMAGE_API float width() const { return m_width; }
NVIMAGE_API float sampleDelta(float x, float scale) const;
NVIMAGE_API float sampleBox(float x, float scale, int samples) const;
NVIMAGE_API float sampleTriangle(float x, float scale, int samples) const;
virtual float evaluate(float x) const = 0;
@ -29,56 +29,56 @@ namespace nv
};
// Box filter.
class NVIMAGE_CLASS BoxFilter : public Filter
class BoxFilter : public Filter
{
public:
BoxFilter();
BoxFilter(float width);
virtual float evaluate(float x) const;
NVIMAGE_API BoxFilter();
NVIMAGE_API BoxFilter(float width);
NVIMAGE_API virtual float evaluate(float x) const;
};
// Triangle (bilinear/tent) filter.
class NVIMAGE_CLASS TriangleFilter : public Filter
class TriangleFilter : public Filter
{
public:
TriangleFilter();
TriangleFilter(float width);
virtual float evaluate(float x) const;
NVIMAGE_API TriangleFilter();
NVIMAGE_API TriangleFilter(float width);
NVIMAGE_API virtual float evaluate(float x) const;
};
// Quadratic (bell) filter.
class NVIMAGE_CLASS QuadraticFilter : public Filter
class QuadraticFilter : public Filter
{
public:
QuadraticFilter();
virtual float evaluate(float x) const;
NVIMAGE_API QuadraticFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
// Cubic filter from Thatcher Ulrich.
class NVIMAGE_CLASS CubicFilter : public Filter
class CubicFilter : public Filter
{
public:
CubicFilter();
virtual float evaluate(float x) const;
NVIMAGE_API CubicFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
// Cubic b-spline filter from Paul Heckbert.
class NVIMAGE_CLASS BSplineFilter : public Filter
class BSplineFilter : public Filter
{
public:
BSplineFilter();
virtual float evaluate(float x) const;
NVIMAGE_API BSplineFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
/// Mitchell & Netravali's two-param cubic
/// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88
class NVIMAGE_CLASS MitchellFilter : public Filter
class MitchellFilter : public Filter
{
public:
MitchellFilter();
virtual float evaluate(float x) const;
NVIMAGE_API MitchellFilter();
NVIMAGE_API virtual float evaluate(float x) const;
void setParameters(float a, float b);
NVIMAGE_API void setParameters(float a, float b);
private:
float p0, p2, p3;
@ -86,29 +86,29 @@ namespace nv
};
// Lanczos3 filter.
class NVIMAGE_CLASS LanczosFilter : public Filter
class LanczosFilter : public Filter
{
public:
LanczosFilter();
virtual float evaluate(float x) const;
NVIMAGE_API LanczosFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
// Sinc filter.
class NVIMAGE_CLASS SincFilter : public Filter
class SincFilter : public Filter
{
public:
SincFilter(float w);
virtual float evaluate(float x) const;
NVIMAGE_API SincFilter(float w);
NVIMAGE_API virtual float evaluate(float x) const;
};
// Kaiser filter.
class NVIMAGE_CLASS KaiserFilter : public Filter
class KaiserFilter : public Filter
{
public:
KaiserFilter(float w);
virtual float evaluate(float x) const;
NVIMAGE_API KaiserFilter(float w);
NVIMAGE_API virtual float evaluate(float x) const;
void setParameters(float a, float stretch);
NVIMAGE_API void setParameters(float a, float stretch);
private:
float alpha;
@ -118,12 +118,12 @@ namespace nv
/// A 1D kernel. Used to precompute filter weights.
class NVIMAGE_CLASS Kernel1
class Kernel1
{
NV_FORBID_COPY(Kernel1);
public:
Kernel1(const Filter & f, int iscale, int samples = 32);
~Kernel1();
NVIMAGE_API Kernel1(const Filter & f, int iscale, int samples = 32);
NVIMAGE_API ~Kernel1();
float valueAt(uint x) const {
nvDebugCheck(x < (uint)m_windowSize);
@ -138,7 +138,7 @@ namespace nv
return m_width;
}
void debugPrint();
NVIMAGE_API void debugPrint();
private:
int m_windowSize;
@ -148,15 +148,15 @@ namespace nv
/// A 2D kernel.
class NVIMAGE_CLASS Kernel2
class Kernel2
{
public:
Kernel2(uint width);
Kernel2(const Kernel2 & k);
~Kernel2();
NVIMAGE_API Kernel2(uint width);
NVIMAGE_API Kernel2(const Kernel2 & k);
NVIMAGE_API ~Kernel2();
void normalize();
void transpose();
NVIMAGE_API void normalize();
NVIMAGE_API void transpose();
float valueAt(uint x, uint y) const {
return m_data[y * m_windowSize + x];
@ -166,12 +166,12 @@ namespace nv
return m_windowSize;
}
void initLaplacian();
void initEdgeDetection();
void initSobel();
void initPrewitt();
NVIMAGE_API void initLaplacian();
NVIMAGE_API void initEdgeDetection();
NVIMAGE_API void initSobel();
NVIMAGE_API void initPrewitt();
void initBlendedSobel(const Vector4 & scale);
NVIMAGE_API void initBlendedSobel(const Vector4 & scale);
private:
const uint m_windowSize;
@ -180,12 +180,12 @@ namespace nv
/// A 1D polyphase kernel
class NVIMAGE_CLASS PolyphaseKernel
class PolyphaseKernel
{
NV_FORBID_COPY(PolyphaseKernel);
public:
PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32);
~PolyphaseKernel();
NVIMAGE_API PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32);
NVIMAGE_API ~PolyphaseKernel();
int windowSize() const {
return m_windowSize;
@ -205,7 +205,7 @@ namespace nv
return m_data[column * m_windowSize + x];
}
void debugPrint() const;
NVIMAGE_API void debugPrint() const;
private:
int m_windowSize;

View File

@ -376,7 +376,7 @@ FloatImage * FloatImage::fastDownSample() const
{
const uint n = w * h;
if (n & 1)
if ((m_width * m_height) & 1)
{
const float scale = 1.0f / (2 * n + 1);
@ -540,73 +540,18 @@ FloatImage * FloatImage::fastDownSample() const
return dst_image.release();
}
/*
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, WrapMode wm) const
{
const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2);
return downSample(kernel, w, h, wm);
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, uint w, uint h, WrapMode wm) const
{
nvCheck(!(kernel.windowSize() & 1)); // Make sure that kernel m_width is even.
AutoPtr<FloatImage> tmp_image( new FloatImage() );
tmp_image->allocate(m_componentNum, w, m_height);
AutoPtr<FloatImage> dst_image( new FloatImage() );
dst_image->allocate(m_componentNum, w, h);
const float xscale = float(m_width) / float(w);
const float yscale = float(m_height) / float(h);
for(uint c = 0; c < m_componentNum; c++) {
float * tmp_channel = tmp_image->channel(c);
for(uint y = 0; y < m_height; y++) {
for(uint x = 0; x < w; x++) {
float sum = this->applyKernelHorizontal(&kernel, uint(x*xscale), y, c, wm);
const uint tmp_index = tmp_image->index(x, y);
tmp_channel[tmp_index] = sum;
}
}
float * dst_channel = dst_image->channel(c);
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
float sum = tmp_image->applyKernelVertical(&kernel, uint(x*xscale), uint(y*yscale), c, wm);
const uint dst_index = dst_image->index(x, y);
dst_channel[dst_index] = sum;
}
}
}
return dst_image.release();
}
*/
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const
{
const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2);
return downSample(filter, w, h, wm);
return resize(filter, w, h, wm);
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapMode wm) const
FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm) const
{
// @@ Use monophase filters when frac(m_width / w) == 0
@ -636,7 +581,7 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
float * dst_channel = dst_image->channel(c);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer());
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) {
dst_channel[y * w + x] = tmp_column[y];
@ -657,7 +602,7 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
float * tmp_channel = tmp_image->channel(c);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer());
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) {
tmp_channel[y * w + x] = tmp_column[y];

View File

@ -3,10 +3,8 @@
#ifndef NV_IMAGE_FLOATIMAGE_H
#define NV_IMAGE_FLOATIMAGE_H
#include <stdlib.h> // abs
#include <nvcore/Debug.h>
#include <nvcore/Algorithms.h> // clamp
#include <nvcore/Containers.h> // clamp
#include <nvimage/nvimage.h>
namespace nv
@ -65,7 +63,7 @@ public:
NVIMAGE_API FloatImage * fastDownSample() const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, uint w, uint h, WrapMode wm) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const;
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const;
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const;
@ -228,11 +226,15 @@ inline uint FloatImage::indexRepeat(int x, int y) const
inline uint FloatImage::indexMirror(int x, int y) const
{
if (m_width == 1) x = 0;
x = abs(x);
while (x >= m_width) {
x = abs(m_width + m_width - x - 2);
}
if (m_height == 1) y = 0;
y = abs(y);
while (y >= m_height) {
y = abs(m_height + m_height - y - 2);

View File

@ -2,7 +2,6 @@
#include <nvcore/Debug.h>
#include <nvcore/Ptr.h>
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>

View File

@ -162,9 +162,6 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
if (strCaseCmp(extension, ".pfm") == 0) {
return loadFloatPFM(fileName, s);
}
if (strCaseCmp(extension, ".hdr") == 0) {
return loadGridFloat(fileName, s);
}
*/
return NULL;
@ -797,7 +794,7 @@ Image * nv::ImageIO::loadJPG(Stream & s)
// Read the entire file.
Array<uint8> byte_array;
byte_array.resize(s.size());
s.serialize(byte_array.mutableBuffer(), s.size());
s.serialize(byte_array.unsecureBuffer(), s.size());
jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr;
@ -1092,8 +1089,7 @@ namespace
virtual void seekg(Imf::Int64 pos)
{
nvDebugCheck(pos >= 0 && pos < UINT_MAX);
m_stream.seek((uint)pos);
m_stream.seek(pos);
}
virtual void clear()
@ -1302,77 +1298,6 @@ bool nv::ImageIO::saveFloatPFM(const char * fileName, const FloatImage * fimage,
return true;
}
#pragma warning(disable : 4996)
NVIMAGE_API FloatImage * nv::ImageIO::loadGridFloat(const char * fileName, Stream & s)
{
nvCheck(s.isLoading());
nvCheck(!s.isError());
Tokenizer parser(&s);
parser.nextLine();
if (parser.token() != "ncols")
{
nvDebug("Failed to find 'ncols' token in file '%s'.\n", fileName);
return NULL;
}
parser.nextToken(true);
const int nCols = parser.token().toInt();
parser.nextToken(true);
if (parser.token() != "nrows")
{
nvDebug("Failed to find 'nrows' token in file '%s'.\n", fileName);
return NULL;
}
parser.nextToken(true);
const int nRows = parser.token().toInt();
/* There's a byte order defined in the header. We could read it. However, here we
just assume that it matches the platform's byte order.
// There is then a bunch of data that we don't care about (lat, long definitions, etc).
for (int i=0; i!=9; ++i)
parser.nextToken(true);
if (parser.token() != "byteorder")
return NULL;
parser.nextToken(true);
const Stream::ByteOrder byteOrder = (parser.token() == "LSBFIRST")? Stream::LittleEndian: Stream::BigEndian;
*/
// GridFloat comes in two files: an ASCII header which was parsed above (.hdr) and a big blob
// of binary data in a .flt file.
Path dataPath(fileName);
dataPath.stripExtension();
dataPath.append(".flt");
// Open the binary data.
FILE* file = fopen(dataPath.fileName(), "rb");
if (!file)
{
nvDebug("Failed to find GridFloat blob file '%s' corresponding to '%s'.\n", dataPath.fileName(), fileName);
return NULL;
}
// Allocate image.
AutoPtr<FloatImage> fimage(new FloatImage());
fimage->allocate(1, nCols, nRows);
float * channel = fimage->channel(0);
// The binary blob is defined to be in row-major order, containing IEEE floats.
// So we can just slurp it in. Theoretically, we ought to use the byte order.
const size_t nRead = fread((void*) channel, sizeof(float), nRows * nCols, file);
fclose(file);
return fimage.release();
}
#endif
#if 0

View File

@ -47,15 +47,10 @@ namespace nv
NVIMAGE_API bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
#endif
/*
NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
// GridFloat is a simple, open format for terrain elevation data. See http://ned.usgs.gov/Ned/faq.asp.
// Expects: 1) fileName will be an ".hdr" header file, 2) there will also exist a corresponding float data
// blob in a ".flt" file. (This is what USGS gives you.)
NVIMAGE_API FloatImage * loadGridFloat(const char * fileName, Stream & s);
*/
// NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
// NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
} // ImageIO namespace
} // nv namespace

View File

@ -12,14 +12,11 @@ http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
@@ This code needs to be reviewed, I'm not sure it's correct.
*/
#include <string.h> // memset
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/Quantize.h>
#include <nvimage/PixelFormat.h>
using namespace nv;
@ -51,94 +48,20 @@ void nv::Quantize::BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ )
// Simple quantization.
void nv::Quantize::RGB16( Image * image )
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel32 = image->pixel(x, y);
// Convert to 16 bit and back to 32 using regular bit expansion.
Color32 pixel16 = toColor32( toColor16(pixel32) );
// Store color.
image->pixel(x, y) = pixel16;
}
}
Truncate(image, 5, 6, 5, 8);
}
// Alpha quantization.
void nv::Quantize::Alpha4( Image * image )
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y);
// Convert to 4 bit using regular bit expansion.
pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
// Store color.
image->pixel(x, y) = pixel;
}
}
Truncate(image, 8, 8, 8, 4);
}
// Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg_RGB16( Image * image )
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
// @@ Use fixed point?
Vector3 * row0 = new Vector3[w+2];
Vector3 * row1 = new Vector3[w+2];
memset(row0, 0, sizeof(Vector3)*(w+2));
memset(row1, 0, sizeof(Vector3)*(w+2));
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel32 = image->pixel(x, y);
// Add error. // @@ We shouldn't clamp here!
pixel32.r = clamp(int(pixel32.r) + int(row0[1+x].x()), 0, 255);
pixel32.g = clamp(int(pixel32.g) + int(row0[1+x].y()), 0, 255);
pixel32.b = clamp(int(pixel32.b) + int(row0[1+x].z()), 0, 255);
// Convert to 16 bit. @@ Use regular clamp?
Color32 pixel16 = toColor32( toColor16(pixel32) );
// Store color.
image->pixel(x, y) = pixel16;
// Compute new error.
Vector3 diff(float(pixel32.r - pixel16.r), float(pixel32.g - pixel16.g), float(pixel32.b - pixel16.b));
// Propagate new error.
row0[1+x+1] += 7.0f / 16.0f * diff;
row1[1+x-1] += 3.0f / 16.0f * diff;
row1[1+x+0] += 5.0f / 16.0f * diff;
row1[1+x+1] += 1.0f / 16.0f * diff;
}
swap(row0, row1);
memset(row1, 0, sizeof(Vector3)*(w+2));
}
delete [] row0;
delete [] row1;
FloydSteinberg(image, 5, 6, 5, 8);
}
@ -192,34 +115,90 @@ void nv::Quantize::FloydSteinberg_BinaryAlpha( Image * image, int alpha_threshol
// Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg_Alpha4( Image * image )
{
FloydSteinberg(image, 8, 8, 8, 4);
}
void nv::Quantize::Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
// @@ Use fixed point?
float * row0 = new float[(w+2)];
float * row1 = new float[(w+2)];
memset(row0, 0, sizeof(float)*(w+2));
memset(row1, 0, sizeof(float)*(w+2));
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y);
// Add error.
int alpha = int(pixel.a) + int(row0[1+x]);
// Convert to our desired size, and reconstruct.
pixel.r = PixelFormat::convert(pixel.r, 8, rsize);
pixel.r = PixelFormat::convert(pixel.r, rsize, 8);
// Convert to 4 bit using regular bit expansion.
pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
pixel.g = PixelFormat::convert(pixel.g, 8, gsize);
pixel.g = PixelFormat::convert(pixel.g, gsize, 8);
pixel.b = PixelFormat::convert(pixel.b, 8, bsize);
pixel.b = PixelFormat::convert(pixel.b, bsize, 8);
pixel.a = PixelFormat::convert(pixel.a, 8, asize);
pixel.a = PixelFormat::convert(pixel.a, asize, 8);
// Store color.
image->pixel(x, y) = pixel;
}
}
}
// Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
Vector4 * row0 = new Vector4[w+2];
Vector4 * row1 = new Vector4[w+2];
memset(row0, 0, sizeof(Vector4)*(w+2));
memset(row1, 0, sizeof(Vector4)*(w+2));
for (uint y = 0; y < h; y++) {
for (uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y);
// Add error.
pixel.r = clamp(int(pixel.r) + int(row0[1+x].x()), 0, 255);
pixel.g = clamp(int(pixel.g) + int(row0[1+x].y()), 0, 255);
pixel.b = clamp(int(pixel.b) + int(row0[1+x].z()), 0, 255);
pixel.a = clamp(int(pixel.a) + int(row0[1+x].w()), 0, 255);
int r = pixel.r;
int g = pixel.g;
int b = pixel.b;
int a = pixel.a;
// Convert to our desired size, and reconstruct.
r = PixelFormat::convert(r, 8, rsize);
r = PixelFormat::convert(r, rsize, 8);
g = PixelFormat::convert(g, 8, gsize);
g = PixelFormat::convert(g, gsize, 8);
b = PixelFormat::convert(b, 8, bsize);
b = PixelFormat::convert(b, bsize, 8);
a = PixelFormat::convert(a, 8, asize);
a = PixelFormat::convert(a, asize, 8);
// Store color.
image->pixel(x, y) = Color32(r, g, b, a);
// Compute new error.
float diff = float(alpha - pixel.a);
Vector4 diff(float(int(pixel.r) - r), float(int(pixel.g) - g), float(int(pixel.b) - b), float(int(pixel.a) - a));
// Propagate new error.
row0[1+x+1] += 7.0f / 16.0f * diff;
@ -229,10 +208,9 @@ void nv::Quantize::FloydSteinberg_Alpha4( Image * image )
}
swap(row0, row1);
memset(row1, 0, sizeof(float)*(w+2));
memset(row1, 0, sizeof(Vector4)*(w+2));
}
delete [] row0;
delete [] row1;
}

View File

@ -17,6 +17,9 @@ namespace nv
void FloydSteinberg_BinaryAlpha(Image * img, int alpha_threshold = 127);
void FloydSteinberg_Alpha4(Image * img);
void Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
void FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
// @@ Add palette quantization algorithms!
}
}

View File

@ -49,7 +49,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
return;
}
}
normal = nv::normalize(normal, epsilon);
normal = ::normalize(normal, epsilon);
tangent -= normal * dot(normal, tangent);
bitangent -= normal * dot(normal, bitangent);
@ -68,8 +68,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
}
else
{
#if 0
tangent = nv::normalize(tangent, epsilon);
tangent = ::normalize(tangent, epsilon);
bitangent -= tangent * dot(tangent, bitangent);
if (length(bitangent) < epsilon)
@ -79,47 +78,11 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
}
else
{
bitangent = nv::normalize(bitangent, epsilon);
tangent = ::normalize(tangent, epsilon);
}
#else
if (length(bitangent) < epsilon)
{
bitangent = cross(tangent, normal);
nvCheck(isNormalized(bitangent));
}
else
{
tangent = nv::normalize(tangent);
bitangent = nv::normalize(bitangent);
Vector3 bisector = nv::normalize(tangent + bitangent);
Vector3 axis = cross(bisector, normal);
nvDebugCheck(isNormalized(axis, epsilon));
nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
if (dot(axis, tangent) > 0)
{
tangent = nv::normalize(bisector + axis);
bitangent = nv::normalize(bisector - axis);
}
else
{
tangent = nv::normalize(bisector - axis);
bitangent = nv::normalize(bisector + axis);
}
}
#endif
}
/*// Check vector lengths.
if (!isNormalized(normal, epsilon))
{
nvDebug("%f %f %f\n", normal.x(), normal.y(), normal.z());
nvDebug("%f %f %f\n", tangent.x(), tangent.y(), tangent.z());
nvDebug("%f %f %f\n", bitangent.x(), bitangent.y(), bitangent.z());
}*/
// Check vector lengths.
nvCheck(isNormalized(normal, epsilon));
nvCheck(isNormalized(tangent, epsilon));
nvCheck(isNormalized(bitangent, epsilon));
@ -162,18 +125,9 @@ void Basis::buildFrameForDirection(Vector3::Arg d)
bitangent = cross(normal, tangent);
}
bool Basis::isValid() const
{
if (equal(normal, Vector3(zero))) return false;
if (equal(tangent, Vector3(zero))) return false;
if (equal(bitangent, Vector3(zero))) return false;
if (equal(determinant(), 0.0f)) return false;
return true;
}
/*
/// Transform by this basis. (From this basis to object space).
Vector3 Basis::transform(Vector3::Arg v) const
{
@ -190,31 +144,30 @@ Vector3 Basis::transformT(Vector3::Arg v)
}
/// Transform by the inverse. (From object space to this basis).
/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned.
Vector3 Basis::transformI(Vector3::Arg v) const
{
const float det = determinant();
nvDebugCheck(!equal(det, 0.0f, 0.0f));
nvCheck(!equalf(det, 0.0f));
const float idet = 1.0f / det;
// Rows of the inverse matrix.
Vector3 r0(
(bitangent.y() * normal.z() - bitangent.z() * normal.y()),
-(bitangent.x() * normal.z() - bitangent.z() * normal.x()),
(bitangent.x() * normal.y() - bitangent.y() * normal.x()));
Vector3 r0, r1, r2;
r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet;
r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet;
r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet;
Vector3 r1(
-(tangent.y() * normal.z() - tangent.z() * normal.y()),
(tangent.x() * normal.z() - tangent.z() * normal.x()),
-(tangent.x() * normal.y() - tangent.y() * normal.x()));
r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet;
r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet;
r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet;
Vector3 r2(
(tangent.y() * bitangent.z() - tangent.z() * bitangent.y()),
-(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()),
(tangent.x() * bitangent.y() - tangent.y() * bitangent.x()));
r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet;
r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet;
r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet;
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet;
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2));
}
*/

View File

@ -54,8 +54,7 @@ namespace nv
tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y();
}
bool isValid() const;
/*
// Get transform matrix for this basis.
NVMATH_API Matrix matrix() const;
@ -67,7 +66,7 @@ namespace nv
// Transform by the inverse. (From object space to this basis).
NVMATH_API Vector3 transformI(Vector3::Arg v) const;
*/
Vector3 tangent;
Vector3 bitangent;

View File

@ -9,7 +9,6 @@
namespace nv
{
class Stream;
/// Axis Aligned Bounding Box.
class Box
@ -28,13 +27,11 @@ public:
// Cast operators.
operator const float * () const { return reinterpret_cast<const float *>(this); }
// Min corner of the box.
Vector3 minCorner() const { return m_mins; }
Vector3 & minCorner() { return m_mins; }
/// Min corner of the box.
Vector3 mins() const { return m_mins; }
// Max corner of the box.
Vector3 maxCorner() const { return m_maxs; }
Vector3 & maxCorner() { return m_maxs; }
/// Max corner of the box.
Vector3 maxs() const { return m_maxs; }
/// Clear the bounds.
void clearBounds()
@ -111,7 +108,7 @@ public:
float area() const
{
const Vector3 d = extents();
return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
}
/// Get the volume of the box.
@ -121,16 +118,6 @@ public:
return 8.0f * (d.x() * d.y() * d.z());
}
/// Return true if the box contains the given point.
bool contains(Vector3::Arg p) const
{
return
m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
}
friend Stream & operator<< (Stream & s, Box & box);
private:
Vector3 m_mins;
@ -138,6 +125,15 @@ private:
};
/*
/// Point inside box test.
inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
{
return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
}
*/
} // nv namespace

View File

@ -5,19 +5,13 @@ SET(MATH_SRCS
Vector.h
Matrix.h
Quaternion.h
Plane.h Plane.cpp
Box.h
Color.h
Montecarlo.h Montecarlo.cpp
Random.h Random.cpp
SphericalHarmonic.h SphericalHarmonic.cpp
Basis.h Basis.cpp
Triangle.h Triangle.cpp TriBox.cpp
Polygon.h Polygon.cpp
TypeSerialization.h TypeSerialization.cpp
Sparse.h Sparse.cpp
Solver.h Solver.cpp
KahanSum.h)
Triangle.h Triangle.cpp TriBox.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -1,38 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_KAHANSUM_H
#define NV_MATH_KAHANSUM_H
#include <nvmath/nvmath.h>
namespace nv
{
class KahanSum
{
public:
KahanSum() : accum(0.0f), err(0) {};
void add(float f)
{
float compensated = f + err;
float tmp = accum + compensated;
err = accum - tmp;
err += compensated;
accum = tmp;
}
float sum() const
{
return accum;
}
private:
float accum;
float err;
};
} // nv namespace
#endif // NV_MATH_KAHANSUM_H

View File

@ -1,168 +0,0 @@
// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
#include <nvmath/Polygon.h>
#include <nvmath/Triangle.h>
#include <nvmath/Plane.h>
using namespace nv;
Polygon::Polygon()
{
}
Polygon::Polygon(const Triangle & t)
{
pointArray.resize(3);
pointArray[0] = t.v[0];
pointArray[1] = t.v[1];
pointArray[2] = t.v[2];
}
Polygon::Polygon(const Vector3 * points, uint vertexCount)
{
pointArray.resize(vertexCount);
for (uint i = 0; i < vertexCount; i++)
{
pointArray[i] = points[i];
}
}
/// Compute polygon area.
float Polygon::area() const
{
float total = 0;
const uint pointCount = pointArray.count();
for (uint i = 2; i < pointCount; i++)
{
Vector3 v1 = pointArray[i-1] - pointArray[0];
Vector3 v2 = pointArray[i] - pointArray[0];
total += 0.5f * length(cross(v1, v2));
}
return total;
}
/// Get the bounds of the polygon.
Box Polygon::bounds() const
{
Box bounds;
bounds.clearBounds();
foreach(p, pointArray)
{
bounds.addPointToBounds(pointArray[p]);
}
return bounds;
}
/// Get the plane of the polygon.
Plane Polygon::plane() const
{
// @@ Do something better than this?
Vector3 n = cross(pointArray[1] - pointArray[0], pointArray[2] - pointArray[0]);
return Vector4(n, dot(n, pointArray[0]));
}
/// Clip polygon to box.
uint Polygon::clipTo(const Box & box)
{
const Plane posX( 1, 0, 0, box.maxCorner().x());
const Plane negX(-1, 0, 0,-box.minCorner().x());
const Plane posY( 0, 1, 0, box.maxCorner().y());
const Plane negY( 0,-1, 0,-box.minCorner().y());
const Plane posZ( 0, 0, 1, box.maxCorner().z());
const Plane negZ( 0, 0,-1,-box.minCorner().z());
if (clipTo(posX) == 0) return 0;
if (clipTo(negX) == 0) return 0;
if (clipTo(posY) == 0) return 0;
if (clipTo(negY) == 0) return 0;
if (clipTo(posZ) == 0) return 0;
if (clipTo(negZ) == 0) return 0;
return pointArray.count();
}
/// Clip polygon to plane.
uint Polygon::clipTo(const Plane & plane)
{
int count = 0;
const uint pointCount = pointArray.count();
Array<Vector3> newPointArray(pointCount + 1); // @@ Do not create copy every time.
Vector3 prevPoint = pointArray[pointCount - 1];
float prevDist = dot(plane.vector(), prevPoint) - plane.offset();
for (uint i = 0; i < pointCount; i++)
{
const Vector3 point = pointArray[i];
float dist = dot(plane.vector(), point) - plane.offset();
// @@ Handle points on plane better.
if (dist <= 0) // interior.
{
if (prevDist > 0) // exterior
{
// Add segment intersection point.
Vector3 dp = point - prevPoint;
float t = dist / prevDist;
newPointArray.append(point - dp * t);
}
// Add interior point.
newPointArray.append(point);
}
else if (dist > 0 && prevDist < 0)
{
// Add segment intersection point.
Vector3 dp = point - prevPoint;
float t = dist / prevDist;
newPointArray.append(point - dp * t);
}
prevPoint = point;
prevDist = dist;
}
swap(pointArray, newPointArray);
return count;
}
void Polygon::removeColinearPoints()
{
const uint pointCount = pointArray.count();
Array<Vector3> newPointArray(pointCount);
for (uint i = 0 ; i < pointCount; i++)
{
int j = (i + 1) % pointCount;
int k = (i + pointCount - 1) % pointCount;
Vector3 v1 = normalize(pointArray[j] - pointArray[i]);
Vector3 v2 = normalize(pointArray[i] - pointArray[k]);
if (dot(v1, v2) < 0.999)
{
newPointArray.append(pointArray[i]);
}
}
swap(pointArray, newPointArray);
}

View File

@ -1,45 +0,0 @@
// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
#ifndef NV_MATH_POLYGON_H
#define NV_MATH_POLYGON_H
#include <nvcore/Containers.h>
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
#include <nvmath/Box.h>
namespace nv
{
class Box;
class Plane;
class Triangle;
class Polygon
{
NV_FORBID_COPY(Polygon);
public:
Polygon();
Polygon(const Triangle & t);
Polygon(const Vector3 * points, uint vertexCount);
float area() const;
Box bounds() const;
Plane plane() const;
uint clipTo(const Box & box);
uint clipTo(const Plane & plane);
void removeColinearPoints();
private:
Array<Vector3> pointArray;
};
} // nv namespace
#endif // NV_MATH_POLYGON_H

View File

@ -51,6 +51,7 @@ namespace nv
inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
{
// @@ Efficient SIMD implementation?
return Quaternion(
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(),
@ -58,40 +59,6 @@ namespace nv
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w());
}
inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
{
return Quaternion(
+ a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
- a.x() * b.z() + a.z()*b.x() + a.w()*b.y(),
+ a.x() * b.y() - a.y()*b.x() + a.w()*b.z(),
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() );
}
inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
{
return Quaternion(
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y(),
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x(),
+ a.x() * b.y() - a.y()*b.x() + a.z()*b.w(),
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z());
}
inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
{
return mul(a, b);
}
inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
{
return mul(a, b);
}
inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
{
return mul(a, b);
}
inline Quaternion scale(Quaternion::Arg q, float s)
{
return scale(q.asVector(), s);
@ -155,24 +122,6 @@ namespace nv
return Quaternion(Vector4(v * s, c));
}
inline Vector3 imag(Quaternion::Arg q)
{
return q.asVector().xyz();
}
inline float real(Quaternion::Arg q)
{
return q.w();
}
/// Transform vector.
inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
{
Quaternion t = q * v * conjugate(q);
return imag(t);
}
} // nv namespace

View File

@ -1,726 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Solver.h>
using namespace nv;
namespace
{
class Preconditioner
{
public:
// Virtual dtor.
virtual ~Preconditioner() { }
// Apply preconditioning step.
virtual void apply(const FullVector & x, FullVector & y) const = 0;
};
// Jacobi preconditioner.
class JacobiPreconditioner : public Preconditioner
{
public:
JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
{
nvCheck(M.isSquare());
for(uint x = 0; x < M.width(); x++)
{
float elem = M.getCoefficient(x, x);
nvDebugCheck( elem != 0.0f );
if (symmetric)
{
m_inverseDiagonal[x] = 1.0f / sqrt(fabs(elem));
}
else
{
m_inverseDiagonal[x] = 1.0f / elem;
}
}
}
void apply(const FullVector & x, FullVector & y) const
{
y *= x;
}
private:
FullVector m_inverseDiagonal;
};
} // namespace
static int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
static int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
// Solve the symmetric system: At<41>A<EFBFBD>x = At<41>b
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
{
nvDebugCheck(A.width() == x.dimension());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
const uint D = A.width();
FullVector Atb(D);
mult(Transposed, A, b, Atb);
SparseMatrix AtA(D);
mult(Transposed, A, NoTransposed, A, AtA);
SymmetricSolver(AtA, Atb, x, epsilon);
}
// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
{
nvDebugCheck(A.width() == x.dimension());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(A.height() >= A.width() - lockedCount);
// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
const uint D = A.width() - lockedCount;
nvDebugCheck(D > 0);
// Compute: b - Al * xl
FullVector b_Alxl(b);
for (uint y = 0; y < A.height(); y++)
{
const uint count = A.getRow(y).count();
for (uint e = 0; e < count; e++)
{
uint column = A.getRow(y)[e].x;
bool isFree = true;
for (uint i = 0; i < lockedCount; i++)
{
isFree &= (lockedParameters[i] != column);
}
if (!isFree)
{
b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
}
}
}
// Remove locked columns from A.
SparseMatrix Af(D, A.height());
for (uint y = 0; y < A.height(); y++)
{
const uint count = A.getRow(y).count();
for (uint e = 0; e < count; e++)
{
uint column = A.getRow(y)[e].x;
uint ix = column;
bool isFree = true;
for (uint i = 0; i < lockedCount; i++)
{
isFree &= (lockedParameters[i] != column);
if (column > lockedParameters[i]) ix--; // shift columns
}
if (isFree)
{
Af.setCoefficient(ix, y, A.getRow(y)[e].v);
}
}
}
// Remove elements from x
FullVector xf(D);
for (uint i = 0, j = 0; i < A.width(); i++)
{
bool isFree = true;
for (uint l = 0; l < lockedCount; l++)
{
isFree &= (lockedParameters[l] != i);
}
if (isFree)
{
xf[j++] = x[i];
}
}
// Solve reduced system.
LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
// Copy results back to x.
for (uint i = 0, j = 0; i < A.width(); i++)
{
bool isFree = true;
for (uint l = 0; l < lockedCount; l++)
{
isFree &= (lockedParameters[l] != i);
}
if (isFree)
{
x[i] = xf[j++];
}
}
}
void nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
{
nvDebugCheck(A.height() == A.width());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(b.dimension() == x.dimension());
// JacobiPreconditioner jacobi(A, true);
// ConjugateGradientSolver(jacobi, A, b, x, epsilon);
ConjugateGradientSolver(A, b, x, epsilon);
}
/**
* Compute the solution of the sparse linear system Ab=x using the Conjugate
* Gradient method.
*
* Solving sparse linear systems:
* (1) A<>x = b
*
* The conjugate gradient algorithm solves (1) only in the case that A is
* symmetric and positive definite. It is based on the idea of minimizing the
* function
*
* (2) f(x) = 1/2<>x<EFBFBD>A<EFBFBD>x - b<>x
*
* This function is minimized when its gradient
*
* (3) df = A<>x - b
*
* is zero, which is equivalent to (1). The minimization is carried out by
* generating a succession of search directions p.k and improved minimizers x.k.
* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k<>p.k),
* and x.k+1 is set equal to the new point x.k + alfa.k<>p.k. The p.k and x.k are
* built up in such a way that x.k+1 is also the minimizer of f over the whole
* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N
* iterations you arrive at the minimizer over the entire vector space, i.e., the
* solution to (1).
*
* For a really good explanation of the method see:
*
* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
* Jonhathan Richard Shewchuk.
*
**/
/*static*/ int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
{
nvDebugCheck( A.isSquare() );
nvDebugCheck( A.width() == b.dimension() );
nvDebugCheck( A.width() == x.dimension() );
int i = 0;
const int D = A.width();
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
FullVector r(D); // residual
FullVector p(D); // search direction
FullVector q(D); //
float delta_0;
float delta_old;
float delta_new;
float alpha;
float beta;
// r = b - A<>x;
copy(b, r);
sgemv(-1, A, x, 1, r);
// p = r;
copy(r, p);
delta_new = dot( r, r );
delta_0 = delta_new;
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
{
i++;
// q = A<>p
mult(A, p, q);
// alpha = delta_new / p<>q
alpha = delta_new / dot( p, q );
// x = alfa<66>p + x
saxpy(alpha, p, x);
if ((i & 31) == 0) // recompute r after 32 steps
{
// r = b - A<>x
copy(b, r);
sgemv(-1, A, x, 1, r);
}
else
{
// r = r - alpha<68>q
saxpy(-alpha, q, r);
}
delta_old = delta_new;
delta_new = dot( r, r );
beta = delta_new / delta_old;
// p = r + beta<74>p
copy(r, p);
saxpy(beta, p, r);
}
return i;
}
// Conjugate gradient with preconditioner.
/*static*/ int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
{
nvDebugCheck( A.isSquare() );
nvDebugCheck( A.width() == b.dimension() );
nvDebugCheck( A.width() == x.dimension() );
int i = 0;
const int D = A.width();
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
FullVector r(D); // residual
FullVector p(D); // search direction
FullVector q(D); //
FullVector s(D); // preconditioned
float delta_0;
float delta_old;
float delta_new;
float alpha;
float beta;
// r = b - A<>x
copy(b, r);
sgemv(-1, A, x, 1, r);
// p = M^-1 <20> r
preconditioner.apply(r, p);
//copy(r, p);
delta_new = dot(r, p);
delta_0 = delta_new;
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
{
i++;
// q = A<>p
mult(A, p, q);
// alpha = delta_new / p<>q
alpha = delta_new / dot(p, q);
// x = alfa<66>p + x
saxpy(alpha, p, x);
if ((i & 31) == 0) // recompute r after 32 steps
{
// r = b - A<>x
copy(b, r);
sgemv(-1, A, x, 1, r);
}
else
{
// r = r - alfa<66>q
saxpy(-alpha, q, r);
}
// s = M^-1 <20> r
preconditioner.apply(r, s);
//copy(r, s);
delta_old = delta_new;
delta_new = dot( r, s );
beta = delta_new / delta_old;
// p = s + beta<74>p
copy(s, p);
saxpy(beta, p, s);
}
return i;
}
#if 0 // Nonsymmetric solvers
/** Bi-conjugate gradient method. */
MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = 4 * D;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha;
float beta;
DenseVector r(D);
DenseVector rtilde(D);
DenseVector p(D);
DenseVector ptilde(D);
DenseVector q(D);
DenseVector qtilde(D);
DenseVector tmp(D); // temporal vector.
// r = b - A<>x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
// p = r;
p.Set( r );
// ptilde = rtilde
ptilde.Set( rtilde );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i < i_max ) {
i++;
rho_1 = DenseVectorDotProduct( r, rtilde );
if( rho_1 == 0 ) {
// method fails.
return -i;
}
if (i == 1) {
p.Set( r );
ptilde.Set( rtilde );
}
else {
beta = rho_1 / rho_2;
// p = r + beta * p;
p.Mad( r, p, beta );
// ptilde = ztilde + beta * ptilde;
ptilde.Mad( rtilde, ptilde, beta );
}
// q = A * p;
A.Product( p, q );
// qtilde = A^t * ptilde;
A.TransProduct( ptilde, qtilde );
alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
// x += alpha * p;
x.Mad( x, p, alpha );
// r -= alpha * q;
r.Mad( r, q, -alpha );
// rtilde -= alpha * qtilde;
rtilde.Mad( rtilde, qtilde, -alpha );
rho_2 = rho_1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges
return i;
}
}
return i;
}
/** Bi-conjugate gradient stabilized method. */
int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = 2 * D;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha = 0;
float beta = 0;
float omega = 0;
DenseVector p(D);
DenseVector phat(D);
DenseVector s(D);
DenseVector shat(D);
DenseVector t(D);
DenseVector v(D);
DenseVector r(D);
DenseVector rtilde(D);
DenseVector tmp(D);
// r = b - A<>x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i<i_max ) {
i++;
rho_1 = DenseVectorDotProduct( rtilde, r );
if( rho_1 == 0 ) {
// method fails
return -i;
}
if( i == 1 ) {
p.Set( r );
}
else {
beta = (rho_1 / rho_2) * (alpha / omega);
// p = r + beta * (p - omega * v);
p.Mad( p, v, -omega );
p.Mad( r, p, beta );
}
//phat = M.solve(p);
phat.Set( p );
//Precond( &phat, p );
//v = A * phat;
A.Product( phat, v );
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
// s = r - alpha * v;
s.Mad( r, v, -alpha );
resid = s.Norm() / normb;
if( resid < epsilon ) {
// x += alpha * phat;
x.Mad( x, phat, alpha );
return i;
}
//shat = M.solve(s);
shat.Set( s );
//Precond( &shat, s );
//t = A * shat;
A.Product( shat, t );
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
// x += alpha * phat + omega * shat;
x.Mad( x, shat, omega );
x.Mad( x, phat, alpha );
//r = s - omega * t;
r.Mad( s, t, -omega );
rho_2 = rho_1;
resid = r.Norm() / normb;
if( resid < epsilon ) {
return i;
}
if( omega == 0 ) {
return -i; // ???
}
}
return i;
}
/** Bi-conjugate gradient stabilized method. */
int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = D;
// const int i_max = 1000;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha = 0;
float beta = 0;
float omega = 0;
DenseVector p(D);
DenseVector phat(D);
DenseVector s(D);
DenseVector shat(D);
DenseVector t(D);
DenseVector v(D);
DenseVector r(D);
DenseVector rtilde(D);
DenseVector tmp(D);
// r = b - A<>x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i<i_max ) {
i++;
rho_1 = DenseVectorDotProduct( rtilde, r );
if( rho_1 == 0 ) {
// method fails
return -i;
}
if( i == 1 ) {
p.Set( r );
}
else {
beta = (rho_1 / rho_2) * (alpha / omega);
// p = r + beta * (p - omega * v);
p.Mad( p, v, -omega );
p.Mad( r, p, beta );
}
//phat = M.solve(p);
//phat.Set( p );
M.Precond( &phat, p );
//v = A * phat;
A.Product( phat, v );
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
// s = r - alpha * v;
s.Mad( r, v, -alpha );
resid = s.Norm() / normb;
//printf( "--- Iteration %d: residual = %f\n", i, resid );
if( resid < epsilon ) {
// x += alpha * phat;
x.Mad( x, phat, alpha );
return i;
}
//shat = M.solve(s);
//shat.Set( s );
M.Precond( &shat, s );
//t = A * shat;
A.Product( shat, t );
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
// x += alpha * phat + omega * shat;
x.Mad( x, shat, omega );
x.Mad( x, phat, alpha );
//r = s - omega * t;
r.Mad( s, t, -omega );
rho_2 = rho_1;
resid = r.Norm() / normb;
if( resid < epsilon ) {
return i;
}
if( omega == 0 ) {
return -i; // ???
}
}
return i;
}
#endif

View File

@ -1,20 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SOLVER_H
#define NV_MATH_SOLVER_H
#include <nvmath/Sparse.h>
namespace nv
{
// Linear solvers.
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
NVMATH_API void SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
// NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
} // nv namespace
#endif // NV_MATH_SOLVER_H

View File

@ -1,831 +0,0 @@
// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
#include <nvmath/Sparse.h>
#include <nvmath/KahanSum.h>
using namespace nv;
/// Ctor.
FullVector::FullVector(uint dim)
{
m_array.resize(dim);
}
/// Copy ctor.
FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
{
}
/// Copy operator
const FullVector & FullVector::operator=(const FullVector & v)
{
nvCheck(dimension() == v.dimension());
m_array = v.m_array;
return *this;
}
void FullVector::fill(float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] = f;
}
}
void FullVector::operator+= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] += v.m_array[i];
}
}
void FullVector::operator-= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] -= v.m_array[i];
}
}
void FullVector::operator*= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] *= v.m_array[i];
}
}
void FullVector::operator+= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] += f;
}
}
void FullVector::operator-= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] -= f;
}
}
void FullVector::operator*= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] *= f;
}
}
void nv::saxpy(float a, const FullVector & x, FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
y[i] += a * x[i];
}
}
void nv::copy(const FullVector & x, FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
y[i] = x[i];
}
}
void nv::scal(float a, FullVector & x)
{
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
x[i] *= a;
}
}
float nv::dot(const FullVector & x, const FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
/*float sum = 0;
for (uint i = 0; i < dim; i++)
{
sum += x[i] * y[i];
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < dim; i++)
{
kahan.add(x[i] * y[i]);
}
return kahan.sum();
}
FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
{
m_array.resize(d*d, 0.0f);
}
FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
{
m_array.resize(w*h, 0.0f);
}
FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
{
m_array = m.m_array;
}
const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
{
nvCheck(width() == m.width());
nvCheck(height() == m.height());
m_array = m.m_array;
return *this;
}
float FullMatrix::getCoefficient(uint x, uint y) const
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
return m_array[y * width() + x];
}
void FullMatrix::setCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] = f;
}
void FullMatrix::addCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] += f;
}
void FullMatrix::mulCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] *= f;
}
float FullMatrix::dotRow(uint y, const FullVector & v) const
{
nvDebugCheck( v.dimension() == width() );
nvDebugCheck( y < height() );
float sum = 0;
const uint count = v.dimension();
for (uint i = 0; i < count; i++)
{
sum += m_array[y * count + i] * v[i];
}
return sum;
}
void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
{
nvDebugCheck( v.dimension() == width() );
nvDebugCheck( y < height() );
const uint count = v.dimension();
for (uint i = 0; i < count; i++)
{
v[i] += m_array[y * count + i];
}
}
// y = M * x
void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
{
mult(NoTransposed, M, x, y);
}
void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
{
const uint w = M.width();
const uint h = M.height();
if (TM == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
y.fill(0.0f);
for (uint i = 0; i < h; i++)
{
M.madRow(i, x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = M.dotRow(i, x);
}
}
}
// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
sgemv(alpha, NoTransposed, A, x, beta, y);
}
void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
const uint w = A.width();
const uint h = A.height();
if (TA == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
for (uint i = 0; i < h; i++)
{
A.madRow(i, alpha * x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
}
}
}
// Multiply a row of A by a column of B.
static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
{
const uint w = (TA == NoTransposed) ? A.width() : A.height();
nvDebugCheck(w == (TB == NoTransposed) ? B.height() : A.width());
float sum = 0.0f;
for (uint k = 0; k < w; k++)
{
const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop?
const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i);
sum += a * b;
}
return sum;
}
// C = A * B
void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
{
mult(NoTransposed, A, NoTransposed, B, C);
}
void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
{
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}
// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
{
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}
void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
{
const uint w = C.width();
const uint h = C.height();
uint aw = (TA == NoTransposed) ? A.width() : A.height();
uint ah = (TA == NoTransposed) ? A.height() : A.width();
uint bw = (TB == NoTransposed) ? B.width() : B.height();
uint bh = (TB == NoTransposed) ? B.height() : B.width();
nvDebugCheck(aw == bh);
nvDebugCheck(bw == ah);
nvDebugCheck(w == bw);
nvDebugCheck(h == ah);
for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y);
C.setCoefficient(x, y, c);
}
}
}
/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint d) : m_width(d)
{
m_array.resize(d);
}
/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
{
m_array.resize(h);
}
SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
{
m_array = m.m_array;
}
const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
{
nvCheck(width() == m.width());
nvCheck(height() == m.height());
m_array = m.m_array;
return *this;
}
// x is column, y is row
float SparseMatrix::getCoefficient(uint x, uint y) const
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x) return m_array[y][i].v;
}
return 0.0f;
}
void SparseMatrix::setCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v = f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
void SparseMatrix::addCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v += f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
void SparseMatrix::mulCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v *= f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
float SparseMatrix::sumRow(uint y) const
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
/*float sum = 0;
for (uint i = 0; i < count; i++)
{
sum += m_array[y][i].v;
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
kahan.add(m_array[y][i].v);
}
return kahan.sum();
}
float SparseMatrix::dotRow(uint y, const FullVector & v) const
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
/*float sum = 0;
for (uint i = 0; i < count; i++)
{
sum += m_array[y][i].v * v[m_array[y][i].x];
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
kahan.add(m_array[y][i].v * v[m_array[y][i].x]);
}
return kahan.sum();
}
void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
{
nvDebugCheck(y < height());
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
v[m_array[y][i].x] += alpha * m_array[y][i].v;
}
}
void SparseMatrix::clearRow(uint y)
{
nvDebugCheck( y < height() );
m_array[y].clear();
}
void SparseMatrix::scaleRow(uint y, float f)
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
m_array[y][i].v *= f;
}
}
void SparseMatrix::normalizeRow(uint y)
{
nvDebugCheck( y < height() );
float norm = 0.0f;
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
float f = m_array[y][i].v;
norm += f * f;
}
scaleRow(y, 1.0f / sqrtf(norm));
}
void SparseMatrix::clearColumn(uint x)
{
nvDebugCheck(x < width());
for (uint y = 0; y < height(); y++)
{
const uint count = m_array[y].count();
for (uint e = 0; e < count; e++)
{
if (m_array[y][e].x == x)
{
m_array[y][e].v = 0.0f;
break;
}
}
}
}
void SparseMatrix::scaleColumn(uint x, float f)
{
nvDebugCheck(x < width());
for (uint y = 0; y < height(); y++)
{
const uint count = m_array[y].count();
for (uint e = 0; e < count; e++)
{
if (m_array[y][e].x == x)
{
m_array[y][e].v *= f;
break;
}
}
}
}
const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
{
return m_array[y];
}
// y = M * x
void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
{
mult(NoTransposed, M, x, y);
}
void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
{
const uint w = M.width();
const uint h = M.height();
if (TM == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
y.fill(0.0f);
for (uint i = 0; i < h; i++)
{
M.madRow(i, x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = M.dotRow(i, x);
}
}
}
// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
sgemv(alpha, NoTransposed, A, x, beta, y);
}
void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
const uint w = A.width();
const uint h = A.height();
if (TA == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
for (uint i = 0; i < h; i++)
{
A.madRow(i, alpha * x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
}
}
}
// dot y-row of A by x-column of B
static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
const uint count = row.count();
/*float sum = 0.0f;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
sum += c.v * B.getCoefficient(x, c.x);
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
kahan.add(c.v * B.getCoefficient(x, c.x));
}
return kahan.sum();
}
// dot y-row of A by x-row of B
static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
const uint count = row.count();
/*float sum = 0.0f;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
sum += c.v * B.getCoefficient(c.x, x);
}
//return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
kahan.add(c.v * B.getCoefficient(c.x, x));
}
return kahan.sum();
}
// dot y-column of A by x-column of B
static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
nvDebugCheck(A.height() == B.height());
const uint h = A.height();
/*float sum = 0.0f;
for (uint i = 0; i < h; i++)
{
sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
}
//return sum;*/
KahanSum kahan;
for (uint i = 0; i < h; i++)
{
kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
}
return kahan.sum();
}
// C = A * B
void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
{
mult(NoTransposed, A, NoTransposed, B, C);
}
void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
{
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}
// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
{
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}
void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
{
const uint w = C.width();
const uint h = C.height();
uint aw = (TA == NoTransposed) ? A.width() : A.height();
uint ah = (TA == NoTransposed) ? A.height() : A.width();
uint bw = (TB == NoTransposed) ? B.width() : B.height();
uint bh = (TB == NoTransposed) ? B.height() : B.width();
nvDebugCheck(aw == bh);
nvDebugCheck(bw == ah);
nvDebugCheck(w == bw);
nvDebugCheck(h == ah);
for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
float c = beta * C.getCoefficient(x, y);
if (TA == NoTransposed && TB == NoTransposed)
{
// dot y-row of A by x-column of B.
c += alpha * dotRowColumn(y, A, x, B);
}
else if (TA == Transposed && TB == Transposed)
{
// dot y-column of A by x-row of B.
c += alpha * dotRowColumn(x, B, y, A);
}
else if (TA == Transposed && TB == NoTransposed)
{
// dot y-column of A by x-column of B.
c += alpha * dotColumnColumn(y, A, x, B);
}
else if (TA == NoTransposed && TB == Transposed)
{
// dot y-row of A by x-row of B.
c += alpha * dotRowRow(y, A, x, B);
}
if (c != 0.0f)
{
C.setCoefficient(x, y, c);
}
}
}
}
// C = At * A
void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
{
// This is quite expensive...
mult(Transposed, A, NoTransposed, A, C);
}

View File

@ -1,198 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SPARSE_H
#define NV_MATH_SPARSE_H
#include <nvcore/Containers.h>
#include <nvmath/nvmath.h>
// Full and sparse vector and matrix classes. BLAS subset.
namespace nv
{
class FullVector;
class FullMatrix;
class SparseMatrix;
/// Fixed size vector class.
class FullVector
{
public:
FullVector(uint dim);
FullVector(const FullVector & v);
const FullVector & operator=(const FullVector & v);
uint dimension() const { return m_array.count(); }
const float & operator[]( uint index ) const { return m_array[index]; }
float & operator[] ( uint index ) { return m_array[index]; }
void fill(float f);
void operator+= (const FullVector & v);
void operator-= (const FullVector & v);
void operator*= (const FullVector & v);
void operator+= (float f);
void operator-= (float f);
void operator*= (float f);
private:
Array<float> m_array;
};
// Pseudo-BLAS interface.
NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y
NVMATH_API void copy(const FullVector & x, FullVector & y);
NVMATH_API void scal(float a, FullVector & x);
NVMATH_API float dot(const FullVector & x, const FullVector & y);
enum Transpose
{
NoTransposed = 0,
Transposed = 1
};
/// Full matrix class.
class FullMatrix
{
public:
FullMatrix(uint d);
FullMatrix(uint w, uint h);
FullMatrix(const FullMatrix & m);
const FullMatrix & operator=(const FullMatrix & m);
uint width() const { return m_width; }
uint height() const { return m_height; }
bool isSquare() const { return m_width == m_height; }
float getCoefficient(uint x, uint y) const;
void setCoefficient(uint x, uint y, float f);
void addCoefficient(uint x, uint y, float f);
void mulCoefficient(uint x, uint y, float f);
float dotRow(uint y, const FullVector & v) const;
void madRow(uint y, float alpha, FullVector & v) const;
protected:
bool isValid() const {
return m_array.size() == (m_width * m_height);
}
private:
const uint m_width;
const uint m_height;
Array<float> m_array;
};
NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
// y = alpha*A*x + beta*y
NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
// C = alpha*A*B + beta*C
NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
/**
* Sparse matrix class. The matrix is assumed to be sparse and to have
* very few non-zero elements, for this reason it's stored in indexed
* format. To multiply column vectors efficiently, the matrix stores
* the elements in indexed-column order, there is a list of indexed
* elements for each row of the matrix. As with the FullVector the
* dimension of the matrix is constant.
**/
class SparseMatrix
{
friend class FullMatrix;
public:
// An element of the sparse array.
struct Coefficient {
uint x; // column
float v; // value
};
public:
SparseMatrix(uint d);
SparseMatrix(uint w, uint h);
SparseMatrix(const SparseMatrix & m);
const SparseMatrix & operator=(const SparseMatrix & m);
uint width() const { return m_width; }
uint height() const { return m_array.count(); }
bool isSquare() const { return width() == height(); }
float getCoefficient(uint x, uint y) const; // x is column, y is row
void setCoefficient(uint x, uint y, float f);
void addCoefficient(uint x, uint y, float f);
void mulCoefficient(uint x, uint y, float f);
float sumRow(uint y) const;
float dotRow(uint y, const FullVector & v) const;
void madRow(uint y, float alpha, FullVector & v) const;
void clearRow(uint y);
void scaleRow(uint y, float f);
void normalizeRow(uint y);
void clearColumn(uint x);
void scaleColumn(uint x, float f);
const Array<Coefficient> & getRow(uint y) const;
private:
/// Number of columns.
const uint m_width;
/// Array of matrix elements.
Array< Array<Coefficient> > m_array;
};
NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
// y = alpha*A*x + beta*y
NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
// C = alpha*A*B + beta*C
NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
// C = At * A
NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
} // nv namespace
#endif // NV_MATH_SPARSE_H

View File

@ -3,7 +3,6 @@
#ifndef NV_MATH_SPHERICALHARMONIC_H
#define NV_MATH_SPHERICALHARMONIC_H
#include <string.h> // memcpy
#include <nvmath/Vector.h>
namespace nv

View File

@ -96,7 +96,7 @@ static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg
if(min>rad || max<-rad) return false;
bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{
// use separating axis theorem to test overlap between triangle and box
// need to test for overlap in these directions:
@ -170,7 +170,7 @@ bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const T
}
bool nv::triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{
// use separating axis theorem to test overlap between triangle and box
// need to test for overlap in these directions:

View File

@ -1,82 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Stream.h>
#include <nvmath/Vector.h>
#include <nvmath/Matrix.h>
#include <nvmath/Quaternion.h>
#include <nvmath/Basis.h>
#include <nvmath/Box.h>
#include <nvmath/TypeSerialization.h>
using namespace nv;
Stream & nv::operator<< (Stream & s, Vector2 & v)
{
float x = v.x();
float y = v.y();
s << x << y;
if (s.isLoading())
{
v.set(x, y);
}
return s;
}
Stream & nv::operator<< (Stream & s, Vector3 & v)
{
float x = v.x();
float y = v.y();
float z = v.z();
s << x << y << z;
if (s.isLoading())
{
v.set(x, y, z);
}
return s;
}
Stream & nv::operator<< (Stream & s, Vector4 & v)
{
float x = v.x();
float y = v.y();
float z = v.z();
float w = v.w();
s << x << y << z << w;
if (s.isLoading())
{
v.set(x, y, z, w);
}
return s;
}
Stream & nv::operator<< (Stream & s, Matrix & m)
{
return s;
}
Stream & nv::operator<< (Stream & s, Quaternion & q)
{
return s << q.asVector();
}
Stream & nv::operator<< (Stream & s, Basis & basis)
{
return s << basis.tangent << basis.bitangent << basis.normal;
}
Stream & nv::operator<< (Stream & s, Box & box)
{
return s << box.m_mins << box.m_maxs;
}

View File

@ -1,32 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_TYPESERIALIZATION_H
#define NV_MATH_TYPESERIALIZATION_H
#include <nvmath/nvmath.h>
namespace nv
{
class Stream;
class Vector2;
class Vector3;
class Vector4;
class Matrix;
class Quaternion;
struct Basis;
class Box;
NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
NVMATH_API Stream & operator<< (Stream & s, Box & obj);
} // nv namespace
#endif // NV_MATH_TYPESERIALIZATION_H

View File

@ -4,7 +4,7 @@
#define NV_MATH_VECTOR_H
#include <nvmath/nvmath.h>
#include <nvcore/Algorithms.h> // min, max
#include <nvcore/Containers.h> // min, max
namespace nv
{
@ -71,7 +71,6 @@ public:
const Vector2 & xy() const;
scalar component(uint idx) const;
void setComponent(uint idx, scalar f);
const scalar * ptr() const;
@ -240,21 +239,13 @@ inline const Vector2 & Vector3::xy() const
inline scalar Vector3::component(uint idx) const
{
nvDebugCheck(idx < 3);
if (idx == 0) return m_x;
if (idx == 1) return m_y;
if (idx == 2) return m_z;
if (idx == 0) return x();
if (idx == 1) return y();
if (idx == 2) return z();
nvAssume(false);
return 0.0f;
}
inline void Vector3::setComponent(uint idx, float f)
{
nvDebugCheck(idx < 3);
if (idx == 0) m_x = f;
else if (idx == 1) m_y = f;
else if (idx == 2) m_z = f;
}
inline const scalar * Vector3::ptr() const
{
return &m_x;
@ -486,35 +477,6 @@ inline scalar length(Vector2::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector2::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
}
inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
{
float l = length(v);
nvDebugCheck(!isZero(l, epsilon));
Vector2 n = scale(v, 1.0f / l);
nvDebugCheck(isNormalized(n));
return n;
}
inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
{
float l = length(v);
if (isZero(l, epsilon)) {
return fallback;
}
return scale(v, 1.0f / l);
}
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
{
return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
@ -633,11 +595,6 @@ inline scalar length(Vector3::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector3::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
@ -759,11 +716,6 @@ inline scalar length(Vector4::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector4::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);

View File

@ -136,11 +136,6 @@ inline float lerp(float f0, float f1, float t)
return f0 * s + f1 * t;
}
inline float square(float f)
{
return f * f;
}
} // nv
#endif // NV_MATH_H

View File

@ -13,10 +13,10 @@ SET(NVTT_SRCS
CompressDXT.cpp
CompressRGB.h
CompressRGB.cpp
FastCompressDXT.h
FastCompressDXT.cpp
QuickCompressDXT.h
QuickCompressDXT.cpp
OptimalCompressDXT.h
OptimalCompressDXT.cpp
SingleColorLookup.h
CompressionOptions.h
CompressionOptions.cpp
@ -79,9 +79,6 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)

View File

@ -29,8 +29,8 @@
#include "nvtt.h"
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
@ -57,26 +57,33 @@ using namespace nv;
using namespace nvtt;
void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
{
const uint w = image->width();
const uint h = image->height();
}
nv::FastCompressor::~FastCompressor()
{
}
void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1(rgba, &block);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -86,27 +93,19 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
}
void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & outputOptions)
void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// @@ We could do better here: check for single RGB, but varying alpha.
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1a(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1a(rgba, &block);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -116,17 +115,18 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & o
}
void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT3 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
QuickCompress::compressDXT3(rgba, &block);
if (outputOptions.outputHandler != NULL) {
@ -137,19 +137,19 @@ void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Privat
}
void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT5 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
nv::compressBlock_BoundsRange(rgba, &block);
rgba.init(m_image, x, y);
QuickCompress::compressDXT5(rgba, &block, 0);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -159,23 +159,21 @@ void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Privat
}
void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT5 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// copy X coordinate to alpha channel and Y coordinate to green channel.
rgba.swizzleDXT5n();
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
nv::compressBlock_BoundsRange(rgba, &block);
QuickCompress::compressDXT5(rgba, &block, 0);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -185,42 +183,28 @@ void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Priva
}
void nv::fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
{
// @@ TODO
// compress red channel (X)
}
void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
nv::SlowCompressor::~SlowCompressor()
{
// @@ TODO
// compress red, green channels (X,Y)
}
void nv::doPrecomputation()
void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
static bool done = false; // @@ Stop using statics for reentrancy. Although the worst that could happen is that this stuff is precomputed multiple times.
if (!done)
{
done = true;
squish::FastClusterFit::DoPrecomputation();
}
m_image = image;
m_alphaMode = alphaMode;
}
void nv::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT1 block;
doPrecomputation();
//squish::WeightedClusterFit fit;
//squish::ClusterFit fit;
squish::FastClusterFit fit;
@ -229,11 +213,11 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1(rgba.color(0), &block);
OptimalCompress::compressDXT1(rgba.color(0), &block);
}
else
{
@ -250,10 +234,10 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
}
void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT1 block;
@ -264,11 +248,20 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
if (rgba.isSingleColor())
bool anyAlpha = false;
bool allAlpha = true;
for (uint i = 0; i < 16; i++)
{
QuickCompress::compressDXT1a(rgba.color(0), &block);
if (rgba.color(i).a < 128) anyAlpha = true;
else allAlpha = false;
}
if ((!anyAlpha && rgba.isSingleColor() || allAlpha))
{
OptimalCompress::compressDXT1a(rgba.color(0), &block);
}
else
{
@ -285,29 +278,37 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
}
void nv::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT3 block;
squish::WeightedClusterFit fit;
//squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// Compress explicit alpha.
QuickCompress::compressDXT3A(rgba, &block.alpha);
OptimalCompress::compressDXT3A(rgba, &block.alpha);
// Compress color.
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -316,10 +317,10 @@ void nv::compressDXT3(const Image * image, const OutputOptions::Private & output
}
}
void nv::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT5 block;
@ -330,12 +331,12 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// Compress alpha.
if (compressionOptions.quality == Quality_Highest)
{
compressBlock_BruteForce(rgba, &block.alpha);
OptimalCompress::compressDXT5A(rgba, &block.alpha);
}
else
{
@ -343,9 +344,16 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
}
// Compress color.
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -355,28 +363,25 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
}
void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT5 block;
doPrecomputation();
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// copy X coordinate to green channel and Y coordinate to alpha channel.
rgba.swizzleDXT5n();
// Compress X.
if (compressionOptions.quality == Quality_Highest)
{
compressBlock_BruteForce(rgba, &block.alpha);
OptimalCompress::compressDXT5A(rgba, &block.alpha);
}
else
{
@ -384,7 +389,7 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
}
// Compress Y.
QuickCompress::compressDXT1G(rgba, &block.color);
OptimalCompress::compressDXT1G(rgba, &block.color);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -394,10 +399,10 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
}
void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
AlphaBlockDXT5 block;
@ -405,11 +410,11 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
if (compressionOptions.quality == Quality_Highest)
{
compressBlock_BruteForce(rgba, &block);
OptimalCompress::compressDXT5A(rgba, &block);
}
else
{
@ -424,10 +429,10 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
}
void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock xcolor;
ColorBlock ycolor;
@ -437,16 +442,16 @@ void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & o
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
xcolor.init(image, x, y);
xcolor.init(m_image, x, y);
xcolor.splatX();
ycolor.init(image, x, y);
ycolor.init(m_image, x, y);
ycolor.splatY();
if (compressionOptions.quality == Quality_Highest)
{
compressBlock_BruteForce(xcolor, &block.x);
compressBlock_BruteForce(ycolor, &block.y);
OptimalCompress::compressDXT5A(xcolor, &block.x);
OptimalCompress::compressDXT5A(ycolor, &block.y);
}
else
{

View File

@ -32,25 +32,45 @@ namespace nv
class Image;
class FloatImage;
void doPrecomputation();
class FastCompressor
{
public:
FastCompressor();
~FastCompressor();
// Fast compressors.
void fastCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
// Normal compressors.
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
class SlowCompressor
{
public:
SlowCompressor();
~SlowCompressor();
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
// External compressors.
#if defined(HAVE_S3QUANT)

View File

@ -123,7 +123,7 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
}
// Zero padding.
for (uint x = w; x < pitch; x++)
for (uint x = w * byteCount; x < pitch; x++)
{
*(dst + x) = 0;
}

View File

@ -34,6 +34,7 @@
#include <nvimage/Filter.h>
#include <nvimage/Quantize.h>
#include <nvimage/NormalMap.h>
#include <nvimage/PixelFormat.h>
#include "Compressor.h"
#include "InputOptions.h"
@ -41,7 +42,6 @@
#include "OutputOptions.h"
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "CompressRGB.h"
#include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h"
@ -56,7 +56,7 @@ namespace
static int blockSize(Format format)
{
if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) {
if (format == Format_DXT1 || format == Format_DXT1a) {
return 8;
}
else if (format == Format_DXT3) {
@ -71,9 +71,6 @@ namespace
else if (format == Format_BC5) {
return 16;
}
else if (format == Format_CTX1) {
return 8;
}
return 0;
}
@ -203,7 +200,7 @@ namespace nvtt
AutoPtr<FloatImage> m_floatImage;
};
}
} // nvtt namespace
Compressor::Compressor() : m(*new Compressor::Private())
@ -348,7 +345,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
{
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
@ -369,10 +366,6 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setFourCC('A', 'T', 'I', '2');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
else if (compressionOptions.format == Format_CTX1) {
header.setFourCC('C', 'T', 'X', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
}
// Swap bytes if necessary.
@ -478,11 +471,6 @@ bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Privat
// Convert linear float image to fixed image ready for compression.
mipmap.toFixedImage(inputOptions);
if (inputOptions.premultiplyAlpha)
{
premultiplyAlphaMipmap(mipmap, inputOptions);
}
return true;
}
@ -583,33 +571,10 @@ void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Priva
// Resize image.
BoxFilter boxFilter;
mipmap.setImage(mipmap.asFloatImage()->downSample(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
}
void Compressor::Private::premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
nvDebugCheck(mipmap.asFixedImage() != NULL);
Image * image = mipmap.asMutableFixedImage();
const uint w = image->width();
const uint h = image->height();
const uint count = w * h;
for (uint i = 0; i < count; ++i)
{
Color32 c = image->pixel(i);
c.r = (uint(c.r) * uint(c.a)) >> 8;
c.g = (uint(c.g) * uint(c.a)) >> 8;
c.b = (uint(c.b) * uint(c.a)) >> 8;
image->pixel(i) = c;
}
}
// Process an input image: Convert to normal map, normalize, or convert to linear space.
void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
@ -653,13 +618,6 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
{
nvDebugCheck(mipmap.asFixedImage() != NULL);
if (compressionOptions.enableColorDithering)
{
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
{
Quantize::FloydSteinberg_RGB16(mipmap.asMutableFixedImage());
}
}
if (compressionOptions.binaryAlpha)
{
if (compressionOptions.enableAlphaDithering)
@ -671,19 +629,50 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
}
}
else
if (compressionOptions.enableColorDithering || compressionOptions.enableAlphaDithering)
{
uint rsize = 8;
uint gsize = 8;
uint bsize = 8;
uint asize = 8;
if (compressionOptions.enableColorDithering)
{
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
{
rsize = 5;
gsize = 6;
bsize = 5;
}
else if (compressionOptions.format == Format_RGB)
{
uint rshift, gshift, bshift;
PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize);
PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize);
PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize);
}
}
if (compressionOptions.enableAlphaDithering)
{
if (compressionOptions.format == Format_DXT3)
{
Quantize::Alpha4(mipmap.asMutableFixedImage());
asize = 4;
}
else if (compressionOptions.format == Format_DXT1a)
else if (compressionOptions.format == Format_RGB)
{
Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
uint ashift;
PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize);
}
}
if (compressionOptions.binaryAlpha)
{
asize = 8; // Already quantized.
}
Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
}
}
@ -692,9 +681,15 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
const Image * image = mipmap.asFixedImage();
nvDebugCheck(image != NULL);
FastCompressor fast;
fast.setImage(image, inputOptions.alphaMode);
SlowCompressor slow;
slow.setImage(image, inputOptions.alphaMode);
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{
compressRGB(image, outputOptions, compressionOptions);
@ -718,18 +713,19 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT1(image, outputOptions);
fast.compressDXT1(outputOptions);
}
else
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1(image, compressionOptions, outputOptions);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
}
else
{
compressDXT1(image, outputOptions, compressionOptions);
slow.compressDXT1(compressionOptions, outputOptions);
}
}
}
@ -737,49 +733,38 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT1a(image, outputOptions);
fast.compressDXT1a(outputOptions);
}
else
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
/*cuda*/compressDXT1a(image, outputOptions, compressionOptions);
/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
}
else
{
compressDXT1a(image, outputOptions, compressionOptions);
slow.compressDXT1a(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT1n)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1n(image, compressionOptions, outputOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT3(image, outputOptions);
fast.compressDXT3(outputOptions);
}
else
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT3(image, inputOptions, compressionOptions, outputOptions);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT3(compressionOptions, outputOptions);
}
else
{
compressDXT3(image, outputOptions, compressionOptions);
slow.compressDXT3(compressionOptions, outputOptions);
}
}
}
@ -787,18 +772,19 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT5(image, outputOptions);
fast.compressDXT5(outputOptions);
}
else
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT5(image, inputOptions, compressionOptions, outputOptions);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT5(compressionOptions, outputOptions);
}
else
{
compressDXT5(image, outputOptions, compressionOptions);
slow.compressDXT5(compressionOptions, outputOptions);
}
}
}
@ -806,32 +792,20 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT5n(image, outputOptions);
fast.compressDXT5n(outputOptions);
}
else
{
compressDXT5n(image, outputOptions, compressionOptions);
slow.compressDXT5n(compressionOptions, outputOptions);
}
}
else if (compressionOptions.format == Format_BC4)
{
compressBC4(image, outputOptions, compressionOptions);
slow.compressBC4(compressionOptions, outputOptions);
}
else if (compressionOptions.format == Format_BC5)
{
compressBC5(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_CTX1)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressCTX1(image, compressionOptions, outputOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
slow.compressBC5(compressionOptions, outputOptions);
}
return true;

View File

@ -58,7 +58,6 @@ namespace nvtt
void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;

View File

@ -1,456 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "FastCompressDXT.h"
#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#if defined(__SSE__)
#include <xmmintrin.h>
#endif
#if defined(__MMX__)
#include <mmintrin.h>
#endif
#undef __VEC__
#if defined(__VEC__)
#include <altivec.h>
#undef bool
#endif
// Online Resources:
// - http://www.jasondorie.com/ImageLib.zip
// - http://homepage.hispeed.ch/rscheidegger/dri_experimental/s3tc_index.html
// - http://www.sjbrown.co.uk/?article=dxt
using namespace nv;
#if defined(__SSE2__) && 0
// @@ TODO
typedef __m128i VectorColor;
inline static __m128i loadColor(Color32 c)
{
return ...;
}
inline static __m128i absoluteDifference(__m128i a, __m128i b)
{
return ...;
}
inline uint colorDistance(__m128i a, __m128i b)
{
return 0;
}
#elif defined(__MMX__) && 0
typedef __m64 VectorColor;
inline static __m64 loadColor(Color32 c)
{
return _mm_unpacklo_pi8(_mm_cvtsi32_si64(c), _mm_setzero_si64());
}
inline static __m64 absoluteDifference(__m64 a, __m64 b)
{
// = |a-b| or |b-a|
return _mm_or_si64(_mm_subs_pu16(a, b), _mm_subs_pu16(b, a));
}
inline uint colorDistance(__m64 a, __m64 b)
{
union {
__m64 v;
uint16 part[4];
} s;
s.v = absoluteDifference(a, b);
// @@ This is very slow!
return s.part[0] + s.part[1] + s.part[2] + s.part[3];
}
#define vectorEnd _mm_empty
#elif defined(__VEC__)
typedef vector signed int VectorColor;
inline static vector signed int loadColor(Color32 c)
{
return (vector signed int) (c.r, c.g, c.b, c.a);
}
// Get the absolute distance between the given colors.
inline static uint colorDistance(vector signed int c0, vector signed int c1)
{
int result;
vector signed int v = vec_sums(vec_abs(vec_sub(c0, c1)), (vector signed int)0);
vec_ste(vec_splat(v, 3), 0, &result);
return result;
}
inline void vectorEnd()
{
}
#else
typedef Color32 VectorColor;
inline static Color32 loadColor(Color32 c)
{
return c;
}
inline static Color32 premultiplyAlpha(Color32 c)
{
Color32 pm;
pm.r = (c.r * c.a) >> 8;
pm.g = (c.g * c.a) >> 8;
pm.b = (c.b * c.a) >> 8;
pm.a = c.a;
return pm;
}
inline static uint sqr(uint s)
{
return s*s;
}
// Get the absolute distance between the given colors.
inline static uint colorDistance(Color32 c0, Color32 c1)
{
return sqr(c0.r - c1.r) + sqr(c0.g - c1.g) + sqr(c0.b - c1.b);
//return abs(c0.r - c1.r) + abs(c0.g - c1.g) + abs(c0.b - c1.b);
}
inline void vectorEnd()
{
}
#endif
inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const VectorColor vcolor0 = loadColor(palette[0]);
const VectorColor vcolor1 = loadColor(palette[1]);
const VectorColor vcolor2 = loadColor(palette[2]);
const VectorColor vcolor3 = loadColor(palette[3]);
uint indices = 0;
for(int i = 0; i < 16; i++) {
const VectorColor vcolor = loadColor(rgba.color(i));
uint d0 = colorDistance(vcolor0, vcolor);
uint d1 = colorDistance(vcolor1, vcolor);
uint d2 = colorDistance(vcolor2, vcolor);
uint d3 = colorDistance(vcolor3, vcolor);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
vectorEnd();
return indices;
}
// Compressor that uses bounding box.
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 c0, c1;
rgba.boundsRange(&c1, &c0);
block->col0 = toColor16(c0);
block->col1 = toColor16(c1);
nvDebugCheck(block->col0.u > block->col1.u);
// Use 4 color mode only.
//if (block->col0.u < block->col1.u) {
// swap(block->col0.u, block->col1.u);
//}
Color32 palette[4];
block->evaluatePalette4(palette);
block->indices = computeIndices(rgba, palette);
}
// Encode DXT3 block.
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block)
{
compressBlock_BoundsRange(rgba, &block->color);
compressBlock(rgba, &block->alpha);
}
// Encode DXT3 alpha block.
void nv::compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block)
{
block->alpha0 = rgba.color(0).a >> 4;
block->alpha1 = rgba.color(1).a >> 4;
block->alpha2 = rgba.color(2).a >> 4;
block->alpha3 = rgba.color(3).a >> 4;
block->alpha4 = rgba.color(4).a >> 4;
block->alpha5 = rgba.color(5).a >> 4;
block->alpha6 = rgba.color(6).a >> 4;
block->alpha7 = rgba.color(7).a >> 4;
block->alpha8 = rgba.color(8).a >> 4;
block->alpha9 = rgba.color(9).a >> 4;
block->alphaA = rgba.color(10).a >> 4;
block->alphaB = rgba.color(11).a >> 4;
block->alphaC = rgba.color(12).a >> 4;
block->alphaD = rgba.color(13).a >> 4;
block->alphaE = rgba.color(14).a >> 4;
block->alphaF = rgba.color(15).a >> 4;
}
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
totalError += besterror;
block->setIndex(i, best);
}
return totalError;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += besterror;
}
return totalError;
}
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block)
{
Color32 c0, c1;
rgba.boundsRangeAlpha(&c1, &c0);
block->color.col0 = toColor16(c0);
block->color.col1 = toColor16(c1);
nvDebugCheck(block->color.col0.u > block->color.col1.u);
Color32 palette[4];
block->color.evaluatePalette4(palette);
block->color.indices = computeIndices(rgba, palette);
nvDebugCheck(c0.a <= c1.a);
block->alpha.alpha0 = c0.a;
block->alpha.alpha1 = c1.a;
computeAlphaIndices(rgba, &block->alpha);
}
uint nv::compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alpha0 = 0;
uint8 alpha1 = 255;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
alpha0 = max(alpha0, alpha);
alpha1 = min(alpha1, alpha);
}
alpha0 = alpha0 - (alpha0 - alpha1) / 32;
alpha1 = alpha1 + (alpha0 - alpha1) / 32;
AlphaBlockDXT5 block0;
block0.alpha0 = alpha0;
block0.alpha1 = alpha1;
uint error0 = computeAlphaIndices(rgba, &block0);
AlphaBlockDXT5 block1;
block1.alpha0 = alpha1;
block1.alpha1 = alpha0;
uint error1 = computeAlphaIndices(rgba, &block1);
if (error0 < error1)
{
*block = block0;
return error0;
}
else
{
*block = block1;
return error1;
}
}
uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 mina = 255;
uint8 maxa = 0;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
mina = min(mina, alpha);
maxa = max(maxa, alpha);
}
block->alpha0 = maxa;
block->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, block);
int besta0 = maxa;
int besta1 = mina;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
block->alpha0 = a0;
block->alpha1 = a1;
int error = computeAlphaError(rgba, block);
if (error < besterror)
{
besterror = error;
besta0 = a0;
besta1 = a1;
}
}
}
block->alpha0 = besta0;
block->alpha1 = besta1;
}
return computeAlphaIndices(rgba, block);
}

View File

@ -1,84 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_FASTCOMPRESSDXT_H
#define NV_TT_FASTCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
// Color compression:
// Compressor that uses the extremes of the luminance axis.
// void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the extremes of the luminance axis.
// void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box and takes alpha into account.
// void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Simple, but slow compressor that tests all color pairs.
// void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force 6d search along the best fit axis.
// void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Spatial greedy search.
// void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force compressor for DXT5n
// void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// Minimize error of the endpoints.
// void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
// uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
// uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// Alpha compression:
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block);
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
// uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
} // nv namespace
#endif // NV_TT_FASTCOMPRESSDXT_H

View File

@ -23,8 +23,6 @@
#include <string.h> // memcpy
#include <nvcore/Containers.h> // nextPowerOfTwo
#include <nvcore/Memory.h>
#include "nvtt.h"
@ -120,8 +118,6 @@ void InputOptions::reset()
m.maxExtent = 0;
m.roundMode = RoundMode_None;
m.premultiplyAlpha = false;
}
@ -309,19 +305,6 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2,
//m.linearTransform.setRow(channel, w);
}
void InputOptions::setSwizzleTransform(int x, int y, int z, int w)
{
nvCheck(x >= 0 && x < 3);
nvCheck(y >= 0 && y < 3);
nvCheck(z >= 0 && z < 3);
nvCheck(w >= 0 && w < 3);
// m.xswizzle = x;
// m.yswizzle = y;
// m.zswizzle = z;
// m.wswizzle = w;
}
void InputOptions::setMaxExtents(int e)
{
nvDebugCheck(e > 0);
@ -333,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode)
m.roundMode = mode;
}
void InputOptions::setPremultiplyAlpha(bool b)
{
m.premultiplyAlpha = b;
}
void InputOptions::Private::computeTargetExtents() const
{

View File

@ -78,8 +78,6 @@ namespace nvtt
uint maxExtent;
RoundMode roundMode;
bool premultiplyAlpha;
// @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere...
mutable uint targetWidth;
mutable uint targetHeight;

View File

@ -0,0 +1,368 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
using namespace nv;
using namespace OptimalCompress;
namespace
{
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
// Choose quantized color that produces less error. Used by DXT3 compressor.
inline static uint quantize4(uint8 a)
{
int q0 = (a >> 4) - 1;
int q1 = (a >> 4);
int q2 = (a >> 4) + 1;
q0 = (q0 << 4) | q0;
q1 = (q1 << 4) | q1;
q2 = (q2 << 4) | q2;
int d0 = abs(q0 - a);
int d1 = abs(q1 - a);
int d2 = abs(q2 - a);
if (d0 < d1 && d0 < d2) return q0 >> 4;
if (d1 < d2) return q1 >> 4;
return q2 >> 4;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best;
for (uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += besterror;
}
return totalError;
}
static void computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
block->setIndex(i, best);
}
}
} // namespace
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
if (rgba.a < 128)
{
dxtBlock->col0.u = 0;
dxtBlock->col1.u = 0;
dxtBlock->indices = 0xFFFFFFFF;
}
else
{
compressDXT1(rgba, dxtBlock);
}
}
// Brute force green channel compressor
void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
void OptimalCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
dxtBlock->alpha0 = quantize4(rgba.color(0).a);
dxtBlock->alpha1 = quantize4(rgba.color(1).a);
dxtBlock->alpha2 = quantize4(rgba.color(2).a);
dxtBlock->alpha3 = quantize4(rgba.color(3).a);
dxtBlock->alpha4 = quantize4(rgba.color(4).a);
dxtBlock->alpha5 = quantize4(rgba.color(5).a);
dxtBlock->alpha6 = quantize4(rgba.color(6).a);
dxtBlock->alpha7 = quantize4(rgba.color(7).a);
dxtBlock->alpha8 = quantize4(rgba.color(8).a);
dxtBlock->alpha9 = quantize4(rgba.color(9).a);
dxtBlock->alphaA = quantize4(rgba.color(10).a);
dxtBlock->alphaB = quantize4(rgba.color(11).a);
dxtBlock->alphaC = quantize4(rgba.color(12).a);
dxtBlock->alphaD = quantize4(rgba.color(13).a);
dxtBlock->alphaE = quantize4(rgba.color(14).a);
dxtBlock->alphaF = quantize4(rgba.color(15).a);
}
void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
{
uint8 mina = 255;
uint8 maxa = 0;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
mina = min(mina, alpha);
maxa = max(maxa, alpha);
}
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
int besta0 = maxa;
int besta1 = mina;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock);
if (error < besterror)
{
besterror = error;
besta0 = a0;
besta1 = a1;
}
}
}
dxtBlock->alpha0 = besta0;
dxtBlock->alpha1 = besta1;
}
computeAlphaIndices(rgba, dxtBlock);
}

View File

@ -0,0 +1,49 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_OPTIMALCOMPRESSDXT_H
#define NV_TT_OPTIMALCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
namespace OptimalCompress
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
}
} // nv namespace
#endif // NV_TT_OPTIMALCOMPRESSDXT_H

View File

@ -21,15 +21,13 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "QuickCompressDXT.h"
#include "SingleColorLookup.h"
#include "OptimalCompressDXT.h"
using namespace nv;
@ -290,70 +288,6 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
dxtBlock->indices = computeIndices3(block, a, b);
}*/
namespace
{
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
} // namespace
namespace
{
@ -505,29 +439,14 @@ namespace
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), dxtBlock);
}
else
{
// read block
Vector3 block[16];
extractColorBlockRGB(rgba, block);
@ -554,29 +473,28 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
dxtBlock->indices = computeIndices4(block, maxColor, minColor);
optimizeEndPoints4(block, dxtBlock);
}
void QuickCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
if (rgba.a == 0)
{
dxtBlock->col0.u = 0;
dxtBlock->col1.u = 0;
dxtBlock->indices = 0xFFFFFFFF;
}
else
{
compressDXT1(rgba, dxtBlock);
}
}
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
if (!rgba.hasAlpha())
bool hasAlpha = false;
for (uint i = 0; i < 16; i++)
{
if (rgba.color(i).a < 128) {
hasAlpha = true;
break;
}
}
if (!hasAlpha)
{
compressDXT1(rgba, dxtBlock);
}
// @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode.
//else if (rgba.isSingleColorNoAlpha()) { ... }
else
{
// read block
@ -609,95 +527,14 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
}
// Brute force green channel compressor
void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
// @@ Round instead of truncate. When rounding take into account bit expansion.
dxtBlock->alpha0 = rgba.color(0).a >> 4;
dxtBlock->alpha1 = rgba.color(1).a >> 4;
dxtBlock->alpha2 = rgba.color(2).a >> 4;
dxtBlock->alpha3 = rgba.color(3).a >> 4;
dxtBlock->alpha4 = rgba.color(4).a >> 4;
dxtBlock->alpha5 = rgba.color(5).a >> 4;
dxtBlock->alpha6 = rgba.color(6).a >> 4;
dxtBlock->alpha7 = rgba.color(7).a >> 4;
dxtBlock->alpha8 = rgba.color(8).a >> 4;
dxtBlock->alpha9 = rgba.color(9).a >> 4;
dxtBlock->alphaA = rgba.color(10).a >> 4;
dxtBlock->alphaB = rgba.color(11).a >> 4;
dxtBlock->alphaC = rgba.color(12).a >> 4;
dxtBlock->alphaD = rgba.color(13).a >> 4;
dxtBlock->alphaE = rgba.color(14).a >> 4;
dxtBlock->alphaF = rgba.color(15).a >> 4;
}
void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
{
compressDXT1(rgba, &dxtBlock->color);
compressDXT3A(rgba, &dxtBlock->alpha);
OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha);
}
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/)
{
uint8 alpha0 = 0;
uint8 alpha1 = 255;
@ -717,7 +554,7 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
AlphaBlockDXT5 bestblock = block;
while(true)
for (int i = 0; i < iterationCount; i++)
{
optimizeAlpha8(rgba, &block);
uint error = computeAlphaIndices(rgba, &block);
@ -741,9 +578,8 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
*dxtBlock = bestblock;
}
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock)
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
{
compressDXT1(rgba, &dxtBlock->color);
compressDXT5A(rgba, &dxtBlock->alpha);
compressDXT5A(rgba, &dxtBlock->alpha, iterationCount);
}

View File

@ -37,17 +37,13 @@ namespace nv
namespace QuickCompress
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8);
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8);
}
} // nv namespace

View File

@ -159,7 +159,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
}
}
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16])
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
@ -189,6 +189,8 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
*sameColor = (axis == make_float3(0, 0, 0));
dps[idx] = dot(rawColors[idx], axis);
#if __DEVICE_EMULATION__
@ -205,43 +207,6 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
}
}
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16])
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
__shared__ float dps[16];
if (idx < 16)
{
// Read color and copy to shared mem.
uint c = image[(bid) * 16 + idx];
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
// No need to synchronize, 16 < warp size.
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
colorSums(colors, sums);
float2 axis = bestFitLine(colors, sums[0]);
dps[idx] = dot(colors[idx], axis);
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, xrefs);
float2 tmp = colors[idx];
colors[xrefs[idx]] = tmp;
}
}
////////////////////////////////////////////////////////////////////////////////
// Round color to RGB565 and expand
@ -258,26 +223,6 @@ inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
return v;
}
inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 31.0f);
v.y = rintf(__saturatef(v.y) * 63.0f);
*w = ((ushort)v.x << 11) | ((ushort)v.y << 5);
v.x *= 0.03227752766457f; // approximate integer bit expansion.
v.y *= 0.01583151765563f;
return v;
}
inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 255.0f);
v.y = rintf(__saturatef(v.y) * 255.0f);
*w = ((ushort)v.x << 8) | ((ushort)v.y);
v.x *= 1.0f / 255.0f;
v.y *= 1.0f / 255.0f;
return v;
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate permutations
@ -521,114 +466,6 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights,
}
*/
__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable3[bits & 3] * colors[i];
akku += prods3[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 4.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 4.0f) * (e.x + e.y);
}
__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 8-8 color and expand...
a = roundAndExpand88(a, start);
b = roundAndExpand88(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate all permutations
@ -757,14 +594,12 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
}
*/
__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
__shared__ uint s_permutations[160];
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
@ -772,7 +607,6 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons
ushort start, end;
uint permutation = permutations[pidx];
if (pidx < 160) s_permutations[pidx] = permutation;
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
@ -791,30 +625,6 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons
bestPermutation ^= 0x55555555; // Flip indices.
}
for(int i = 0; i < 3; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 160) break;
ushort start, end;
uint permutation = s_permutations[pidx];
float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
if (bestStart > bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
}
}
}
errors[idx] = bestError;
}
@ -852,40 +662,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
errors[idx] = bestError;
}
__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
float error = evalPermutationCTX(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
errors[idx] = bestError;
}
////////////////////////////////////////////////////////////////////////////////
// Find index with minimum error
@ -996,11 +772,6 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr
result[bid].y = indices;
}
__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result)
{
saveBlockDXT1(start, end, permutation, xrefs, result);
}
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
{
const int bid = blockIdx.x;
@ -1062,6 +833,39 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
}
}
__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float3 colors[16];
__shared__ float3 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs, &sameColor);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalLevel4Permutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result)
{
@ -1069,11 +873,18 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
__shared__ float3 sums[16];
__shared__ float weights[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, weights, xrefs);
loadColorBlock(image, colors, sums, weights, xrefs, &sameColor);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
@ -1092,61 +903,6 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
}
__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutationsCTX(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockCTX1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
/*
__device__ float computeError(const float weights[16], uchar a0, uchar a1)
{
@ -1352,17 +1108,12 @@ extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result
compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressLevel4DXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressNormalDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressCTX1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}

View File

@ -30,6 +30,7 @@
#include <nvtt/CompressionOptions.h>
#include <nvtt/OutputOptions.h>
#include <nvtt/QuickCompressDXT.h>
#include <nvtt/OptimalCompressDXT.h>
#include "CudaCompressDXT.h"
#include "CudaUtils.h"
@ -52,10 +53,8 @@ using namespace nvtt;
extern "C" void setupCompressKernel(const float weights[3]);
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
#include "Bitmaps.h" // @@ Rename to BitmapTable.h
@ -121,113 +120,25 @@ bool CudaCompressor::isValid() const
// @@ This code is very repetitive and needs to be cleaned up.
#if 0
struct CudaCompressionKernel
void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
virtual void setup(const CompressionOptions::Private & compressionOptions)
{
setupCompressKernel(compressionOptions.colorWeight.ptr());
}
virtual void setBitmapTable();
virtual void runDeviceCode(int count);
virtual void runHostCode(int count);
};
void CudaCompressor::compressKernel(CudaCompressionKernel * kernel)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
kernel->setup(compressionOptions);
kernel->setBitmapTable(m_bitmapTable);
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
kernel->runDeviceCode(count, m_data, m_result);
kernel->runHostCode(count);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
kernel->outputResult(outputOptions.outputHandler);
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
m_image = image;
m_alphaMode = alphaMode;
}
#endif // 0
/// Compress image using CUDA.
void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
@ -272,7 +183,7 @@ void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions:
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
@ -286,18 +197,18 @@ void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions:
/// Compress image using CUDA.
void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage);
convertToBlockLinear(m_image, blockLinearImage);
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
@ -317,13 +228,20 @@ void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Priva
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++)
{
ColorBlock rgba(blockLinearImage + (bn + i) * 16);
QuickCompress::compressDXT3A(rgba, alphaBlocks + i);
OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
}
// Check for errors.
@ -355,7 +273,7 @@ void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Priva
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks);
free(blockLinearImage);
@ -370,18 +288,18 @@ void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Priva
/// Compress image using CUDA.
void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage);
convertToBlockLinear(m_image, blockLinearImage);
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
@ -401,7 +319,14 @@ void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Priva
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++)
@ -439,7 +364,7 @@ void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Priva
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks);
free(blockLinearImage);
@ -453,323 +378,3 @@ void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Priva
}
void CudaCompressor::compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
setupCompressKernel(compressionOptions.colorWeight.ptr());
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressNormalKernelDXT1(count, m_data, m_result, m_bitmapTable);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
void CudaCompressor::compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
setupCompressKernel(compressionOptions.colorWeight.ptr());
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressKernelCTX1(count, m_data, m_result, m_bitmapTable);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
#if 0
class Task
{
public:
explicit Task(uint numBlocks) : blockMaxCount(numBlocks), blockCount(0)
{
// System memory allocations.
blockLinearImage = new uint[blockMaxCount * 16];
xrefs = new uint[blockMaxCount * 16];
// Device memory allocations.
cudaMalloc((void**) &d_blockLinearImage, blockMaxCount * 16 * sizeof(uint));
cudaMalloc((void**) &d_compressedImage, blockMaxCount * 8U);
// @@ Check for allocation errors.
}
~Task()
{
delete [] blockLinearImage;
delete [] xrefs;
cudaFree(d_blockLinearImage);
cudaFree(d_compressedImage);
}
void addColorBlock(const ColorBlock & rgba)
{
nvDebugCheck(!isFull());
// @@ Count unique colors?
/*
// Convert colors to vectors.
Array<Vector3> pointArray(16);
for(int i = 0; i < 16; i++) {
const Color32 color = rgba.color(i);
pointArray.append(Vector3(color.r, color.g, color.b));
}
// Find best fit line.
const Vector3 axis = Fit::bestLine(pointArray).direction();
// Project points to axis.
float dps[16];
uint * order = &xrefs[blockCount * 16];
for (uint i = 0; i < 16; ++i)
{
dps[i] = dot(pointArray[i], axis);
order[i] = i;
}
// Sort them.
for (uint i = 0; i < 16; ++i)
{
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
{
swap(dps[j], dps[j - 1]);
swap(order[j], order[j - 1]);
}
}
*/
// Write sorted colors to blockLinearImage.
for(uint i = 0; i < 16; ++i)
{
// blockLinearImage[blockCount * 16 + i] = rgba.color(order[i]);
blockLinearImage[blockCount * 16 + i] = rgba.color(i);
}
++blockCount;
}
bool isFull()
{
nvDebugCheck(blockCount <= blockMaxCount);
return blockCount == blockMaxCount;
}
void flush(const OutputOptions::Private & outputOptions)
{
if (blockCount == 0)
{
// Nothing to do.
return;
}
// Copy input color blocks.
cudaMemcpy(d_blockLinearImage, blockLinearImage, blockCount * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressKernelDXT1(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
uint * compressedImage = blockLinearImage;
cudaMemcpy(compressedImage, d_compressedImage, blockCount * 8, cudaMemcpyDeviceToHost);
// @@ Sort block indices.
// Output result.
if (outputOptions.outputHandler != NULL)
{
// outputOptions.outputHandler->writeData(compressedImage, blockCount * 8);
}
blockCount = 0;
}
private:
const uint blockMaxCount;
uint blockCount;
uint * blockLinearImage;
uint * xrefs;
uint * d_blockLinearImage;
uint * d_compressedImage;
};
void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
#if defined HAVE_CUDA
const uint w = image->width();
const uint h = image->height();
const uint blockNum = ((w + 3) / 4) * ((h + 3) / 4);
const uint blockMax = 32768; // 49152, 65535
setupCompressKernelDXT1(compressionOptions.colorWeight.ptr());
ColorBlock rgba;
Task task(min(blockNum, blockMax));
clock_t start = clock();
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
task.addColorBlock(rgba);
if (task.isFull())
{
task.flush(outputOptions);
}
}
}
task.flush(outputOptions);
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
#endif // 0

View File

@ -39,17 +39,20 @@ namespace nv
bool isValid() const;
void compressDXT1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
uint * m_bitmapTable;
uint * m_data;
uint * m_result;
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
} // nv namespace

View File

@ -87,64 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
return a.x == b.x && a.y == b.y && a.z == b.z;
}
// float2 operators
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
return make_float2(a.x*b.x, a.y*b.y);
}
inline __device__ __host__ float2 operator *(float f, float2 v)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator *(float2 v, float f)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator +(float2 a, float2 b)
{
return make_float2(a.x+b.x, a.y+b.y);
}
inline __device__ __host__ void operator +=(float2 & b, float2 a)
{
b.x += a.x;
b.y += a.y;
}
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
return make_float2(a.x-b.x, a.y-b.y);
}
inline __device__ __host__ void operator -=(float2 & b, float2 a)
{
b.x -= a.x;
b.y -= a.y;
}
inline __device__ __host__ float2 operator /(float2 v, float f)
{
float inv = 1.0f / f;
return v * inv;
}
inline __device__ __host__ void operator /=(float2 & b, float f)
{
float inv = 1.0f / f;
b.x *= inv;
b.y *= inv;
}
inline __device__ __host__ float dot(float2 a, float2 b)
{
return a.x * b.x + a.y * b.y;
}
inline __device__ __host__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
@ -301,89 +243,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
return firstEigenVector(covariance);
}
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
{
// @@ 8 iterations is probably more than enough.
float2 v = make_float2(1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1];
float y = v.x * matrix[1] + v.y * matrix[2];
float m = max(x, y);
float iv = 1.0f / m;
if (m == 0.0f) iv = 0.0f;
v = make_float2(x*iv, y*iv);
}
return v;
}
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
float2 color_sum = make_float2(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[3] = {0, 0, 0};
for (int i = 0; i < 16; i++)
{
float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[3] += a.y * a.y;
}
#else
const int idx = threadIdx.x;
float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
__shared__ float covariance[16*3];
covariance[3 * idx + 0] = diff.x * diff.x;
covariance[3 * idx + 1] = diff.x * diff.y;
covariance[3 * idx + 2] = diff.y * diff.y;
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
}
}
#endif
// Compute first eigen vector.
return firstEigenVector2D(covariance);
}
#endif // CUDAMATH_H

View File

@ -74,7 +74,7 @@ bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
if (isWindowsVista()) return false;
//if (isWindowsVista()) return false;
//if (isWindowsVista() || !isWow32()) return false;
#endif
int count = deviceCount();

View File

@ -1,60 +0,0 @@
#include "nvtt_experimental.h"
struct NvttImage
{
NvttImage() :
m_constant(false),
m_image(NULL),
m_floatImage(NULL)
{
}
~NvttImage()
{
if (m_constant && m_image) m_image->unwrap();
delete m_image;
delete m_floatImage;
}
bool m_constant;
Image * m_image;
FloatImage * m_floatImage;
};
NvttImage * nvttCreateImage()
{
return new NvttImage();
}
void nvttDestroyImage(NvttImage * img)
{
delete img;
}
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data)
{
nvCheck(img != NULL);
if (format == NVTT_InputFormat_BGRA_8UB)
{
img->m_constant = false;
img->m_image->allocate(w, h);
memcpy(img->m_image->pixels(), data, w * h * 4);
}
else
{
nvCheck(false);
}
}
void nvttCompressImage(NvttImage * img, NvttFormat format)
{
nvCheck(img != NULL);
// @@ Invoke appropriate compressor.
}
#endif // NVTT_EXPERIMENTAL_H

View File

@ -1,55 +0,0 @@
#ifndef NVTT_EXPERIMENTAL_H
#define NVTT_EXPERIMENTAL_H
#include <nvtt/nvtt.h>
typedef struct NvttImage NvttImage;
NvttImage * nvttCreateImage();
void nvttDestroyImage(NvttImage * img);
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data);
void nvttCompressImage(NvttImage * img, NvttFormat format);
// How to control the compression parameters?
// Using many arguments:
// void nvttCompressImage(img, format, quality, r, g, b, a, ...);
// Using existing compression option class:
// compressionOptions = nvttCreateCompressionOptions();
// nvttSetCompressionOptionsFormat(compressionOptions, format);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a);
// ...
// nvttCompressImage(img, compressionOptions);
// Using thread local context state:
// void nvttSetCompressionFormat(format);
// void nvttSetCompressionQuality(quality);
// void nvttSetCompressionColorWeights(r, g, b, a);
// ...
// nvttCompressImage(img);
// Using thread local context state, but with GL style function arguments:
// nvttCompressorParameteri(NVTT_FORMAT, format);
// nvttCompressorParameteri(NVTT_QUALITY, quality);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a);
// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a);
// ...
// nvttCompressImage(img);
// How do we get the compressed output?
// - Using callbacks. (via new entrypoints, or through outputOptions)
// - Return it explicitely from nvttCompressImage.
// - Store it along the image, retrieve later explicitely with 'nvttGetCompressedData(img, ...)'
#endif // NVTT_EXPERIMENTAL_H

View File

@ -83,9 +83,6 @@ namespace nvtt
Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2
Format_DXT1n,
Format_CTX1,
};
/// Quality modes.
@ -157,7 +154,6 @@ namespace nvtt
{
ColorTransform_None,
ColorTransform_Linear,
ColorTransform_Swizzle
};
/// Extents rounding mode.
@ -222,14 +218,10 @@ namespace nvtt
// Set color transforms. @@ Not implemented!
NVTT_API void setColorTransform(ColorTransform t);
NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3);
NVTT_API void setSwizzleTransform(int x, int y, int z, int w);
// Set resizing options.
NVTT_API void setMaxExtents(int d);
NVTT_API void setRoundMode(RoundMode mode);
// Set whether or not to premultiply color by alpha
NVTT_API void setPremultiplyAlpha(bool b);
};

View File

@ -29,6 +29,8 @@
#include "colourblock.h"
#include <cfloat>
#include "fastclusterlookup.inl"
namespace squish {
FastClusterFit::FastClusterFit()
@ -97,91 +99,6 @@ void FastClusterFit::SetColourSet( ColourSet const* colours, int flags )
}
struct Precomp {
float alpha2_sum;
float beta2_sum;
float alphabeta_sum;
float factor;
};
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
void FastClusterFit::DoPrecomputation()
{
int i = 0;
// Three element clusters:
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
int c2 = 16 - c0 - c1;
/*if (c2 == 16) {
// a = b = x2 / 16
s_threeElement[i].alpha2_sum = 0;
s_threeElement[i].beta2_sum = 16;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_threeElement[i].alpha2_sum = 16;
s_threeElement[i].beta2_sum = 0;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
}
i++;
}
}
//printf("%d three cluster elements\n", i);
// Four element clusters:
i = 0;
for( int c0 = 0; c0 <= 16; c0++)
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
int c3 = 16 - c0 - c1 - c2;
/*if (c3 == 16) {
// a = b = x3 / 16
s_fourElement[i].alpha2_sum = 16.0f;
s_fourElement[i].beta2_sum = 0.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_fourElement[i].alpha2_sum = 0.0f;
s_fourElement[i].beta2_sum = 16.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
}
i++;
}
}
}
//printf("%d four cluster elements\n", i);
}
void FastClusterFit::SetMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD

View File

@ -44,8 +44,6 @@ public:
void SetMetric(float r, float g, float b);
float GetBestError() const;
static void DoPrecomputation();
// Make them public
virtual void Compress3( void* block );
virtual void Compress4( void* block );

File diff suppressed because it is too large Load Diff

View File

@ -1,221 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <stdio.h> // printf
#include <stdlib.h> // rand
#include <time.h> // clock
#include <string.h> // memcpy, memcmp
#include <assert.h>
#define FRAME_COUNT 1000
#define WIDTH 2048
#define HEIGHT 2048
#define INPUT_SIZE (WIDTH*HEIGHT)
#define OUTPUT_SIZE (WIDTH*HEIGHT/16*2)
static int s_input[INPUT_SIZE];
static int s_reference[OUTPUT_SIZE];
static int s_output[OUTPUT_SIZE];
static int s_frame = 0;
struct MyOutputHandler : public nvtt::OutputHandler
{
MyOutputHandler() : m_ptr(NULL) {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
assert(size == sizeof(int) * OUTPUT_SIZE);
assert(width == WIDTH);
assert(height == HEIGHT);
assert(depth == 1);
assert(face == 0);
assert(miplevel == 0);
m_ptr = (unsigned char *)s_output;
if (s_frame == 1)
{
// Save first result as reference.
memcpy(s_reference, s_output, sizeof(int) * OUTPUT_SIZE);
}
else if (s_frame > 1)
{
// Compare against reference.
if (memcmp(s_output, s_reference, sizeof(int) * OUTPUT_SIZE) != 0)
{
printf("Compressed image different to original.\n");
exit(EXIT_FAILURE);
}
}
}
virtual bool writeData(const void * data, int size)
{
memcpy(m_ptr, data, size);
m_ptr += size;
return true;
}
unsigned char * m_ptr;
};
void precomp()
{
unsigned int bitmaps[1024];
int num = 0;
printf("{\n");
printf("\t%8X,\n", 0);
bitmaps[0] = 0;
num = 1;
for (int a = 1; a <= 15; a++)
{
for (int b = a; b <= 15; b++)
{
for (int c = b; c <= 15; c++)
{
int indices[16];
int i = 0;
for(; i < a; i++) {
indices[i] = 0;
}
for(; i < a+b; i++) {
indices[i] = 2;
}
for(; i < a+b+c; i++) {
indices[i] = 3;
}
for(; i < 16; i++) {
indices[i] = 1;
}
unsigned int bm = 0;
for(i = 0; i < 16; i++) {
bm |= indices[i] << (i * 2);
}
printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c);
bitmaps[num] = bm;
num++;
}
}
}
printf("}\n");
printf("// num = %d\n", num);
/*
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < 16; ++m )
{
indices[m] = 3;
}
int kmax = ( j == 0 ) ? 15 : 16;
for( int k = kmax; k >= j; --k )
{
// last cluster [k,n) is at the end
if( k < 16 )
{
indices[k] = 1;
}
uint bitmap = 0;
bool hasThree = false;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
}
bitmaps[num] = bitmap;
num++;
}
}
}
*/
}
int main(int argc, char *argv[])
{
//precomp();
nvtt::InputOptions inputOptions;
inputOptions.setTextureLayout(nvtt::TextureType_2D, WIDTH, HEIGHT);
for (int i = 0; i < INPUT_SIZE; i++)
{
s_input[i] = rand();
}
inputOptions.setMipmapData(s_input, WIDTH, HEIGHT);
inputOptions.setMipmapGeneration(false);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_DXT1);
// compressionOptions.setFormat(nvtt::Format_DXT1n);
// compressionOptions.setFormat(nvtt::Format_CTX1);
nvtt::OutputOptions outputOptions;
outputOptions.setOutputHeader(false);
MyOutputHandler outputHandler;
outputOptions.setOutputHandler(&outputHandler);
nvtt::Compressor compressor;
for (s_frame = 0; s_frame < FRAME_COUNT; s_frame++)
{
clock_t start = clock();
printf("compressing frame %d:\n", s_frame);
compressor.process(inputOptions, compressionOptions, outputOptions);
clock_t end = clock();
printf("time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
}
return EXIT_SUCCESS;
}

View File

@ -42,11 +42,11 @@ struct MyOutputHandler : public nvtt::OutputHandler
MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), stream(new nv::StdOutputStream(name)) {}
virtual ~MyOutputHandler() { delete stream; }
virtual void setTotal(int64 t)
void setTotal(int64 t)
{
total = t + 128;
}
virtual void setDisplayProgress(bool b)
void setDisplayProgress(bool b)
{
verbose = b;
}
@ -140,7 +140,6 @@ int main(int argc, char *argv[])
bool silent = false;
bool bc1n = false;
nvtt::Format format = nvtt::Format_BC1;
bool premultiplyAlpha = false;
const char * externalCompressor = NULL;
@ -174,10 +173,6 @@ int main(int argc, char *argv[])
{
noMipmaps = true;
}
else if (strcmp("-premula", argv[i]) == 0)
{
premultiplyAlpha = true;
}
// Compression options.
else if (strcmp("-fast", argv[i]) == 0)
@ -271,8 +266,7 @@ int main(int argc, char *argv[])
printf(" -tonormal\tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n");
printf(" -repeat \tRepeat wrapping mode.\n");
printf(" -nomips \tDisable mipmap generation.\n");
printf(" -premula \tPremultiply alpha into color channel.\n\n");
printf(" -nomips \tDisable mipmap generation.\n\n");
printf("Compression options:\n");
printf(" -fast \tFast compression.\n");
@ -379,12 +373,6 @@ int main(int argc, char *argv[])
inputOptions.setMipmapGeneration(false);
}
if (premultiplyAlpha)
{
inputOptions.setPremultiplyAlpha(true);
inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied);
}
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(format);
if (fast)
@ -408,6 +396,21 @@ int main(int argc, char *argv[])
compressionOptions.setExternalCompressor(externalCompressor);
}
if (format == nvtt::Format_RGB)
{
compressionOptions.setQuantization(true, false, false);
//compressionOptions.setPixelFormat(16, 0xF000, 0x0F00, 0x00F0, 0x000F);
compressionOptions.setPixelFormat(16,
0x0F00,
0x00F0,
0x000F,
0xF000);
// 0x003F0000,
// 0x00003F00,
// 0x0000003F,
// 0x3F000000);
}
MyErrorHandler errorHandler;
MyOutputHandler outputHandler(output);

View File

@ -31,144 +31,40 @@
#include "cmdline.h"
#include <time.h> // clock
int main(int argc, char *argv[])
{
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
bool forcenormal = false;
bool mipmaps = false;
bool faces = false;
nv::Path input;
nv::Path output;
// Parse arguments.
for (int i = 1; i < argc; i++)
if (argc != 2)
{
if (strcmp("-forcenormal", argv[i]) == 0)
{
forcenormal = true;
}
else if (strcmp("-mipmaps", argv[i]) == 0)
{
mipmaps = true;
}
else if (strcmp("-faces", argv[i]) == 0)
{
faces = true;
}
else if (argv[i][0] != '-')
{
input = argv[i];
if (i+1 < argc && argv[i+1][0] != '-')
{
output = argv[i+1];
}
else
{
output.copy(input.str());
output.stripExtension();
output.append(".tga");
}
break;
}
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
if (input.isNull())
{
printf("usage: nvdecompress [options] infile [outfile]\n\n");
printf("Note: the .tga extension is forced on outfile\n\n");
printf("Input options:\n");
printf(" -forcenormal \tThe input image is a normal map.\n");
printf(" -mipmaps \tDecompress all mipmaps.\n");
printf(" -faces \tDecompress all faces.\n");
printf("usage: nvdecompress 'ddsfile'\n\n");
return 1;
}
// Load surface.
nv::DirectDrawSurface dds(input);
nv::DirectDrawSurface dds(argv[1]);
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
printf("The file '%s' is not a valid DDS file.\n", argv[1]);
return 1;
}
if (!dds.isSupported() || dds.isTexture3D())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
return 1;
}
uint faceCount;
if (dds.isTexture2D())
{
faceCount = 1;
}
else
{
nvCheck(dds.isTextureCube());
faceCount = 6;
}
uint mipmapCount = dds.mipmapCount();
clock_t start = clock();
// apply arguments
if (forcenormal)
{
dds.setNormalFlag(true);
}
if (!faces)
{
faceCount = 1;
}
if (!mipmaps)
{
mipmapCount = 1;
}
nv::Image mipmap;
nv::Path name;
// strip extension, we force the tga extension
output.stripExtension();
// extract faces and mipmaps
for (uint f = 0; f < faceCount; f++)
{
for (uint m = 0; m < mipmapCount; m++)
{
dds.mipmap(&mipmap, f, m);
// set output filename, if we are doing faces and/or mipmaps
name.copy(output);
if (faces) name.appendFormat("_face%d", f);
if (mipmaps) name.appendFormat("_mipmap%d", m);
nv::Path name(argv[1]);
name.stripExtension();
name.append(".tga");
nv::StdOutputStream stream(name.str());
if (stream.isError()) {
fprintf(stderr, "Error opening '%s' for writting\n", name.str());
printf("Error opening '%s' for writting\n", name.str());
return 1;
}
nv::ImageIO::saveTGA(stream, &mipmap);
}
}
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
// @@ TODO: Add command line options to output mipmaps, cubemap faces, etc.
nv::Image img;
dds.mipmap(&img, 0, 0); // get first image
nv::ImageIO::saveTGA(stream, &img);
return 0;
}

View File

@ -73,10 +73,12 @@ int main(int argc, char *argv[])
float scale = 0.5f;
float gamma = 2.2f;
nv::Filter * filter = NULL;
nv::AutoPtr<nv::Filter> filter;
nv::Path input;
nv::Path output;
nv::FloatImage::WrapMode wrapMode = nv::FloatImage::WrapMode_Mirror;
// Parse arguments.
for (int i = 1; i < argc; i++)
{
@ -108,9 +110,18 @@ int main(int argc, char *argv[])
else if (strcmp("lanczos", argv[i]) == 0) filter = new nv::LanczosFilter();
else if (strcmp("kaiser", argv[i]) == 0) {
filter = new nv::KaiserFilter(3);
((nv::KaiserFilter *)filter)->setParameters(4.0f, 1.0f);
((nv::KaiserFilter *)filter.ptr())->setParameters(4.0f, 1.0f);
}
}
else if (strcmp("-f", argv[i]) == 0)
{
if (i+1 == argc) break;
i++;
if (strcmp("mirror", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Mirror;
else if (strcmp("repeat", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Repeat;
else if (strcmp("clamp", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Clamp;
}
else if (argv[i][0] != '-')
{
input = argv[i];
@ -140,6 +151,10 @@ int main(int argc, char *argv[])
printf(" * mitchell\n");
printf(" * lanczos\n");
printf(" * kaiser\n");
printf(" -w mode One of the following: (default = 'mirror')\n");
printf(" * mirror\n");
printf(" * repeat\n");
printf(" * clamp\n");
return 1;
}
@ -155,15 +170,14 @@ int main(int argc, char *argv[])
nv::FloatImage fimage(&image);
fimage.toLinear(0, 3, gamma);
nv::AutoPtr<nv::FloatImage> fresult(fimage.downSample(*filter, uint(image.width() * scale), uint(image.height() * scale), nv::FloatImage::WrapMode_Mirror));
nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(*filter, uint(image.width() * scale), uint(image.height() * scale), wrapMode));
nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output);
nv::ImageIO::saveTGA(stream, result.ptr()); // @@ Add generic save function. Add support for png too.
delete filter;
return 0;
}