Compare commits
343 Commits
Author | SHA1 | Date | |
---|---|---|---|
f817d49872 | |||
09ad232142 | |||
69363fcc92 | |||
2f0fe5149f | |||
f744e700ae | |||
48f5dd4603 | |||
78bb864c14 | |||
7bc1eb6a29 | |||
f63abb3ef6 | |||
e8500dead4 | |||
2e1d1e70ae | |||
bc3299b78b | |||
d01bdaf370 | |||
d90b3d927b | |||
3e5c47d9fb | |||
6a667fff50 | |||
dfe081d32a | |||
acc02abaf1 | |||
8871fefe89 | |||
2543f4c9ed | |||
ac46c40b3e | |||
bcf0df2b49 | |||
8c7f54056c | |||
34cd266d8c | |||
9a9366cf4c | |||
8820c43175 | |||
18a3abf794 | |||
384f74ba39 | |||
7d75840398 | |||
8ea52efbf4 | |||
d86a89742e | |||
fd11f5e7ef | |||
dcfdabaee3 | |||
2d97ee9c03 | |||
14f49b6003 | |||
ea7dabc6b1 | |||
0878c0e967 | |||
a088ae5789 | |||
a52d3b7cdc | |||
307c418acc | |||
d0218cb18b | |||
c1f9c4df42 | |||
78d65e8368 | |||
18474cdb33 | |||
b7fbd1fc9b | |||
9de3298d6b | |||
568f34d838 | |||
e38e584db2 | |||
8655259379 | |||
7d65633f63 | |||
cb62c3c461 | |||
573cc1b371 | |||
9c6f6e143e | |||
126816ef72 | |||
2ca6e4a1bd | |||
b839b873e1 | |||
ab473f4ec5 | |||
f1ebbd4da6 | |||
ac79935c88 | |||
2aca4673ab | |||
43893d5d0f | |||
009eaf2aa6 | |||
fd2492670e | |||
7d88f4fa32 | |||
60022acaa7 | |||
a5faf51738 | |||
8365df0adf | |||
2d38f4fb2c | |||
3adf00b4b9 | |||
63897f3fe6 | |||
71f29a27f3 | |||
720be412fa | |||
8d361eee22 | |||
8d54d22cb2 | |||
782a127071 | |||
154e117e13 | |||
603d8ad1a2 | |||
bed4d78f6b | |||
d7f8fba7a7 | |||
e0a7e103c1 | |||
319ed6bac0 | |||
9e959d0191 | |||
53265596a3 | |||
ae24cb163d | |||
fb75d6065d | |||
ae744f88e6 | |||
5ac76b68c9 | |||
f2090df7a5 | |||
0a8de141a6 | |||
9aaee3ae16 | |||
820eb374d5 | |||
974cacda5f | |||
953a63d7b5 | |||
19477d60c0 | |||
8a48250bcb | |||
18d95584cc | |||
9b3075030e | |||
dd98ce6eab | |||
35ff0e5aa6 | |||
8529dcf755 | |||
56543e1a92 | |||
72543c9307 | |||
872c61e1d1 | |||
7f3cee4db9 | |||
3f036a11a6 | |||
3df9aff396 | |||
be12367910 | |||
c59a2e0a4b | |||
0abec17ab4 | |||
6b67f4a0d7 | |||
6e2565d1a4 | |||
3e3c8a4d18 | |||
8e836591ee | |||
50b8b67185 | |||
36850f6241 | |||
ed58bd90ff | |||
ab73c790e1 | |||
4a34c673a4 | |||
0ce578668f | |||
53e6c4c911 | |||
d99cf11e2e | |||
d9832ed22c | |||
a02649faa9 | |||
a28ebb4ccf | |||
0f5a5e5d24 | |||
8f0b129a52 | |||
098bc2f905 | |||
5943e8f42f | |||
ba72ebafcb | |||
a89735994c | |||
e7fd290af6 | |||
35b97e7a13 | |||
472c2d691f | |||
e48f56a15e | |||
25e32c8ff2 | |||
db63ba7fa4 | |||
3df66be089 | |||
3a52923697 | |||
9953883d26 | |||
5ed9090012 | |||
93e2fb46a9 | |||
03c9ec0f62 | |||
88fc5ca18e | |||
6fb29816a2 | |||
c67edca820 | |||
9d5242594b | |||
69c74d7a5e | |||
b7ea7a255b | |||
17790a34df | |||
7741a99b90 | |||
36dd7fb76b | |||
8fa870bf0c | |||
1afdf2da8a | |||
242aa4254e | |||
4f576d5539 | |||
2411f3f5db | |||
1c6b65ad52 | |||
f92a2191f2 | |||
7f9b10329b | |||
49409e9d92 | |||
f753cc9702 | |||
07a4daed7b | |||
2ad15489bb | |||
fa53ddcecd | |||
7a8b3aecc9 | |||
94357626f7 | |||
19342d8adf | |||
2ed4fee447 | |||
f03d702d84 | |||
10de10b9c2 | |||
508f9fbdc2 | |||
e965b0e4a9 | |||
1f4d313d0f | |||
dc0b78ad14 | |||
b8eb12afc1 | |||
1975883bed | |||
9bda107603 | |||
b4f17b968a | |||
e5ae0c0e20 | |||
a03411e451 | |||
931580edc5 | |||
fd2f5465f8 | |||
127052f404 | |||
a30490ab9b | |||
1ec115c7ec | |||
a4f56b65b8 | |||
bb69acec6c | |||
4bbf5e96f4 | |||
4a85f8e48d | |||
f34b7ce84f | |||
010905edd3 | |||
7bb2d55d35 | |||
e3a7cc19dd | |||
379605d30a | |||
c05c4e155b | |||
fd73484bfc | |||
f29d7dd938 | |||
3a5dc4783a | |||
d4a713451e | |||
a302475fa6 | |||
6e988ea4c8 | |||
7731181900 | |||
41f6e0ba73 | |||
11073171a1 | |||
0805832b44 | |||
a4dcd414ca | |||
4ff8a83f90 | |||
48da357385 | |||
e1916d43c8 | |||
321f320bfb | |||
df32fedc7c | |||
a7396b70ba | |||
d9ca49cc5e | |||
56849b78ad | |||
a769831fb5 | |||
7486201a7e | |||
1813624992 | |||
5fa27adfcd | |||
6d1891a7e9 | |||
8fb1d70d0b | |||
c26c52d59c | |||
c3329d4675 | |||
1cefc366f8 | |||
7df0885c4f | |||
1c5da0e341 | |||
36ba75b598 | |||
1628831878 | |||
12e774ea74 | |||
66b18f2dbd | |||
9ea1934097 | |||
9771e72702 | |||
7776bd5c17 | |||
6d8a75462a | |||
cf18077eda | |||
aa37e7a868 | |||
d01a5c1661 | |||
36ed6bebda | |||
5234060618 | |||
f402f28643 | |||
f047043eb2 | |||
7eac4195c4 | |||
0f5692d1ea | |||
b2d6122769 | |||
cd59058fc2 | |||
db14e048e1 | |||
0c36fcf626 | |||
68be24bf00 | |||
b284669993 | |||
2f6e885ced | |||
1957120c26 | |||
d7ddcb9263 | |||
f5f6e88585 | |||
13e2d2e447 | |||
0b13b6d0d9 | |||
ad85b0fcbe | |||
0515d9a0a0 | |||
16adf94635 | |||
e9002a7d86 | |||
3161fca9d9 | |||
bb5b02df0e | |||
1941e27148 | |||
02c3abb394 | |||
86ef67bbfa | |||
79529f994f | |||
c2508d9eeb | |||
b1cd916105 | |||
96655b3e7c | |||
529c0075e1 | |||
c70e5d6121 | |||
7394644719 | |||
d5055300e2 | |||
b2e7d717c2 | |||
756f12c994 | |||
206bfcf0f3 | |||
15cfd1c06b | |||
8b26ecc865 | |||
488f3c8f42 | |||
458b8814a7 | |||
c08acc8a71 | |||
45f7244f20 | |||
f412ec8efb | |||
a1a34f546f | |||
7ef88c6f7e | |||
3368f9039b | |||
870a3fe438 | |||
82bed4ac9a | |||
b8a9395117 | |||
65f769160d | |||
524ebbec8c | |||
ce85eaff3e | |||
6befe3505c | |||
ff6f7f0506 | |||
55e7d3dec4 | |||
b5e373b734 | |||
6ce542b5c0 | |||
58e5f6534f | |||
eda4786ca6 | |||
29a720bf82 | |||
1120f83f7d | |||
c38c3dc584 | |||
7d3d0ede9d | |||
bc66a7ad74 | |||
ca8d17abf5 | |||
10e79dac9d | |||
e068964423 | |||
ea340443d9 | |||
fb2b0cb38c | |||
c01566cd2f | |||
47bdab8e27 | |||
70267fda15 | |||
aebcea412c | |||
bccdcd49da | |||
68e9f05794 | |||
0f186e688f | |||
38e9652d64 | |||
f08114c1b5 | |||
7b9f891f92 | |||
787c9bb8fb | |||
42220b981e | |||
70331a37fd | |||
2ffc4cd7ad | |||
4ba8e87a38 | |||
d440d68aa8 | |||
48f61dbfc0 | |||
94c3fa75a8 | |||
c562af6d9b | |||
a889f2fda6 | |||
d855d0461b | |||
246f2a409a | |||
6a6b3edce1 | |||
52b3bc9437 | |||
f6ab357b09 | |||
ce3a65c03e | |||
8d9bf5c0b3 | |||
ab5265e642 | |||
fd1d5e41c7 | |||
3980d5dc21 | |||
15e7125b4b | |||
921ee354c0 | |||
e3f7e303e4 | |||
1df69495fc | |||
91eb30667f | |||
6db5cffca6 |
@ -1,4 +1,4 @@
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.4.0)
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0)
|
||||
PROJECT(NV)
|
||||
ENABLE_TESTING()
|
||||
|
||||
@ -16,6 +16,13 @@ MESSAGE(STATUS "Setting optimal options")
|
||||
MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}")
|
||||
MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}")
|
||||
|
||||
IF(NVTT_SHARED)
|
||||
SET(NVCORE_SHARED TRUE)
|
||||
SET(NVMATH_SHARED TRUE)
|
||||
SET(NVIMAGE_SHARED TRUE)
|
||||
ENDIF(NVTT_SHARED)
|
||||
|
||||
|
||||
ADD_SUBDIRECTORY(src)
|
||||
|
||||
IF(WIN32)
|
||||
|
44
ChangeLog
44
ChangeLog
@ -1,7 +1,42 @@
|
||||
NVIDIA Texture Tools version 2.1.0
|
||||
* CTX1 CUDA compressor.
|
||||
* DXT1n CUDA compressor.
|
||||
* Support alpha premultiplication by Charles Nicholson. See issue 30.
|
||||
NVIDIA Texture Tools version 2.0.7
|
||||
* Output correct exit codes. Fixes issue 92.
|
||||
* Fix thread-safety errors. Fixes issue 90.
|
||||
* Add SIMD power method. Fixes issue 94.
|
||||
* Interact better with applications that already use CUDA.
|
||||
* Faster CPU compression.
|
||||
|
||||
NVIDIA Texture Tools version 2.0.6
|
||||
* Fix dll version checking.
|
||||
* Detect CUDA 2.1 and future CUDA versions correctly.
|
||||
* Print CUDA detection message in nvcompress.
|
||||
* Select the fastest CUDA device.
|
||||
* Compile squish with -fPIC. Fixes issue 74.
|
||||
* Fix warnings under gcc 4.3.2.
|
||||
* Fix nvzoom option typo by Frank Richter. Fixes issue 81.
|
||||
* Do not use CUDA to compress small mipmaps. Fixes issue 76.
|
||||
* Compute mipmaps of semi-transparent images correctly.
|
||||
* Shutdown CUDA properly. Fixes issue 83.
|
||||
* Fix pixel format converions. Fixes issue 87.
|
||||
* Update single color compression tables. Fixes issue 85.
|
||||
|
||||
NVIDIA Texture Tools version 2.0.5
|
||||
* Fix error in single color compressor. Fixes issue 66.
|
||||
* Detect mismatch between CUDA runtime and driver, and disable CUDA in that case.
|
||||
* Fix cmake files when compiling NVTT as a shared library.
|
||||
* When linking nvtt dynamically on unix, link all libraries dynamically.
|
||||
* Select fastest CUDA device.
|
||||
|
||||
NVIDIA Texture Tools version 2.0.4
|
||||
* Fix error in RGB format output; reported by jonsoh. See issue 49.
|
||||
* Added support RGB format dithering by jonsoh. Fixes issue 50 and 51.
|
||||
* Prevent infinite loop in indexMirror when width equal 1. Fixes issue 65.
|
||||
* Implement general scale filter, including upsampling.
|
||||
|
||||
NVIDIA Texture Tools version 2.0.3
|
||||
* More accurate DXT3 compressor. Fixes issue 38.
|
||||
* Remove legacy compressors. Fix issue 34.
|
||||
* Check for single color in all compressors. Fixes issue 43.
|
||||
* Fix error in fast downsample filter, reported by Noel Llopis.
|
||||
|
||||
NVIDIA Texture Tools version 2.0.2
|
||||
* Fix copy ctor error reported by Richard Sim.
|
||||
@ -10,7 +45,6 @@ NVIDIA Texture Tools version 2.0.2
|
||||
* Fix RGBA modes with less than 32 bpp by Viktor Linder.
|
||||
* Fix alpha decompression by Amorilia. See issue 40.
|
||||
* Avoid default-initialized constructors for POD types, reported by Jim Tilander.
|
||||
* Improved decompressor tool submitted by Amorilia. See issue 41.
|
||||
* Add single color compresor for DXT1a.
|
||||
* Set swizzle code to ATI2 files. See issue 41.
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
--------------------------------------------------------------------------------
|
||||
NVIDIA Texture Tools
|
||||
README.txt
|
||||
Version 2.1
|
||||
Version 2.0
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
@ -57,7 +57,7 @@ MARK_AS_ADVANCED (CUDA_FOUND CUDA_COMPILER CUDA_RUNTIME_LIBRARY)
|
||||
|
||||
|
||||
#SET(CUDA_OPTIONS "-ncfe")
|
||||
SET(CUDA_OPTIONS "")
|
||||
SET(CUDA_OPTIONS "--host-compilation=C")
|
||||
|
||||
IF (CUDA_EMULATION)
|
||||
SET (CUDA_OPTIONS "${CUDA_OPTIONS} -deviceemu")
|
||||
|
2
configure
vendored
2
configure
vendored
@ -53,7 +53,7 @@ echo "-- Configuring nvidia-texture-tools "`cat VERSION`
|
||||
|
||||
mkdir -p ./build
|
||||
cd ./build
|
||||
$CMAKE .. -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1
|
||||
$CMAKE .. -DNVTT_SHARED=1 -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1
|
||||
cd ..
|
||||
|
||||
echo ""
|
||||
|
BIN
gnuwin32/lib/libpng.a
Normal file
BIN
gnuwin32/lib/libpng.a
Normal file
Binary file not shown.
BIN
gnuwin32/lib/libz.a
Normal file
BIN
gnuwin32/lib/libz.a
Normal file
Binary file not shown.
@ -278,6 +278,7 @@
|
||||
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
|
||||
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
|
||||
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
|
||||
LinkTimeCodeGeneration="1"
|
||||
TargetMachine="17"
|
||||
/>
|
||||
<Tool
|
||||
|
@ -281,6 +281,10 @@
|
||||
RelativePath="..\..\..\src\nvcore\Debug.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvcore\Library.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvcore\Memory.cpp"
|
||||
>
|
||||
@ -315,6 +319,10 @@
|
||||
RelativePath="..\..\..\src\nvcore\DefsVcWin32.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvcore\Library.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvcore\Memory.h"
|
||||
>
|
||||
|
@ -277,6 +277,7 @@
|
||||
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
|
||||
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
|
||||
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
|
||||
LinkTimeCodeGeneration="1"
|
||||
TargetMachine="17"
|
||||
/>
|
||||
<Tool
|
||||
|
@ -355,6 +355,10 @@
|
||||
RelativePath="..\..\..\src\nvimage\nvimage.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvimage\PixelFormat.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvimage\PsdFile.h"
|
||||
>
|
||||
|
@ -277,6 +277,10 @@
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvmath\Plane.cpp"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
@ -295,6 +299,10 @@
|
||||
RelativePath="..\..\..\src\nvmath\Matrix.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvmath\Plane.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvmath\Vector.h"
|
||||
>
|
||||
|
@ -3,18 +3,18 @@ Microsoft Visual Studio Solution File, Format Version 9.00
|
||||
# Visual Studio 2005
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
|
||||
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
|
||||
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
|
||||
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
|
||||
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}"
|
||||
@ -34,16 +34,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
|
||||
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
|
||||
@ -55,18 +55,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stress", "stress\stress.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU
|
||||
@ -319,22 +314,6 @@ Global
|
||||
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU
|
||||
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Any CPU.ActiveCfg = Debug|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Any CPU.ActiveCfg = Release|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Any CPU.ActiveCfg = Release|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32
|
||||
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
@ -53,8 +53,8 @@ END
|
||||
//
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 2,1,0,0
|
||||
PRODUCTVERSION 2,1,0,0
|
||||
FILEVERSION 2,0,6,0
|
||||
PRODUCTVERSION 2,0,6,0
|
||||
FILEFLAGSMASK 0x17L
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
@ -71,12 +71,12 @@ BEGIN
|
||||
BEGIN
|
||||
VALUE "CompanyName", "NVIDIA Corporation"
|
||||
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
|
||||
VALUE "FileVersion", "2, 1, 0, 0"
|
||||
VALUE "FileVersion", "2, 0, 6, 0"
|
||||
VALUE "InternalName", "nvtt"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2007"
|
||||
VALUE "OriginalFilename", "nvtt.dll"
|
||||
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
|
||||
VALUE "ProductVersion", "2, 1, 0, 0"
|
||||
VALUE "ProductVersion", "2, 0, 6, 0"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
@ -691,7 +691,7 @@
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -keep -ccbin "$(VCInstallDir)bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu
"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -m32 -ccbin "$(VCInstallDir)bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu
"
|
||||
AdditionalDependencies="CudaMath.h"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
@ -701,7 +701,7 @@
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -keep -ccbin "$(VCInstallDir)bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu
"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -m64 -ccbin "$(VCInstallDir)bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu
"
|
||||
AdditionalDependencies="CudaMath.h"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
@ -711,7 +711,7 @@
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -keep -ccbin "$(VCInstallDir)bin" -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu
"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -m32 -ccbin "$(VCInstallDir)bin" -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu
"
|
||||
AdditionalDependencies="CudaMath.h"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
@ -721,7 +721,7 @@
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -keep -ccbin "$(VCInstallDir)bin" -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu
"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -m64 -ccbin "$(VCInstallDir)bin" -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu
"
|
||||
AdditionalDependencies="CudaMath.h"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
@ -849,10 +849,6 @@
|
||||
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvtt\FastCompressDXT.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
|
||||
>
|
||||
@ -865,6 +861,10 @@
|
||||
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
|
||||
>
|
||||
@ -911,10 +911,6 @@
|
||||
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvtt\FastCompressDXT.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvtt\InputOptions.h"
|
||||
>
|
||||
@ -927,6 +923,10 @@
|
||||
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvtt\OutputOptions.h"
|
||||
>
|
||||
|
@ -1,201 +0,0 @@
|
||||
<?xml version="1.0" encoding="Windows-1252"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="8.00"
|
||||
Name="stress"
|
||||
ProjectGUID="{317B694E-B5C1-42A6-956F-FC12B69175A6}"
|
||||
RootNamespace="stress"
|
||||
Keyword="Win32Proj"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"
|
||||
/>
|
||||
</Platforms>
|
||||
<ToolFiles>
|
||||
</ToolFiles>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
ConfigurationType="1"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="..;..\..\..\src"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="4"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
|
||||
LinkIncremental="2"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
ConfigurationType="1"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalIncludeDirectories="..;..\..\..\src"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
|
||||
LinkIncremental="1"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="2"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Source Files"
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvtt\tests\stress.cpp"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
Filter="h;hpp;hxx;hm;inl;inc;xsd"
|
||||
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
|
||||
>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Resource Files"
|
||||
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
|
||||
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
|
||||
>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
@ -1,11 +1,11 @@
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
SUBDIRS(nvcore)
|
||||
SUBDIRS(nvmath)
|
||||
SUBDIRS(nvimage)
|
||||
SUBDIRS(nvtt)
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
# OpenGL
|
||||
INCLUDE(FindOpenGL)
|
||||
IF(OPENGL_FOUND)
|
||||
@ -84,7 +84,6 @@ ELSE(PNG_FOUND)
|
||||
ENDIF(PNG_FOUND)
|
||||
|
||||
# TIFF
|
||||
SET(TIFF_NAMES libtiff)
|
||||
INCLUDE(FindTIFF)
|
||||
IF(TIFF_FOUND)
|
||||
SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise")
|
||||
@ -106,7 +105,8 @@ ENDIF(OPENEXR_FOUND)
|
||||
FIND_PACKAGE(Qt4)
|
||||
|
||||
# Threads
|
||||
FIND_PACKAGE(Threads)
|
||||
FIND_PACKAGE(Threads REQUIRED)
|
||||
MESSAGE(STATUS "Use thread library: ${CMAKE_THREAD_LIBS_INIT}")
|
||||
|
||||
# configuration file
|
||||
INCLUDE(CheckIncludeFiles)
|
||||
|
@ -1,147 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_CORE_ALGORITHMS_H
|
||||
#define NV_CORE_ALGORITHMS_H
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
/// Return the maximum of two values.
|
||||
template <typename T>
|
||||
inline const T & max(const T & a, const T & b)
|
||||
{
|
||||
//return std::max(a, b);
|
||||
if( a < b ) {
|
||||
return b;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
/// Return the minimum of two values.
|
||||
template <typename T>
|
||||
inline const T & min(const T & a, const T & b)
|
||||
{
|
||||
//return std::min(a, b);
|
||||
if( b < a ) {
|
||||
return b;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
/// Clamp between two values.
|
||||
template <typename T>
|
||||
inline const T & clamp(const T & x, const T & a, const T & b)
|
||||
{
|
||||
return min(max(x, a), b);
|
||||
}
|
||||
|
||||
/// Delete all the elements of a container.
|
||||
template <typename T>
|
||||
void deleteAll(T & container)
|
||||
{
|
||||
for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
|
||||
{
|
||||
delete container[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// @@ Swap should be implemented here.
|
||||
|
||||
|
||||
#if 0
|
||||
// This does not use swap, but copies, in some cases swaps are much faster than copies!
|
||||
// Container should implement operator[], and size()
|
||||
template <class Container, class T>
|
||||
void insertionSort(Container<T> & container)
|
||||
{
|
||||
const uint n = container.size();
|
||||
for (uint i=1; i < n; ++i)
|
||||
{
|
||||
T value = container[i];
|
||||
uint j = i;
|
||||
while (j > 0 && container[j-1] > value)
|
||||
{
|
||||
container[j] = container[j-1];
|
||||
--j;
|
||||
}
|
||||
if (i != j)
|
||||
{
|
||||
container[j] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class Container, class T>
|
||||
void quickSort(Container<T> & container)
|
||||
{
|
||||
quickSort(container, 0, container.count());
|
||||
}
|
||||
|
||||
{
|
||||
/* threshold for transitioning to insertion sort */
|
||||
while (n > 12) {
|
||||
int c01,c12,c,m,i,j;
|
||||
|
||||
/* compute median of three */
|
||||
m = n >> 1;
|
||||
c = p[0] > p[m];
|
||||
c01 = c;
|
||||
c = &p[m] > &p[n-1];
|
||||
c12 = c;
|
||||
/* if 0 >= mid >= end, or 0 < mid < end, then use mid */
|
||||
if (c01 != c12) {
|
||||
/* otherwise, we'll need to swap something else to middle */
|
||||
int z;
|
||||
c = p[0] < p[n-1];
|
||||
/* 0>mid && mid<n: 0>n => n; 0<n => 0 */
|
||||
/* 0<mid && mid>n: 0>n => 0; 0<n => n */
|
||||
z = (c == c12) ? 0 : n-1;
|
||||
swap(p[z], p[m]);
|
||||
}
|
||||
/* now p[m] is the median-of-three */
|
||||
/* swap it to the beginning so it won't move around */
|
||||
swap(p[0], p[m]);
|
||||
|
||||
/* partition loop */
|
||||
i=1;
|
||||
j=n-1;
|
||||
for(;;) {
|
||||
/* handling of equality is crucial here */
|
||||
/* for sentinels & efficiency with duplicates */
|
||||
for (;;++i) {
|
||||
c = p[i] > p[0];
|
||||
if (!c) break;
|
||||
}
|
||||
a = &p[0];
|
||||
for (;;--j) {
|
||||
b=&p[j];
|
||||
c = p[j] > p[0]
|
||||
if (!c) break;
|
||||
}
|
||||
/* make sure we haven't crossed */
|
||||
if (i >= j) break;
|
||||
swap(p[i], p[j]);
|
||||
|
||||
++i;
|
||||
--j;
|
||||
}
|
||||
/* recurse on smaller side, iterate on larger */
|
||||
if (j < (n-i)) {
|
||||
quickSort(p, j);
|
||||
p = p+i;
|
||||
n = n-i;
|
||||
}
|
||||
else {
|
||||
quickSort(p+i, n-i);
|
||||
n = j;
|
||||
}
|
||||
}
|
||||
|
||||
insertionSort();
|
||||
}
|
||||
#endif // 0
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_CORE_ALGORITHMS_H
|
@ -80,13 +80,13 @@ public:
|
||||
/// Clear all the bits.
|
||||
void clearAll()
|
||||
{
|
||||
memset(m_bitArray.mutableBuffer(), 0, m_bitArray.size());
|
||||
memset(m_bitArray.unsecureBuffer(), 0, m_bitArray.size());
|
||||
}
|
||||
|
||||
/// Set all the bits.
|
||||
void setAll()
|
||||
{
|
||||
memset(m_bitArray.mutableBuffer(), 0xFF, m_bitArray.size());
|
||||
memset(m_bitArray.unsecureBuffer(), 0xFF, m_bitArray.size());
|
||||
}
|
||||
|
||||
/// Toggle all the bits.
|
||||
|
@ -3,13 +3,7 @@ ADD_SUBDIRECTORY(poshlib)
|
||||
|
||||
SET(CORE_SRCS
|
||||
nvcore.h
|
||||
DefsGnucDarwin.h
|
||||
DefsGnucLinux.h
|
||||
DefsGnucWin32.h
|
||||
DefsVcWin32.h
|
||||
Ptr.h
|
||||
RefCounted.h
|
||||
RefCounted.cpp
|
||||
BitArray.h
|
||||
Memory.h
|
||||
Memory.cpp
|
||||
@ -26,15 +20,18 @@ SET(CORE_SRCS
|
||||
TextWriter.cpp
|
||||
Radix.h
|
||||
Radix.cpp
|
||||
CpuInfo.h
|
||||
CpuInfo.cpp
|
||||
Algorithms.h)
|
||||
Library.h
|
||||
Library.cpp)
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
# targets
|
||||
ADD_DEFINITIONS(-DNVCORE_EXPORTS)
|
||||
|
||||
IF(UNIX)
|
||||
SET(LIBS ${LIBS} ${CMAKE_DL_LIBS})
|
||||
ENDIF(UNIX)
|
||||
|
||||
IF(NVCORE_SHARED)
|
||||
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
|
||||
ELSE(NVCORE_SHARED)
|
||||
|
@ -19,7 +19,6 @@ Do not use memmove in insert & remove, use copy ctors instead.
|
||||
#include <nvcore/nvcore.h>
|
||||
#include <nvcore/Memory.h>
|
||||
#include <nvcore/Debug.h>
|
||||
//#include <nvcore/Stream.h>
|
||||
|
||||
#include <string.h> // memmove
|
||||
#include <new> // for placement new
|
||||
@ -71,10 +70,40 @@ namespace nv
|
||||
{
|
||||
// Templates
|
||||
|
||||
/// Pick the larger of two values.
/// Only operator< is required of T. When neither argument is smaller than
/// the other, the first one (a) is returned.
template <typename T>
inline const T & max(const T & a, const T & b)
{
	return (a < b) ? b : a;
}
|
||||
|
||||
/// Pick the smaller of two values.
/// Only operator< is required of T. When neither argument is smaller than
/// the other, the first one (a) is returned.
template <typename T>
inline const T & min(const T & a, const T & b)
{
	return (b < a) ? b : a;
}
|
||||
|
||||
/// Clamp x between a and b.
/// x is first raised to at least a, then the result is capped at b.
template <typename T>
inline const T & clamp(const T & x, const T & a, const T & b)
{
	const T & raised = max(x, a);
	return min(raised, b);
}
|
||||
|
||||
/// Swap two values.
|
||||
template <typename T>
|
||||
inline void swap(T & a, T & b)
|
||||
{
|
||||
//return std::swap(a, b);
|
||||
T temp = a;
|
||||
a = b;
|
||||
b = temp;
|
||||
@ -105,6 +134,16 @@ namespace nv
|
||||
uint operator()(uint x) const { return x; }
|
||||
};
|
||||
|
||||
/// Delete every element of a container.
/// The container is walked through its PseudoIndex interface (start, isDone,
/// advance) and delete is applied to each element returned by operator[].
/// Nothing here removes the deleted pointers from the container.
template <typename T>
void deleteAll(T & container)
{
	typename T::PseudoIndex it = container.start();
	while (!container.isDone(it))
	{
		delete container[it];
		container.advance(it);
	}
}
|
||||
|
||||
|
||||
/** Return the next power of two.
|
||||
* @see http://graphics.stanford.edu/~seander/bithacks.html
|
||||
@ -115,7 +154,7 @@ namespace nv
|
||||
inline uint nextPowerOfTwo( uint x )
|
||||
{
|
||||
nvDebugCheck( x != 0 );
|
||||
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction.
|
||||
#if 1 // On modern CPUs this is as fast as using the bsr instruction.
|
||||
x--;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
@ -138,6 +177,15 @@ namespace nv
|
||||
return (n & (n-1)) == 0;
|
||||
}
|
||||
|
||||
/// Simple iterator interface.
///
/// Abstract base class: implementations provide advance(), isDone() and
/// current(). The members are pure virtual so that a derived class does not
/// need out-of-line definitions from this template in order to link.
template <typename T>
struct Iterator
{
	/// Virtual dtor, so implementations can be destroyed through the interface.
	virtual ~Iterator() {}

	/// Step the iterator.
	virtual void advance() = 0;

	/// Whether the iteration has finished.
	virtual bool isDone() = 0;

	/// Element at the current position.
	virtual T current() = 0;
};
|
||||
|
||||
|
||||
/**
|
||||
* Replacement for std::vector that is easier to debug and provides
|
||||
@ -204,7 +252,7 @@ namespace nv
|
||||
const T * buffer() const { return m_buffer; }
|
||||
|
||||
/// Get vector pointer.
|
||||
T * mutableBuffer() { return m_buffer; }
|
||||
T * unsecureBuffer() { return m_buffer; }
|
||||
|
||||
/// Is vector empty.
|
||||
bool isEmpty() const { return m_size == 0; }
|
||||
@ -776,13 +824,13 @@ namespace nv
|
||||
}
|
||||
|
||||
/// Number of entries in the hash.
|
||||
int size()
|
||||
int size() const
|
||||
{
|
||||
return entry_count;
|
||||
}
|
||||
|
||||
/// Number of entries in the hash.
|
||||
int count()
|
||||
int count() const
|
||||
{
|
||||
return size();
|
||||
}
|
||||
|
@ -1,88 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include <nvcore/CpuInfo.h>
|
||||
#include <nvcore/Debug.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
#if NV_OS_WIN32
|
||||
|
||||
#define _WIN32_WINNT 0x0501
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
|
||||
|
||||
static bool isWow64()
|
||||
{
|
||||
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "IsWow64Process");
|
||||
|
||||
BOOL bIsWow64 = FALSE;
|
||||
|
||||
if (NULL != fnIsWow64Process)
|
||||
{
|
||||
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return bIsWow64 == TRUE;
|
||||
}
|
||||
|
||||
#endif // NV_OS_WIN32
|
||||
|
||||
|
||||
|
||||
/// Number of processors reported by the operating system.
///
/// Only implemented on Win32, where the value comes from
/// SYSTEM_INFO::dwNumberOfProcessors. Every other platform reports 1.
uint CpuInfo::processorCount()
{
#if NV_OS_WIN32
	SYSTEM_INFO sysInfo;

	// A process running under WOW64 asks for the native system information
	// rather than the view GetSystemInfo gives it.
	if (isWow64())
	{
		GetNativeSystemInfo(&sysInfo);
	}
	else
	{
		GetSystemInfo(&sysInfo);
	}

	uint count = (uint)sysInfo.dwNumberOfProcessors;
	nvDebugCheck(count >= 1);

	return count;
#else
	return 1;
#endif
}
|
||||
|
||||
uint CpuInfo::coreCount()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool CpuInfo::hasMMX()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CpuInfo::hasSSE()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CpuInfo::hasSSE2()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CpuInfo::hasSSE3()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,88 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_CORE_CPUINFO_H
|
||||
#define NV_CORE_CPUINFO_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
|
||||
#if NV_CC_MSVC
|
||||
# include <intrin.h> // __rdtsc
|
||||
#endif
|
||||
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
// CPU Information.
|
||||
class CpuInfo
|
||||
{
|
||||
static uint processorCount();
|
||||
static uint coreCount();
|
||||
|
||||
static bool hasMMX();
|
||||
static bool hasSSE();
|
||||
static bool hasSSE2();
|
||||
static bool hasSSE3();
|
||||
|
||||
};
|
||||
|
||||
#if NV_CC_MSVC
|
||||
#pragma intrinsic(__rdtsc)
|
||||
|
||||
inline uint64 rdtsc()
|
||||
{
|
||||
return __rdtsc();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if NV_CC_GNUC
|
||||
|
||||
#if defined(__i386__)
|
||||
|
||||
inline /*volatile*/ uint64 rdtsc()
|
||||
{
|
||||
uint64 x;
|
||||
//__asm__ volatile ("rdtsc" : "=A" (x));
|
||||
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
|
||||
return x;
|
||||
}
|
||||
|
||||
#elif defined(__x86_64__)
|
||||
|
||||
static __inline__ uint64 rdtsc(void)
|
||||
{
|
||||
unsigned int hi, lo;
|
||||
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
|
||||
return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
|
||||
}
|
||||
|
||||
#elif defined(__powerpc__)
|
||||
|
||||
static __inline__ uint64 rdtsc(void)
|
||||
{
|
||||
uint64 result=0;
|
||||
unsigned long int upper, lower, tmp;
|
||||
__asm__ volatile(
|
||||
"0: \n"
|
||||
"\tmftbu %0 \n"
|
||||
"\tmftb %1 \n"
|
||||
"\tmftbu %2 \n"
|
||||
"\tcmpw %2,%0 \n"
|
||||
"\tbne 0b \n"
|
||||
: "=r"(upper),"=r"(lower),"=r"(tmp)
|
||||
);
|
||||
result = upper;
|
||||
result = result<<32;
|
||||
result = result|lower;
|
||||
|
||||
return(result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // NV_CC_GNUC
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_CORE_CPUINFO_H
|
@ -28,7 +28,7 @@
|
||||
#endif
|
||||
|
||||
#if NV_OS_LINUX && defined(HAVE_EXECINFO_H)
|
||||
# include <execinfo.h>
|
||||
# include <execinfo.h> // backtrace
|
||||
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
|
||||
# include <cxxabi.h>
|
||||
# endif
|
||||
@ -38,7 +38,14 @@
|
||||
# include <unistd.h> // getpid
|
||||
# include <sys/types.h>
|
||||
# include <sys/sysctl.h> // sysctl
|
||||
# include <ucontext.h>
|
||||
# include <sys/ucontext.h>
|
||||
# undef HAVE_EXECINFO_H
|
||||
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
|
||||
# include <execinfo.h> // backtrace
|
||||
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
|
||||
# include <cxxabi.h>
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <stdexcept> // std::runtime_error
|
||||
@ -128,6 +135,14 @@ namespace
|
||||
|
||||
#if defined(HAVE_EXECINFO_H) // NV_OS_LINUX
|
||||
|
||||
// Whether backtrace() may be called.
// On Darwin the address of backtrace is compared against NULL (the callers
// mention weak linking); on every other platform the answer is always true.
static bool nvHasStackTrace()
{
#if NV_OS_DARWIN
	const bool available = (backtrace != NULL);
#else
	const bool available = true;
#endif
	return available;
}
|
||||
|
||||
static void nvPrintStackTrace(void * trace[], int size, int start=0) {
|
||||
char ** string_array = backtrace_symbols(trace, size);
|
||||
|
||||
@ -166,24 +181,36 @@ namespace
|
||||
|
||||
static void * callerAddress(void * secret)
|
||||
{
|
||||
# if NV_OS_DARWIN && NV_CPU_PPC
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *) ucp->uc_mcontext->ss.srr0;
|
||||
# elif NV_OS_DARWIN && NV_CPU_X86
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *) ucp->uc_mcontext->ss.eip;
|
||||
# elif NV_CPU_X86_64
|
||||
// #define REG_RIP REG_INDEX(rip) // seems to be 16
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *)ucp->uc_mcontext.gregs[REG_RIP];
|
||||
# elif NV_CPU_X86
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
|
||||
# elif NV_CPU_PPC
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *) ucp->uc_mcontext.regs->nip;
|
||||
# if NV_OS_DARWIN
|
||||
# if defined(_STRUCT_MCONTEXT)
|
||||
# if NV_CPU_PPC
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *) ucp->uc_mcontext->__ss.__srr0;
|
||||
# elif NV_CPU_X86
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *) ucp->uc_mcontext->__ss.__eip;
|
||||
# endif
|
||||
# else
|
||||
# if NV_CPU_PPC
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *) ucp->uc_mcontext->ss.srr0;
|
||||
# elif NV_CPU_X86
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *) ucp->uc_mcontext->ss.eip;
|
||||
# endif
|
||||
# endif
|
||||
# else
|
||||
return NULL;
|
||||
# if NV_CPU_X86_64
|
||||
// #define REG_RIP REG_INDEX(rip) // seems to be 16
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *)ucp->uc_mcontext.gregs[REG_RIP];
|
||||
# elif NV_CPU_X86
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
|
||||
# elif NV_CPU_PPC
|
||||
ucontext_t * ucp = (ucontext_t *)secret;
|
||||
return (void *) ucp->uc_mcontext.regs->nip;
|
||||
# endif
|
||||
# endif
|
||||
|
||||
// How to obtain the instruction pointers in different platforms, from mlton's source code.
|
||||
@ -228,17 +255,18 @@ namespace
|
||||
}
|
||||
|
||||
# if defined(HAVE_EXECINFO_H)
|
||||
if (nvHasStackTrace()) // in case of weak linking
|
||||
{
|
||||
void * trace[64];
|
||||
int size = backtrace(trace, 64);
|
||||
|
||||
void * trace[64];
|
||||
int size = backtrace(trace, 64);
|
||||
|
||||
if (pnt != NULL) {
|
||||
// Overwrite sigaction with caller's address.
|
||||
trace[1] = pnt;
|
||||
if (pnt != NULL) {
|
||||
// Overwrite sigaction with caller's address.
|
||||
trace[1] = pnt;
|
||||
}
|
||||
|
||||
nvPrintStackTrace(trace, size, 1);
|
||||
}
|
||||
|
||||
nvPrintStackTrace(trace, size, 1);
|
||||
|
||||
# endif // defined(HAVE_EXECINFO_H)
|
||||
|
||||
exit(0);
|
||||
@ -373,9 +401,12 @@ namespace
|
||||
# endif
|
||||
|
||||
# if defined(HAVE_EXECINFO_H)
|
||||
void * trace[64];
|
||||
int size = backtrace(trace, 64);
|
||||
nvPrintStackTrace(trace, size, 3);
|
||||
if (nvHasStackTrace())
|
||||
{
|
||||
void * trace[64];
|
||||
int size = backtrace(trace, 64);
|
||||
nvPrintStackTrace(trace, size, 2);
|
||||
}
|
||||
# endif
|
||||
|
||||
// Exit cleanly.
|
||||
@ -422,9 +453,12 @@ void NV_CDECL nvDebug(const char *msg, ...)
|
||||
void debug::dumpInfo()
|
||||
{
|
||||
#if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H)
|
||||
void * trace[64];
|
||||
int size = backtrace(trace, 64);
|
||||
nvPrintStackTrace(trace, size, 1);
|
||||
if (nvHasStackTrace())
|
||||
{
|
||||
void * trace[64];
|
||||
int size = backtrace(trace, 64);
|
||||
nvPrintStackTrace(trace, size, 1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -115,6 +115,7 @@ namespace nv
|
||||
{
|
||||
NVCORE_API void dumpInfo();
|
||||
|
||||
// These functions are not thread safe.
|
||||
NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
|
||||
NVCORE_API void resetMessageHandler();
|
||||
|
||||
|
@ -2,8 +2,7 @@
|
||||
#error "Do not include this file directly."
|
||||
#endif
|
||||
|
||||
#include <stdlib.h> // uint8_t, int8_t, ...
|
||||
|
||||
#include <stdint.h> // uint8_t, int8_t, ...
|
||||
|
||||
// Function linkage
|
||||
#define DLL_IMPORT
|
||||
|
@ -19,7 +19,9 @@
|
||||
|
||||
// Set standard function names.
|
||||
#define snprintf _snprintf
|
||||
#define vsnprintf _vsnprintf
|
||||
#if _MSC_VER < 1500
|
||||
# define vsnprintf _vsnprintf
|
||||
#endif
|
||||
#define vsscanf _vsscanf
|
||||
#define chdir _chdir
|
||||
#define getcwd _getcwd
|
||||
|
41
src/nvcore/Library.cpp
Normal file
41
src/nvcore/Library.cpp
Normal file
@ -0,0 +1,41 @@
|
||||
|
||||
#include "Library.h"
|
||||
#include "Debug.h"
|
||||
|
||||
#if NV_OS_WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define VC_EXTRALEAN
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/// Load the dynamic library called name and return an opaque handle to it.
/// Win32 uses LoadLibraryExA with no flags, other platforms use dlopen with
/// RTLD_LAZY. The loader's result is returned unchanged.
void * nvLoadLibrary(const char * name)
{
#if NV_OS_WIN32
	HMODULE module = LoadLibraryExA( name, NULL, 0 );
	return (void *)module;
#else
	void * const handle = dlopen(name, RTLD_LAZY);
	return handle;
#endif
}
|
||||
|
||||
void nvUnloadLibrary(void * handle)
|
||||
{
|
||||
nvDebugCheck(handle != NULL);
|
||||
#if NV_OS_WIN32
|
||||
FreeLibrary((HMODULE)handle);
|
||||
#else
|
||||
dlclose(handle);
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Look up symbol in the library identified by handle.
/// Returns whatever GetProcAddress (Win32) or dlsym (elsewhere) reports,
/// converted to a void pointer.
void * nvBindSymbol(void * handle, const char * symbol)
{
#if NV_OS_WIN32
	FARPROC proc = GetProcAddress((HMODULE)handle, symbol);
	return (void *)proc;
#else
	void * address = dlsym(handle, symbol);
	return (void *)address;
#endif
}
|
50
src/nvcore/Library.h
Normal file
50
src/nvcore/Library.h
Normal file
@ -0,0 +1,50 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#ifndef NV_CORE_LIBRARY_H
|
||||
#define NV_CORE_LIBRARY_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
|
||||
// Build the platform-specific file name of a shared library from its bare
// name: NV_LIBRARY_NAME(foo) gives "foo.dll", "libfoo.dylib" or "libfoo.so".
// The macro has the same name on every platform so callers stay portable.
#if NV_OS_WIN32
#define NV_LIBRARY_NAME(name) #name ".dll"
// Unprefixed spelling that used to be the only one defined on Win32; kept
// so existing users keep compiling.
#define LIBRARY_NAME(name) #name ".dll"
#elif NV_OS_DARWIN
#define NV_LIBRARY_NAME(name) "lib" #name ".dylib"
#else
#define NV_LIBRARY_NAME(name) "lib" #name ".so"
#endif
|
||||
|
||||
NVCORE_API void * nvLoadLibrary(const char * name);
|
||||
NVCORE_API void nvUnloadLibrary(void * lib);
|
||||
NVCORE_API void * nvBindSymbol(void * lib, const char * symbol);
|
||||
|
||||
class NVCORE_CLASS Library
|
||||
{
|
||||
public:
|
||||
Library(const char * name)
|
||||
{
|
||||
handle = nvLoadLibrary(name);
|
||||
}
|
||||
~Library()
|
||||
{
|
||||
if (isValid())
|
||||
{
|
||||
nvUnloadLibrary(handle);
|
||||
}
|
||||
}
|
||||
|
||||
bool isValid() const
|
||||
{
|
||||
return handle != NULL;
|
||||
}
|
||||
|
||||
void * bindSymbol(const char * symbol)
|
||||
{
|
||||
return nvBindSymbol(handle, symbol);
|
||||
}
|
||||
|
||||
private:
|
||||
void * handle;
|
||||
};
|
||||
|
||||
|
||||
#endif // NV_CORE_LIBRARY_H
|
@ -24,7 +24,7 @@ __forceinline void nvPrefetch(const void * mem)
|
||||
#else // NV_CC_MSVC
|
||||
|
||||
// do nothing in other case.
|
||||
#define piPrefetch(ptr)
|
||||
#define nvPrefetch(ptr)
|
||||
|
||||
#endif // NV_CC_MSVC
|
||||
|
||||
|
136
src/nvcore/Ptr.h
136
src/nvcore/Ptr.h
@ -8,11 +8,6 @@
|
||||
|
||||
#include <stdio.h> // NULL
|
||||
|
||||
#define NV_DECLARE_PTR(Class) \
|
||||
typedef SmartPtr<class Class> Class ## Ptr; \
|
||||
typedef SmartPtr<const class Class> ClassConst ## Ptr
|
||||
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
@ -48,8 +43,11 @@ public:
|
||||
|
||||
/** Delete owned pointer and assign new one. */
|
||||
void operator=( T * p ) {
|
||||
delete m_ptr;
|
||||
m_ptr = p;
|
||||
if (p != m_ptr)
|
||||
{
|
||||
delete m_ptr;
|
||||
m_ptr = p;
|
||||
}
|
||||
}
|
||||
|
||||
/** Member access. */
|
||||
@ -98,23 +96,125 @@ private:
|
||||
T * m_ptr;
|
||||
};
|
||||
|
||||
#if 0
|
||||
/** Reference counted base class to be used with Pointer.
|
||||
*
|
||||
* The only requirement of the Pointer class is that the RefCounted class implements the
|
||||
* addRef and release methods.
|
||||
*/
|
||||
class RefCounted
|
||||
{
|
||||
NV_FORBID_COPY(RefCounted);
|
||||
public:
|
||||
|
||||
/// Ctor.
|
||||
RefCounted() : m_count(0), m_weak_proxy(NULL)
|
||||
{
|
||||
s_total_obj_count++;
|
||||
}
|
||||
|
||||
/// Virtual dtor.
|
||||
virtual ~RefCounted()
|
||||
{
|
||||
nvCheck( m_count == 0 );
|
||||
nvCheck( s_total_obj_count > 0 );
|
||||
s_total_obj_count--;
|
||||
}
|
||||
|
||||
|
||||
/// Increase reference count.
|
||||
uint addRef() const
|
||||
{
|
||||
s_total_ref_count++;
|
||||
m_count++;
|
||||
return m_count;
|
||||
}
|
||||
|
||||
|
||||
/// Decrease reference count and remove when 0.
|
||||
uint release() const
|
||||
{
|
||||
nvCheck( m_count > 0 );
|
||||
|
||||
s_total_ref_count--;
|
||||
m_count--;
|
||||
if( m_count == 0 ) {
|
||||
releaseWeakProxy();
|
||||
delete this;
|
||||
return 0;
|
||||
}
|
||||
return m_count;
|
||||
}
|
||||
|
||||
/// Get weak proxy.
|
||||
WeakProxy * getWeakProxy() const
|
||||
{
|
||||
if (m_weak_proxy == NULL) {
|
||||
m_weak_proxy = new WeakProxy;
|
||||
m_weak_proxy->AddRef();
|
||||
}
|
||||
return m_weak_proxy;
|
||||
}
|
||||
|
||||
/// Release the weak proxy.
|
||||
void releaseWeakProxy() const
|
||||
{
|
||||
if (m_weak_proxy != NULL) {
|
||||
m_weak_proxy->NotifyObjectDied();
|
||||
m_weak_proxy->Release();
|
||||
m_weak_proxy = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/** @name Debug methods: */
|
||||
//@{
|
||||
/// Get reference count.
|
||||
int refCount() const
|
||||
{
|
||||
return m_count;
|
||||
}
|
||||
|
||||
/// Get total number of objects.
|
||||
static int totalObjectCount()
|
||||
{
|
||||
return s_total_obj_count;
|
||||
}
|
||||
|
||||
/// Get total number of references.
|
||||
static int totalReferenceCount()
|
||||
{
|
||||
return s_total_ref_count;
|
||||
}
|
||||
//@}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
NVCORE_API static int s_total_ref_count;
|
||||
NVCORE_API static int s_total_obj_count;
|
||||
|
||||
mutable int m_count;
|
||||
mutable WeakProxy * weak_proxy;
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
/// Smart pointer template class.
|
||||
template <class BaseClass>
|
||||
class SmartPtr {
|
||||
class Pointer {
|
||||
public:
|
||||
|
||||
// BaseClass must implement addRef() and release().
|
||||
typedef SmartPtr<BaseClass> ThisType;
|
||||
typedef Pointer<BaseClass> ThisType;
|
||||
|
||||
/// Default ctor.
|
||||
SmartPtr() : m_ptr(NULL)
|
||||
Pointer() : m_ptr(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
/** Other type assignment. */
|
||||
template <class OtherBase>
|
||||
SmartPtr( const SmartPtr<OtherBase> & tc )
|
||||
Pointer( const Pointer<OtherBase> & tc )
|
||||
{
|
||||
m_ptr = static_cast<BaseClass *>( tc.ptr() );
|
||||
if( m_ptr ) {
|
||||
@ -123,7 +223,7 @@ public:
|
||||
}
|
||||
|
||||
/** Copy ctor. */
|
||||
SmartPtr( const ThisType & bc )
|
||||
Pointer( const ThisType & bc )
|
||||
{
|
||||
m_ptr = bc.ptr();
|
||||
if( m_ptr ) {
|
||||
@ -131,8 +231,8 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/** Copy cast ctor. SmartPtr(NULL) is valid. */
|
||||
explicit SmartPtr( BaseClass * bc )
|
||||
/** Copy cast ctor. Pointer(NULL) is valid. */
|
||||
explicit Pointer( BaseClass * bc )
|
||||
{
|
||||
m_ptr = bc;
|
||||
if( m_ptr ) {
|
||||
@ -141,7 +241,7 @@ public:
|
||||
}
|
||||
|
||||
/** Dtor. */
|
||||
~SmartPtr()
|
||||
~Pointer()
|
||||
{
|
||||
set(NULL);
|
||||
}
|
||||
@ -175,7 +275,7 @@ public:
|
||||
//@{
|
||||
/** Other type assignment. */
|
||||
template <class OtherBase>
|
||||
void operator = ( const SmartPtr<OtherBase> & tc )
|
||||
void operator = ( const Pointer<OtherBase> & tc )
|
||||
{
|
||||
set( static_cast<BaseClass *>(tc.ptr()) );
|
||||
}
|
||||
@ -198,7 +298,7 @@ public:
|
||||
//@{
|
||||
/** Other type equal comparison. */
|
||||
template <class OtherBase>
|
||||
bool operator == ( const SmartPtr<OtherBase> & other ) const
|
||||
bool operator == ( const Pointer<OtherBase> & other ) const
|
||||
{
|
||||
return m_ptr == other.ptr();
|
||||
}
|
||||
@ -217,7 +317,7 @@ public:
|
||||
|
||||
/** Other type not equal comparison. */
|
||||
template <class OtherBase>
|
||||
bool operator != ( const SmartPtr<OtherBase> & other ) const
|
||||
bool operator != ( const Pointer<OtherBase> & other ) const
|
||||
{
|
||||
return m_ptr != other.ptr();
|
||||
}
|
||||
|
@ -1,9 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include "RefCounted.h"
|
||||
|
||||
using namespace nv;
|
||||
|
||||
int nv::RefCounted::s_total_ref_count = 0;
|
||||
int nv::RefCounted::s_total_obj_count = 0;
|
||||
|
@ -1,114 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_CORE_REFCOUNTED_H
|
||||
#define NV_CORE_REFCOUNTED_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
#include <nvcore/Debug.h>
|
||||
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
/// Reference counted base class to be used with SmartPtr and WeakPtr.
|
||||
class RefCounted
|
||||
{
|
||||
NV_FORBID_COPY(RefCounted);
|
||||
public:
|
||||
|
||||
/// Ctor.
|
||||
RefCounted() : m_count(0)/*, m_weak_proxy(NULL)*/
|
||||
{
|
||||
s_total_obj_count++;
|
||||
}
|
||||
|
||||
/// Virtual dtor.
|
||||
virtual ~RefCounted()
|
||||
{
|
||||
nvCheck( m_count == 0 );
|
||||
nvCheck( s_total_obj_count > 0 );
|
||||
s_total_obj_count--;
|
||||
}
|
||||
|
||||
|
||||
/// Increase reference count.
|
||||
uint addRef() const
|
||||
{
|
||||
s_total_ref_count++;
|
||||
m_count++;
|
||||
return m_count;
|
||||
}
|
||||
|
||||
|
||||
/// Decrease reference count and remove when 0.
|
||||
uint release() const
|
||||
{
|
||||
nvCheck( m_count > 0 );
|
||||
|
||||
s_total_ref_count--;
|
||||
m_count--;
|
||||
if( m_count == 0 ) {
|
||||
// releaseWeakProxy();
|
||||
delete this;
|
||||
return 0;
|
||||
}
|
||||
return m_count;
|
||||
}
|
||||
/*
|
||||
/// Get weak proxy.
|
||||
WeakProxy * getWeakProxy() const
|
||||
{
|
||||
if (m_weak_proxy == NULL) {
|
||||
m_weak_proxy = new WeakProxy;
|
||||
m_weak_proxy->AddRef();
|
||||
}
|
||||
return m_weak_proxy;
|
||||
}
|
||||
|
||||
/// Release the weak proxy.
|
||||
void releaseWeakProxy() const
|
||||
{
|
||||
if (m_weak_proxy != NULL) {
|
||||
m_weak_proxy->NotifyObjectDied();
|
||||
m_weak_proxy->Release();
|
||||
m_weak_proxy = NULL;
|
||||
}
|
||||
}
|
||||
*/
|
||||
/** @name Debug methods: */
|
||||
//@{
|
||||
/// Get reference count.
|
||||
int refCount() const
|
||||
{
|
||||
return m_count;
|
||||
}
|
||||
|
||||
/// Get total number of objects.
|
||||
static int totalObjectCount()
|
||||
{
|
||||
return s_total_obj_count;
|
||||
}
|
||||
|
||||
/// Get total number of references.
|
||||
static int totalReferenceCount()
|
||||
{
|
||||
return s_total_ref_count;
|
||||
}
|
||||
//@}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
NVCORE_API static int s_total_ref_count;
|
||||
NVCORE_API static int s_total_obj_count;
|
||||
|
||||
mutable int m_count;
|
||||
// mutable WeakProxy * weak_proxy;
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_CORE_REFCOUNTED_H
|
@ -47,25 +47,25 @@ public:
|
||||
|
||||
/** @name Stream implementation. */
|
||||
//@{
|
||||
virtual void seek( int pos )
|
||||
virtual void seek( uint pos )
|
||||
{
|
||||
nvDebugCheck(m_fp != NULL);
|
||||
nvDebugCheck(pos >= 0 && pos < size());
|
||||
nvDebugCheck(pos < size());
|
||||
fseek(m_fp, pos, SEEK_SET);
|
||||
}
|
||||
|
||||
virtual int tell() const
|
||||
virtual uint tell() const
|
||||
{
|
||||
nvDebugCheck(m_fp != NULL);
|
||||
return ftell(m_fp);
|
||||
}
|
||||
|
||||
virtual int size() const
|
||||
virtual uint size() const
|
||||
{
|
||||
nvDebugCheck(m_fp != NULL);
|
||||
int pos = ftell(m_fp);
|
||||
uint pos = ftell(m_fp);
|
||||
fseek(m_fp, 0, SEEK_END);
|
||||
int end = ftell(m_fp);
|
||||
uint end = ftell(m_fp);
|
||||
fseek(m_fp, pos, SEEK_SET);
|
||||
return end;
|
||||
}
|
||||
@ -117,11 +117,11 @@ public:
|
||||
/** @name Stream implementation. */
|
||||
//@{
|
||||
/// Write data.
|
||||
virtual void serialize( void * data, int len )
|
||||
virtual uint serialize( void * data, uint len )
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
nvDebugCheck(m_fp != NULL);
|
||||
fwrite(data, len, 1, m_fp);
|
||||
return (uint)fwrite(data, 1, len, m_fp);
|
||||
}
|
||||
|
||||
virtual bool isLoading() const
|
||||
@ -156,11 +156,11 @@ public:
|
||||
/** @name Stream implementation. */
|
||||
//@{
|
||||
/// Read data.
|
||||
virtual void serialize( void * data, int len )
|
||||
virtual uint serialize( void * data, uint len )
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
nvDebugCheck(m_fp != NULL);
|
||||
fread(data, len, 1, m_fp);
|
||||
return (uint)fread(data, 1, len, m_fp);
|
||||
}
|
||||
|
||||
virtual bool isLoading() const
|
||||
@ -184,33 +184,40 @@ class NVCORE_CLASS MemoryInputStream : public Stream
|
||||
public:
|
||||
|
||||
/// Ctor.
|
||||
MemoryInputStream( const uint8 * mem, int size ) :
|
||||
MemoryInputStream( const uint8 * mem, uint size ) :
|
||||
m_mem(mem), m_ptr(mem), m_size(size) { }
|
||||
|
||||
/** @name Stream implementation. */
|
||||
//@{
|
||||
/// Read data.
|
||||
virtual void serialize( void * data, int len )
|
||||
virtual uint serialize( void * data, uint len )
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
nvDebugCheck(!isError());
|
||||
|
||||
uint left = m_size - tell();
|
||||
if (len > left) len = left;
|
||||
|
||||
memcpy( data, m_ptr, len );
|
||||
m_ptr += len;
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
virtual void seek( int pos )
|
||||
virtual void seek( uint pos )
|
||||
{
|
||||
nvDebugCheck(!isError());
|
||||
m_ptr = m_mem + pos;
|
||||
nvDebugCheck(!isError());
|
||||
}
|
||||
|
||||
virtual int tell() const
|
||||
virtual uint tell() const
|
||||
{
|
||||
return int(m_ptr - m_mem);
|
||||
nvDebugCheck(m_ptr >= m_mem);
|
||||
return uint(m_ptr - m_mem);
|
||||
}
|
||||
|
||||
virtual int size() const
|
||||
virtual uint size() const
|
||||
{
|
||||
return m_size;
|
||||
}
|
||||
@ -252,7 +259,7 @@ private:
|
||||
|
||||
const uint8 * m_mem;
|
||||
const uint8 * m_ptr;
|
||||
int m_size;
|
||||
uint m_size;
|
||||
|
||||
};
|
||||
|
||||
@ -286,17 +293,19 @@ public:
|
||||
/** @name Stream implementation. */
|
||||
//@{
|
||||
/// Read data.
|
||||
virtual void serialize( void * data, int len )
|
||||
virtual uint serialize( void * data, uint len )
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
m_s->serialize( data, len );
|
||||
len = m_s->serialize( data, len );
|
||||
|
||||
if( m_s->isError() ) {
|
||||
throw std::exception();
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
virtual void seek( int pos )
|
||||
virtual void seek( uint pos )
|
||||
{
|
||||
m_s->seek( pos );
|
||||
|
||||
@ -305,12 +314,12 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
virtual int tell() const
|
||||
virtual uint tell() const
|
||||
{
|
||||
return m_s->tell();
|
||||
}
|
||||
|
||||
virtual int size() const
|
||||
virtual uint size() const
|
||||
{
|
||||
return m_s->size();
|
||||
}
|
||||
|
@ -209,48 +209,11 @@ StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
|
||||
}
|
||||
|
||||
/** Copy string. */
|
||||
StringBuilder::StringBuilder( const char * s )
|
||||
StringBuilder::StringBuilder( const char * s ) : m_size(0), m_str(NULL)
|
||||
{
|
||||
copy(s);
|
||||
}
|
||||
|
||||
/** Allocate and copy string. */
|
||||
StringBuilder::StringBuilder( int size_hint, const StringBuilder & s) : m_size(size_hint), m_str(NULL)
|
||||
{
|
||||
nvDebugCheck(m_size > 0);
|
||||
m_str = strAlloc(m_size);
|
||||
copy(s);
|
||||
}
|
||||
|
||||
/** Allocate and format string. */
|
||||
StringBuilder::StringBuilder( const char * fmt, ... ) : m_size(0), m_str(NULL)
|
||||
{
|
||||
nvDebugCheck(fmt != NULL);
|
||||
va_list arg;
|
||||
va_start( arg, fmt );
|
||||
|
||||
format( fmt, arg );
|
||||
|
||||
va_end( arg );
|
||||
}
|
||||
|
||||
/** Allocate and format string. */
|
||||
StringBuilder::StringBuilder( int size_hint, const char * fmt, ... ) : m_size(size_hint), m_str(NULL)
|
||||
{
|
||||
nvDebugCheck(m_size > 0);
|
||||
nvDebugCheck(fmt != NULL);
|
||||
|
||||
m_str = strAlloc(m_size);
|
||||
|
||||
va_list arg;
|
||||
va_start( arg, fmt );
|
||||
|
||||
format( fmt, arg );
|
||||
|
||||
va_end( arg );
|
||||
}
|
||||
|
||||
|
||||
/** Delete the string. */
|
||||
StringBuilder::~StringBuilder()
|
||||
{
|
||||
@ -278,8 +241,7 @@ StringBuilder & StringBuilder::format( const char * fmt, ... )
|
||||
/** Format a string safely. */
|
||||
StringBuilder & StringBuilder::format( const char * fmt, va_list arg )
|
||||
{
|
||||
nvCheck(fmt != NULL);
|
||||
nvCheck(m_size >= 0);
|
||||
nvDebugCheck(fmt != NULL);
|
||||
|
||||
if( m_size == 0 ) {
|
||||
m_size = 64;
|
||||
@ -327,8 +289,7 @@ StringBuilder & StringBuilder::format( const char * fmt, va_list arg )
|
||||
/** Append a string. */
|
||||
StringBuilder & StringBuilder::append( const char * s )
|
||||
{
|
||||
nvCheck(s != NULL);
|
||||
nvCheck(m_size >= 0);
|
||||
nvDebugCheck(s != NULL);
|
||||
|
||||
const uint slen = uint(strlen( s ));
|
||||
|
||||
@ -475,31 +436,6 @@ void StringBuilder::reset()
|
||||
}
|
||||
|
||||
|
||||
Path::Path(const char * fmt, ...)
|
||||
{
|
||||
nvDebugCheck( fmt != NULL );
|
||||
|
||||
va_list arg;
|
||||
va_start( arg, fmt );
|
||||
|
||||
format( fmt, arg );
|
||||
|
||||
va_end( arg );
|
||||
}
|
||||
|
||||
Path::Path(int size_hint, const char * fmt, ...) : StringBuilder(size_hint)
|
||||
{
|
||||
nvDebugCheck( fmt != NULL );
|
||||
|
||||
va_list arg;
|
||||
va_start( arg, fmt );
|
||||
|
||||
format( fmt, arg );
|
||||
|
||||
va_end( arg );
|
||||
}
|
||||
|
||||
|
||||
/// Get the file name from a path.
|
||||
const char * Path::fileName() const
|
||||
{
|
||||
@ -609,8 +545,6 @@ const char * Path::extension(const char * str)
|
||||
}
|
||||
|
||||
|
||||
// static
|
||||
String String::s_null(String::null);
|
||||
|
||||
/// Clone this string
|
||||
String String::clone() const
|
||||
@ -621,13 +555,13 @@ String String::clone() const
|
||||
|
||||
void String::setString(const char * str)
|
||||
{
|
||||
if( str == NULL ) {
|
||||
data = s_null.data;
|
||||
if (str == NULL) {
|
||||
data = NULL;
|
||||
}
|
||||
else {
|
||||
allocString( str );
|
||||
addRef();
|
||||
}
|
||||
addRef();
|
||||
}
|
||||
|
||||
void String::setString(const char * str, int length)
|
||||
@ -640,11 +574,11 @@ void String::setString(const char * str, int length)
|
||||
|
||||
void String::setString(const StringBuilder & str)
|
||||
{
|
||||
if( str.str() == NULL ) {
|
||||
data = s_null.data;
|
||||
if (str.str() == NULL) {
|
||||
data = NULL;
|
||||
}
|
||||
else {
|
||||
allocString(str);
|
||||
addRef();
|
||||
}
|
||||
addRef();
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ namespace nv
|
||||
|
||||
uint strHash(const char * str, uint h) NV_PURE;
|
||||
|
||||
/// String hash vased on Bernstein's hash.
|
||||
/// String hash based on Bernstein's hash.
|
||||
inline uint strHash(const char * data, uint h = 5381)
|
||||
{
|
||||
uint i;
|
||||
@ -47,9 +47,6 @@ namespace nv
|
||||
explicit StringBuilder( int size_hint );
|
||||
StringBuilder( const char * str );
|
||||
StringBuilder( const StringBuilder & );
|
||||
StringBuilder( int size_hint, const StringBuilder & );
|
||||
StringBuilder( const char * format, ... ) __attribute__((format (printf, 2, 3)));
|
||||
StringBuilder( int size_hint, const char * format, ... ) __attribute__((format (printf, 3, 4)));
|
||||
|
||||
~StringBuilder();
|
||||
|
||||
@ -120,18 +117,16 @@ namespace nv
|
||||
char * m_str;
|
||||
|
||||
};
|
||||
|
||||
|
||||
/// Path string.
|
||||
|
||||
/// Path string. @@ This should be called PathBuilder.
|
||||
class NVCORE_CLASS Path : public StringBuilder
|
||||
{
|
||||
public:
|
||||
Path() : StringBuilder() {}
|
||||
explicit Path(int size_hint) : StringBuilder(size_hint) {}
|
||||
Path(const StringBuilder & str) : StringBuilder(str) {}
|
||||
Path(int size_hint, const StringBuilder & str) : StringBuilder(size_hint, str) {}
|
||||
Path(const char * format, ...) __attribute__((format (printf, 2, 3)));
|
||||
Path(int size_hint, const char * format, ...) __attribute__((format (printf, 3, 4)));
|
||||
Path(const char * str) : StringBuilder(str) {}
|
||||
Path(const Path & path) : StringBuilder(path) {}
|
||||
|
||||
const char * fileName() const;
|
||||
const char * extension() const;
|
||||
@ -140,7 +135,7 @@ namespace nv
|
||||
|
||||
void stripFileName();
|
||||
void stripExtension();
|
||||
|
||||
|
||||
// statics
|
||||
NVCORE_API static char separator();
|
||||
NVCORE_API static const char * fileName(const char *);
|
||||
@ -156,15 +151,14 @@ namespace nv
|
||||
/// Constructs a null string. @sa isNull()
|
||||
String()
|
||||
{
|
||||
data = s_null.data;
|
||||
addRef();
|
||||
data = NULL;
|
||||
}
|
||||
|
||||
/// Constructs a shared copy of str.
|
||||
String(const String & str)
|
||||
{
|
||||
data = str.data;
|
||||
addRef();
|
||||
if (data != NULL) addRef();
|
||||
}
|
||||
|
||||
/// Constructs a shared string from a standard string.
|
||||
@ -188,7 +182,6 @@ namespace nv
|
||||
/// Dtor.
|
||||
~String()
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
release();
|
||||
}
|
||||
|
||||
@ -213,52 +206,61 @@ namespace nv
|
||||
/// Implement value semantics.
|
||||
String & operator=( const String & str )
|
||||
{
|
||||
release();
|
||||
data = str.data;
|
||||
addRef();
|
||||
if (str.data != data)
|
||||
{
|
||||
release();
|
||||
data = str.data;
|
||||
addRef();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Equal operator.
|
||||
bool operator==( const String & str ) const
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
nvDebugCheck(str.data != NULL);
|
||||
if( str.data == data ) {
|
||||
return true;
|
||||
}
|
||||
if ((data == NULL) != (str.data == NULL)) {
|
||||
return false;
|
||||
}
|
||||
return strcmp(data, str.data) == 0;
|
||||
}
|
||||
|
||||
/// Equal operator.
|
||||
bool operator==( const char * str ) const
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
nvCheck(str != NULL); // Use isNull!
|
||||
if (data == NULL) {
|
||||
return false;
|
||||
}
|
||||
return strcmp(data, str) == 0;
|
||||
}
|
||||
|
||||
/// Not equal operator.
|
||||
bool operator!=( const String & str ) const
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
nvDebugCheck(str.data != NULL);
|
||||
if( str.data == data ) {
|
||||
return false;
|
||||
}
|
||||
if ((data == NULL) != (str.data == NULL)) {
|
||||
return true;
|
||||
}
|
||||
return strcmp(data, str.data) != 0;
|
||||
}
|
||||
|
||||
/// Not equal operator.
|
||||
bool operator!=( const char * str ) const
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
nvCheck(str != NULL); // Use isNull!
|
||||
if (data == NULL) {
|
||||
return false;
|
||||
}
|
||||
return strcmp(data, str) != 0;
|
||||
}
|
||||
|
||||
/// Returns true if this string is the null string.
|
||||
bool isNull() const { nvDebugCheck(data != NULL); return data == s_null.data; }
|
||||
bool isNull() const { return data == NULL; }
|
||||
|
||||
/// Return the exact length.
|
||||
uint length() const { nvDebugCheck(data != NULL); return uint(strlen(data)); }
|
||||
@ -267,44 +269,45 @@ namespace nv
|
||||
uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
|
||||
|
||||
/// const char * cast operator.
|
||||
operator const char * () const { nvDebugCheck(data != NULL); return data; }
|
||||
operator const char * () const { return data; }
|
||||
|
||||
/// Get string pointer.
|
||||
const char * str() const { nvDebugCheck(data != NULL); return data; }
|
||||
const char * str() const { return data; }
|
||||
|
||||
|
||||
private:
|
||||
|
||||
enum null_t { null };
|
||||
|
||||
// Private constructor for null string.
|
||||
String(null_t) {
|
||||
setString("");
|
||||
}
|
||||
|
||||
// Add reference count.
|
||||
void addRef() {
|
||||
nvDebugCheck(data != NULL);
|
||||
setRefCount(getRefCount() + 1);
|
||||
}
|
||||
|
||||
// Decrease reference count.
|
||||
void release() {
|
||||
nvDebugCheck(data != NULL);
|
||||
|
||||
const uint16 count = getRefCount();
|
||||
setRefCount(count - 1);
|
||||
if( count - 1 == 0 ) {
|
||||
mem::free(data - 2);
|
||||
data = NULL;
|
||||
void addRef()
|
||||
{
|
||||
if (data != NULL)
|
||||
{
|
||||
setRefCount(getRefCount() + 1);
|
||||
}
|
||||
}
|
||||
|
||||
uint16 getRefCount() const {
|
||||
// Decrease reference count.
|
||||
void release()
|
||||
{
|
||||
if (data != NULL)
|
||||
{
|
||||
const uint16 count = getRefCount();
|
||||
setRefCount(count - 1);
|
||||
if (count - 1 == 0) {
|
||||
mem::free(data - 2);
|
||||
data = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint16 getRefCount() const
|
||||
{
|
||||
nvDebugCheck(data != NULL);
|
||||
return *reinterpret_cast<const uint16 *>(data - 2);
|
||||
}
|
||||
|
||||
void setRefCount(uint16 count) {
|
||||
nvDebugCheck(data != NULL);
|
||||
nvCheck(count < 0xFFFF);
|
||||
*reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
|
||||
}
|
||||
@ -343,8 +346,6 @@ namespace nv
|
||||
|
||||
private:
|
||||
|
||||
NVCORE_API static String s_null;
|
||||
|
||||
const char * data;
|
||||
|
||||
};
|
||||
|
@ -41,17 +41,17 @@ public:
|
||||
ByteOrder byteOrder() const { return m_byteOrder; }
|
||||
|
||||
|
||||
/// Serialize the given data. @@ Should return bytes serialized?
|
||||
virtual void serialize( void * data, int len ) = 0;
|
||||
/// Serialize the given data.
|
||||
virtual uint serialize( void * data, uint len ) = 0;
|
||||
|
||||
/// Move to the given position in the archive.
|
||||
virtual void seek( int pos ) = 0;
|
||||
virtual void seek( uint pos ) = 0;
|
||||
|
||||
/// Return the current position in the archive.
|
||||
virtual int tell() const = 0;
|
||||
virtual uint tell() const = 0;
|
||||
|
||||
/// Return the current size of the archive.
|
||||
virtual int size() const = 0;
|
||||
virtual uint size() const = 0;
|
||||
|
||||
/// Determine if there has been any error.
|
||||
virtual bool isError() const = 0;
|
||||
@ -136,13 +136,13 @@ public:
|
||||
protected:
|
||||
|
||||
/// Serialize in the stream byte order.
|
||||
Stream & byteOrderSerialize( void * v, int len ) {
|
||||
Stream & byteOrderSerialize( void * v, uint len ) {
|
||||
if( m_byteOrder == getSystemByteOrder() ) {
|
||||
serialize( v, len );
|
||||
}
|
||||
else {
|
||||
for( int i=len-1; i>=0; i-- ) {
|
||||
serialize( (uint8 *)v + i, 1 );
|
||||
for( uint i = len; i > 0; i-- ) {
|
||||
serialize( (uint8 *)v + i - 1, 1 );
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
|
@ -48,7 +48,7 @@ const char * TextReader::readToEnd()
|
||||
m_text.reserve(size + 1);
|
||||
m_text.resize(size);
|
||||
|
||||
m_stream->serialize(m_text.mutableBuffer(), size);
|
||||
m_stream->serialize(m_text.unsecureBuffer(), size);
|
||||
m_text.pushBack('\0');
|
||||
|
||||
return m_text.buffer();
|
||||
|
@ -22,7 +22,6 @@
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <nvcore/Stream.h>
|
||||
#include <nvcore/Containers.h> // swap
|
||||
|
||||
#include "ColorBlock.h"
|
||||
#include "BlockDXT.h"
|
||||
|
@ -1,6 +1,5 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include <nvcore/Containers.h> // swap
|
||||
#include <nvmath/Box.h>
|
||||
#include <nvimage/ColorBlock.h>
|
||||
#include <nvimage/Image.h>
|
||||
|
@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
|
||||
|
||||
// Store version information on the reserved header attributes.
|
||||
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
|
||||
this->reserved[10] = (0 << 16) | (9 << 8) | (5); // major.minor.revision
|
||||
this->reserved[10] = (2 << 16) | (0 << 8) | (7); // major.minor.revision
|
||||
|
||||
this->pf.size = 32;
|
||||
this->pf.flags = 0;
|
||||
@ -989,10 +989,10 @@ void DirectDrawSurface::readLinearImage(Image * img)
|
||||
stream->serialize(&c, byteCount);
|
||||
|
||||
Color32 pixel(0, 0, 0, 0xFF);
|
||||
pixel.r = PixelFormat::convert(c >> rshift, rsize, 8);
|
||||
pixel.g = PixelFormat::convert(c >> gshift, gsize, 8);
|
||||
pixel.b = PixelFormat::convert(c >> bshift, bsize, 8);
|
||||
pixel.a = PixelFormat::convert(c >> ashift, asize, 8);
|
||||
pixel.r = PixelFormat::convert((c & header.pf.rmask) >> rshift, rsize, 8);
|
||||
pixel.g = PixelFormat::convert((c & header.pf.gmask) >> gshift, gsize, 8);
|
||||
pixel.b = PixelFormat::convert((c & header.pf.bmask) >> bshift, bsize, 8);
|
||||
pixel.a = PixelFormat::convert((c & header.pf.amask) >> ashift, asize, 8);
|
||||
|
||||
img->pixel(x, y) = pixel;
|
||||
}
|
||||
|
@ -26,18 +26,17 @@
|
||||
* http://www.xmission.com/~legalize/zoom.html
|
||||
*
|
||||
* Reconstruction Filters in Computer Graphics
|
||||
* http://www.mentallandscape.com/Papers_siggraph88.pdf
|
||||
* http://www.mentallandscape.com/Papers_siggraph88.pdf
|
||||
*
|
||||
* More references:
|
||||
* http://www.worldserver.com/turk/computergraphics/ResamplingFilters.pdf
|
||||
* http://www.worldserver.com/turk/computergraphics/ResamplingFilters.pdf
|
||||
* http://www.dspguide.com/ch16.htm
|
||||
*/
|
||||
|
||||
#include "Filter.h"
|
||||
|
||||
#include <nvcore/Containers.h> // swap
|
||||
#include <nvmath/nvmath.h> // fabs
|
||||
#include <nvmath/Vector.h> // Vector4
|
||||
#include <nvimage/Filter.h>
|
||||
#include <nvcore/Containers.h> // swap
|
||||
|
||||
using namespace nv;
|
||||
|
||||
@ -244,7 +243,7 @@ SincFilter::SincFilter(float w) : Filter(w) {}
|
||||
|
||||
float SincFilter::evaluate(float x) const
|
||||
{
|
||||
return 0.0f;
|
||||
return sincf(PI * x);
|
||||
}
|
||||
|
||||
|
||||
@ -504,7 +503,7 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
|
||||
|
||||
for (int i = 0; i < 7; i++) {
|
||||
for (int e = 0; e < 7; e++) {
|
||||
m_data[i * 9 + e + 1] += elements[i * 7 + e] * scale.z();
|
||||
m_data[(i + 1) * 9 + e + 1] += elements[i * 7 + e] * scale.z();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -519,7 +518,7 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (int e = 0; e < 5; e++) {
|
||||
m_data[i * 9 + e + 2] += elements[i * 5 + e] * scale.y();
|
||||
m_data[(i + 2) * 9 + e + 2] += elements[i * 5 + e] * scale.y();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -532,7 +531,7 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
for (int e = 0; e < 3; e++) {
|
||||
m_data[i * 9 + e + 3] += elements[i * 3 + e] * scale.x();
|
||||
m_data[(i + 3) * 9 + e + 3] += elements[i * 3 + e] * scale.x();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -541,12 +540,17 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
|
||||
|
||||
PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples/*= 32*/)
|
||||
{
|
||||
nvCheck(srcLength >= dstLength); // @@ Upsampling not implemented!
|
||||
nvDebugCheck(samples > 0);
|
||||
|
||||
const float scale = float(dstLength) / float(srcLength);
|
||||
|
||||
float scale = float(dstLength) / float(srcLength);
|
||||
const float iscale = 1.0f / scale;
|
||||
|
||||
if (scale > 1) {
|
||||
// Upsampling.
|
||||
samples = 1;
|
||||
scale = 1;
|
||||
}
|
||||
|
||||
m_length = dstLength;
|
||||
m_width = f.width() * iscale;
|
||||
m_windowSize = (int)ceilf(m_width * 2) + 1;
|
||||
|
@ -78,7 +78,7 @@ namespace nv
|
||||
MitchellFilter();
|
||||
virtual float evaluate(float x) const;
|
||||
|
||||
void setParameters(float a, float b);
|
||||
void setParameters(float b, float c);
|
||||
|
||||
private:
|
||||
float p0, p2, p3;
|
||||
|
@ -1,16 +1,18 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include <nvcore/Containers.h>
|
||||
#include <nvcore/Ptr.h>
|
||||
|
||||
#include <nvmath/Color.h>
|
||||
|
||||
#include "FloatImage.h"
|
||||
#include "Filter.h"
|
||||
#include "Image.h"
|
||||
|
||||
#include <nvmath/Color.h>
|
||||
#include <nvmath/Matrix.h>
|
||||
|
||||
#include <nvcore/Containers.h>
|
||||
#include <nvcore/Ptr.h>
|
||||
|
||||
#include <math.h>
|
||||
|
||||
|
||||
using namespace nv;
|
||||
|
||||
namespace
|
||||
@ -140,7 +142,8 @@ Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const
|
||||
/// Allocate a 2d float image of the given format and the given extents.
|
||||
void FloatImage::allocate(uint c, uint w, uint h)
|
||||
{
|
||||
nvCheck(m_mem == NULL);
|
||||
free();
|
||||
|
||||
m_width = w;
|
||||
m_height = h;
|
||||
m_componentNum = c;
|
||||
@ -151,7 +154,6 @@ void FloatImage::allocate(uint c, uint w, uint h)
|
||||
/// Free the image, but don't clear the members.
|
||||
void FloatImage::free()
|
||||
{
|
||||
nvCheck(m_mem != NULL);
|
||||
nv::mem::free( reinterpret_cast<void *>(m_mem) );
|
||||
m_mem = NULL;
|
||||
}
|
||||
@ -376,7 +378,7 @@ FloatImage * FloatImage::fastDownSample() const
|
||||
{
|
||||
const uint n = w * h;
|
||||
|
||||
if (n & 1)
|
||||
if ((m_width * m_height) & 1)
|
||||
{
|
||||
const float scale = 1.0f / (2 * n + 1);
|
||||
|
||||
@ -540,73 +542,27 @@ FloatImage * FloatImage::fastDownSample() const
|
||||
return dst_image.release();
|
||||
}
|
||||
|
||||
/*
|
||||
/// Downsample applying a 1D kernel separately in each dimension.
|
||||
FloatImage * FloatImage::downSample(const Kernel1 & kernel, WrapMode wm) const
|
||||
{
|
||||
const uint w = max(1, m_width / 2);
|
||||
const uint h = max(1, m_height / 2);
|
||||
|
||||
return downSample(kernel, w, h, wm);
|
||||
}
|
||||
|
||||
|
||||
/// Downsample applying a 1D kernel separately in each dimension.
|
||||
FloatImage * FloatImage::downSample(const Kernel1 & kernel, uint w, uint h, WrapMode wm) const
|
||||
{
|
||||
nvCheck(!(kernel.windowSize() & 1)); // Make sure that kernel m_width is even.
|
||||
|
||||
AutoPtr<FloatImage> tmp_image( new FloatImage() );
|
||||
tmp_image->allocate(m_componentNum, w, m_height);
|
||||
|
||||
AutoPtr<FloatImage> dst_image( new FloatImage() );
|
||||
dst_image->allocate(m_componentNum, w, h);
|
||||
|
||||
const float xscale = float(m_width) / float(w);
|
||||
const float yscale = float(m_height) / float(h);
|
||||
|
||||
for(uint c = 0; c < m_componentNum; c++) {
|
||||
float * tmp_channel = tmp_image->channel(c);
|
||||
|
||||
for(uint y = 0; y < m_height; y++) {
|
||||
for(uint x = 0; x < w; x++) {
|
||||
|
||||
float sum = this->applyKernelHorizontal(&kernel, uint(x*xscale), y, c, wm);
|
||||
|
||||
const uint tmp_index = tmp_image->index(x, y);
|
||||
tmp_channel[tmp_index] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
float * dst_channel = dst_image->channel(c);
|
||||
|
||||
for(uint y = 0; y < h; y++) {
|
||||
for(uint x = 0; x < w; x++) {
|
||||
|
||||
float sum = tmp_image->applyKernelVertical(&kernel, uint(x*xscale), uint(y*yscale), c, wm);
|
||||
|
||||
const uint dst_index = dst_image->index(x, y);
|
||||
dst_channel[dst_index] = sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return dst_image.release();
|
||||
}
|
||||
*/
|
||||
|
||||
/// Downsample applying a 1D kernel separately in each dimension.
|
||||
FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const
|
||||
{
|
||||
const uint w = max(1, m_width / 2);
|
||||
const uint h = max(1, m_height / 2);
|
||||
|
||||
return downSample(filter, w, h, wm);
|
||||
return resize(filter, w, h, wm);
|
||||
}
|
||||
|
||||
/// Downsample applying a 1D kernel separately in each dimension.
|
||||
FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm, uint alpha) const
|
||||
{
|
||||
const uint w = max(1, m_width / 2);
|
||||
const uint h = max(1, m_height / 2);
|
||||
|
||||
return resize(filter, w, h, wm, alpha);
|
||||
}
|
||||
|
||||
|
||||
/// Downsample applying a 1D kernel separately in each dimension.
|
||||
FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapMode wm) const
|
||||
FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm) const
|
||||
{
|
||||
// @@ Use monophase filters when frac(m_width / w) == 0
|
||||
|
||||
@ -636,7 +592,7 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
|
||||
float * dst_channel = dst_image->channel(c);
|
||||
|
||||
for (uint x = 0; x < w; x++) {
|
||||
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer());
|
||||
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
|
||||
|
||||
for (uint y = 0; y < h; y++) {
|
||||
dst_channel[y * w + x] = tmp_column[y];
|
||||
@ -657,7 +613,7 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
|
||||
float * tmp_channel = tmp_image->channel(c);
|
||||
|
||||
for (uint x = 0; x < w; x++) {
|
||||
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer());
|
||||
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
|
||||
|
||||
for (uint y = 0; y < h; y++) {
|
||||
tmp_channel[y * w + x] = tmp_column[y];
|
||||
@ -675,10 +631,56 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
|
||||
return dst_image.release();
|
||||
}
|
||||
|
||||
/// Downsample applying a 1D kernel separately in each dimension.
|
||||
FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const
|
||||
{
|
||||
nvCheck(alpha < m_componentNum);
|
||||
|
||||
AutoPtr<FloatImage> tmp_image( new FloatImage() );
|
||||
AutoPtr<FloatImage> dst_image( new FloatImage() );
|
||||
|
||||
PolyphaseKernel xkernel(filter, m_width, w, 32);
|
||||
PolyphaseKernel ykernel(filter, m_height, h, 32);
|
||||
|
||||
{
|
||||
tmp_image->allocate(m_componentNum, w, m_height);
|
||||
dst_image->allocate(m_componentNum, w, h);
|
||||
|
||||
Array<float> tmp_column(h);
|
||||
tmp_column.resize(h);
|
||||
|
||||
for (uint c = 0; c < m_componentNum; c++)
|
||||
{
|
||||
float * tmp_channel = tmp_image->channel(c);
|
||||
|
||||
for (uint y = 0; y < m_height; y++) {
|
||||
this->applyKernelHorizontal(xkernel, y, c, alpha, wm, tmp_channel + y * w);
|
||||
}
|
||||
}
|
||||
|
||||
// Process all channels before applying vertical kernel to make sure alpha has been computed.
|
||||
|
||||
for (uint c = 0; c < m_componentNum; c++)
|
||||
{
|
||||
float * dst_channel = dst_image->channel(c);
|
||||
|
||||
for (uint x = 0; x < w; x++) {
|
||||
tmp_image->applyKernelVertical(ykernel, x, c, alpha, wm, tmp_column.unsecureBuffer());
|
||||
|
||||
for (uint y = 0; y < h; y++) {
|
||||
dst_channel[y * w + x] = tmp_column[y];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return dst_image.release();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Apply 2D kernel at the given coordinates and return result.
|
||||
float FloatImage::applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const
|
||||
float FloatImage::applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const
|
||||
{
|
||||
nvDebugCheck(k != NULL);
|
||||
|
||||
@ -707,7 +709,7 @@ float FloatImage::applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode w
|
||||
|
||||
|
||||
/// Apply 1D vertical kernel at the given coordinates and return result.
|
||||
float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const
|
||||
float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const
|
||||
{
|
||||
nvDebugCheck(k != NULL);
|
||||
|
||||
@ -729,7 +731,7 @@ float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, int c, Wr
|
||||
}
|
||||
|
||||
/// Apply 1D horizontal kernel at the given coordinates and return result.
|
||||
float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const
|
||||
float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const
|
||||
{
|
||||
nvDebugCheck(k != NULL);
|
||||
|
||||
@ -752,7 +754,7 @@ float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, int c,
|
||||
|
||||
|
||||
/// Apply 1D vertical kernel at the given coordinates and return result.
|
||||
void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, int c, WrapMode wm, float * output) const
|
||||
void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * __restrict output) const
|
||||
{
|
||||
const uint length = k.length();
|
||||
const float scale = float(length) / float(m_height);
|
||||
@ -784,7 +786,7 @@ void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, int c, Wr
|
||||
}
|
||||
|
||||
/// Apply 1D horizontal kernel at the given coordinates and return result.
|
||||
void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, int c, WrapMode wm, float * output) const
|
||||
void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * __restrict output) const
|
||||
{
|
||||
const uint length = k.length();
|
||||
const float scale = float(length) / float(m_width);
|
||||
@ -815,3 +817,93 @@ void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, int c,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Apply 1D vertical kernel at the given coordinates and return result.
|
||||
void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * __restrict output) const
|
||||
{
|
||||
const uint length = k.length();
|
||||
const float scale = float(length) / float(m_height);
|
||||
const float iscale = 1.0f / scale;
|
||||
|
||||
const float width = k.width();
|
||||
const int windowSize = k.windowSize();
|
||||
|
||||
const float * channel = this->channel(c);
|
||||
const float * alpha = this->channel(a);
|
||||
|
||||
for (uint i = 0; i < length; i++)
|
||||
{
|
||||
const float center = (0.5f + i) * iscale;
|
||||
|
||||
const int left = (int)floorf(center - width);
|
||||
const int right = (int)ceilf(center + width);
|
||||
nvCheck(right - left <= windowSize);
|
||||
|
||||
float norm = 0;
|
||||
float sum = 0;
|
||||
for (int j = 0; j < windowSize; ++j)
|
||||
{
|
||||
const int idx = this->index(x, j+left, wm);
|
||||
|
||||
float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f));
|
||||
norm += w;
|
||||
sum += w * channel[idx];
|
||||
}
|
||||
|
||||
output[i] = sum / norm;
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply 1D horizontal kernel at the given coordinates and return result.
|
||||
void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * __restrict output) const
|
||||
{
|
||||
const uint length = k.length();
|
||||
const float scale = float(length) / float(m_width);
|
||||
const float iscale = 1.0f / scale;
|
||||
|
||||
const float width = k.width();
|
||||
const int windowSize = k.windowSize();
|
||||
|
||||
const float * channel = this->channel(c);
|
||||
const float * alpha = this->channel(a);
|
||||
|
||||
for (uint i = 0; i < length; i++)
|
||||
{
|
||||
const float center = (0.5f + i) * iscale;
|
||||
|
||||
const int left = (int)floorf(center - width);
|
||||
const int right = (int)ceilf(center + width);
|
||||
nvDebugCheck(right - left <= windowSize);
|
||||
|
||||
float norm = 0.0f;
|
||||
float sum = 0;
|
||||
for (int j = 0; j < windowSize; ++j)
|
||||
{
|
||||
const int idx = this->index(left + j, y, wm);
|
||||
|
||||
float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f));
|
||||
norm += w;
|
||||
sum += w * channel[idx];
|
||||
}
|
||||
|
||||
output[i] = sum / norm;
|
||||
}
|
||||
}
|
||||
|
||||
FloatImage* FloatImage::clone() const
|
||||
{
|
||||
FloatImage* copy = new FloatImage();
|
||||
copy->m_width = m_width;
|
||||
copy->m_height = m_height;
|
||||
copy->m_componentNum = m_componentNum;
|
||||
copy->m_count = m_count;
|
||||
|
||||
if(m_mem)
|
||||
{
|
||||
copy->allocate(m_componentNum, m_width, m_height);
|
||||
memcpy(copy->m_mem, m_mem, m_count * sizeof(float));
|
||||
}
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
@ -3,14 +3,20 @@
|
||||
#ifndef NV_IMAGE_FLOATIMAGE_H
|
||||
#define NV_IMAGE_FLOATIMAGE_H
|
||||
|
||||
#include <stdlib.h> // abs
|
||||
#include <nvimage/nvimage.h>
|
||||
|
||||
#include <nvmath/Vector.h>
|
||||
|
||||
#include <nvcore/Debug.h>
|
||||
#include <nvcore/Algorithms.h> // clamp
|
||||
#include <nvimage/nvimage.h>
|
||||
#include <nvcore/Containers.h> // clamp
|
||||
|
||||
#include <stdlib.h> // abs
|
||||
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Vector4;
|
||||
class Matrix;
|
||||
class Image;
|
||||
class Filter;
|
||||
class Kernel1;
|
||||
@ -62,20 +68,22 @@ public:
|
||||
NVIMAGE_API void toGamma(uint base_component, uint num, float gamma = 2.2f);
|
||||
NVIMAGE_API void exponentiate(uint base_component, uint num, float power);
|
||||
|
||||
|
||||
|
||||
NVIMAGE_API FloatImage * fastDownSample() const;
|
||||
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const;
|
||||
NVIMAGE_API FloatImage * downSample(const Filter & filter, uint w, uint h, WrapMode wm) const;
|
||||
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const;
|
||||
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const;
|
||||
|
||||
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const;
|
||||
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const;
|
||||
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const;
|
||||
//@}
|
||||
|
||||
NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const;
|
||||
NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const;
|
||||
NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const;
|
||||
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, int c, WrapMode wm, float * output) const;
|
||||
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, int c, WrapMode wm, float * output) const;
|
||||
NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const;
|
||||
NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const;
|
||||
NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const;
|
||||
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * output) const;
|
||||
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * output) const;
|
||||
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * output) const;
|
||||
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * output) const;
|
||||
|
||||
|
||||
uint width() const { return m_width; }
|
||||
@ -111,6 +119,9 @@ public:
|
||||
float sampleLinearMirror(float x, float y, int c) const;
|
||||
//@}
|
||||
|
||||
|
||||
FloatImage* clone() const;
|
||||
|
||||
public:
|
||||
|
||||
uint index(uint x, uint y) const;
|
||||
@ -228,11 +239,15 @@ inline uint FloatImage::indexRepeat(int x, int y) const
|
||||
|
||||
inline uint FloatImage::indexMirror(int x, int y) const
|
||||
{
|
||||
if (m_width == 1) x = 0;
|
||||
|
||||
x = abs(x);
|
||||
while (x >= m_width) {
|
||||
x = abs(m_width + m_width - x - 2);
|
||||
}
|
||||
|
||||
if (m_height == 1) y = 0;
|
||||
|
||||
y = abs(y);
|
||||
while (y >= m_height) {
|
||||
y = abs(m_height + m_height - y - 2);
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
#include <nvcore/Debug.h>
|
||||
#include <nvcore/Ptr.h>
|
||||
#include <nvcore/Containers.h> // swap
|
||||
|
||||
#include <nvmath/Color.h>
|
||||
|
||||
@ -79,7 +78,7 @@ void Image::unwrap()
|
||||
|
||||
void Image::free()
|
||||
{
|
||||
::free(m_data);
|
||||
nv::mem::free(m_data);
|
||||
m_data = NULL;
|
||||
}
|
||||
|
||||
|
@ -162,9 +162,6 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
|
||||
if (strCaseCmp(extension, ".pfm") == 0) {
|
||||
return loadFloatPFM(fileName, s);
|
||||
}
|
||||
if (strCaseCmp(extension, ".hdr") == 0) {
|
||||
return loadGridFloat(fileName, s);
|
||||
}
|
||||
*/
|
||||
|
||||
return NULL;
|
||||
@ -797,7 +794,7 @@ Image * nv::ImageIO::loadJPG(Stream & s)
|
||||
// Read the entire file.
|
||||
Array<uint8> byte_array;
|
||||
byte_array.resize(s.size());
|
||||
s.serialize(byte_array.mutableBuffer(), s.size());
|
||||
s.serialize(byte_array.unsecureBuffer(), s.size());
|
||||
|
||||
jpeg_decompress_struct cinfo;
|
||||
jpeg_error_mgr jerr;
|
||||
@ -1092,8 +1089,7 @@ namespace
|
||||
|
||||
virtual void seekg(Imf::Int64 pos)
|
||||
{
|
||||
nvDebugCheck(pos >= 0 && pos < UINT_MAX);
|
||||
m_stream.seek((uint)pos);
|
||||
m_stream.seek(pos);
|
||||
}
|
||||
|
||||
virtual void clear()
|
||||
@ -1302,77 +1298,6 @@ bool nv::ImageIO::saveFloatPFM(const char * fileName, const FloatImage * fimage,
|
||||
return true;
|
||||
}
|
||||
|
||||
#pragma warning(disable : 4996)
|
||||
|
||||
/// Load a GridFloat terrain image.
///
/// GridFloat comes in two files: an ASCII header (".hdr") and a blob of binary
/// floats (".flt"). The header is parsed from @a s; the blob is opened from disk
/// using @a fileName with its extension replaced by ".flt".
///
/// @param fileName Name of the ".hdr" header file; also used to derive the ".flt" name.
/// @param s        Stream the header tokens are read from. Must be loading and error free.
/// @return A single-channel FloatImage released to the caller, or NULL when the
///         header is malformed, the dimensions are not positive, the ".flt" file
///         cannot be opened, or it holds fewer samples than the header declares.
NVIMAGE_API FloatImage * nv::ImageIO::loadGridFloat(const char * fileName, Stream & s)
{
    nvCheck(s.isLoading());
    nvCheck(!s.isError());

    Tokenizer parser(&s);

    parser.nextLine();

    if (parser.token() != "ncols")
    {
        nvDebug("Failed to find 'ncols' token in file '%s'.\n", fileName);
        return NULL;
    }

    parser.nextToken(true);
    const int nCols = parser.token().toInt();

    parser.nextToken(true);
    if (parser.token() != "nrows")
    {
        nvDebug("Failed to find 'nrows' token in file '%s'.\n", fileName);
        return NULL;
    }

    parser.nextToken(true);
    const int nRows = parser.token().toInt();

    // The dimensions come straight from the file. Reject zero or negative values
    // before they are used as an allocation size and as an fread() element count.
    if (nCols <= 0 || nRows <= 0)
    {
        nvDebug("Invalid dimensions %d x %d in GridFloat header '%s'.\n", nCols, nRows, fileName);
        return NULL;
    }

    // There's a byte order defined in the header. We could read it. However, here we
    // just assume that it matches the platform's byte order. Disabled parsing code,
    // kept for reference:
    //
    //   // There is then a bunch of data that we don't care about (lat, long definitions, etc).
    //   for (int i=0; i!=9; ++i)
    //       parser.nextToken(true);
    //
    //   if (parser.token() != "byteorder")
    //       return NULL;
    //
    //   parser.nextToken(true);
    //
    //   const Stream::ByteOrder byteOrder = (parser.token() == "LSBFIRST")? Stream::LittleEndian: Stream::BigEndian;

    // GridFloat comes in two files: an ASCII header which was parsed above (.hdr) and a big blob
    // of binary data in a .flt file.
    Path dataPath(fileName);
    dataPath.stripExtension();
    dataPath.append(".flt");

    // Open the binary data.
    // NOTE(review): confirm that Path::fileName() keeps the directory part; if it
    // returns only the last path component this open fails for files outside the cwd.
    FILE* file = fopen(dataPath.fileName(), "rb");
    if (!file)
    {
        nvDebug("Failed to find GridFloat blob file '%s' corresponding to '%s'.\n", dataPath.fileName(), fileName);
        return NULL;
    }

    // Allocate image.
    AutoPtr<FloatImage> fimage(new FloatImage());
    fimage->allocate(1, nCols, nRows);

    float * channel = fimage->channel(0);

    // The binary blob is defined to be in row-major order, containing IEEE floats.
    // So we can just slurp it in. Theoretically, we ought to use the byte order.
    // The count is computed in size_t so the product is not evaluated in int.
    const size_t sampleCount = size_t(nRows) * size_t(nCols);
    const size_t nRead = fread((void*) channel, sizeof(float), sampleCount, file);
    fclose(file);

    // A short read would leave the tail of the channel uninitialized.
    if (nRead != sampleCount)
    {
        nvDebug("GridFloat blob file '%s' is shorter than the header '%s' declares.\n", dataPath.fileName(), fileName);
        return NULL; // fimage is expected to be destroyed by AutoPtr here - confirm.
    }

    return fimage.release();
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
|
@ -47,15 +47,10 @@ namespace nv
|
||||
|
||||
NVIMAGE_API bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
|
||||
#endif
|
||||
/*
|
||||
NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
|
||||
NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
|
||||
|
||||
// GridFloat is a simple, open format for terrain elevation data. See http://ned.usgs.gov/Ned/faq.asp.
|
||||
// Expects: 1) fileName will be an ".hdr" header file, 2) there will also exist a corresponding float data
|
||||
// blob in a ".flt" file. (This is what USGS gives you.)
|
||||
NVIMAGE_API FloatImage * loadGridFloat(const char * fileName, Stream & s);
|
||||
*/
|
||||
// NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
|
||||
// NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
|
||||
|
||||
} // ImageIO namespace
|
||||
|
||||
} // nv namespace
|
||||
|
@ -21,15 +21,16 @@
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <nvcore/Ptr.h>
|
||||
|
||||
#include <nvmath/Color.h>
|
||||
|
||||
#include <nvimage/NormalMap.h>
|
||||
#include <nvimage/Filter.h>
|
||||
#include <nvimage/FloatImage.h>
|
||||
#include <nvimage/Image.h>
|
||||
|
||||
#include <nvmath/Color.h>
|
||||
|
||||
#include <nvcore/Ptr.h>
|
||||
|
||||
|
||||
using namespace nv;
|
||||
|
||||
// Create normal map using the given kernels.
|
||||
|
@ -39,7 +39,7 @@ namespace nv
|
||||
bool isSupported() const
|
||||
{
|
||||
if (version != 1) {
|
||||
printf("*** bad version number %u\n", version);
|
||||
nvDebug("*** bad version number %u\n", version);
|
||||
return false;
|
||||
}
|
||||
if (channel_count > 4) {
|
||||
|
@ -12,14 +12,14 @@ http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
|
||||
@@ This code needs to be reviewed, I'm not sure it's correct.
|
||||
*/
|
||||
|
||||
#include <string.h> // memset
|
||||
|
||||
#include <nvcore/Containers.h> // swap
|
||||
#include <nvimage/Quantize.h>
|
||||
#include <nvimage/Image.h>
|
||||
#include <nvimage/PixelFormat.h>
|
||||
|
||||
#include <nvmath/Color.h>
|
||||
|
||||
#include <nvimage/Image.h>
|
||||
#include <nvimage/Quantize.h>
|
||||
#include <nvcore/Containers.h> // swap
|
||||
|
||||
|
||||
using namespace nv;
|
||||
|
||||
@ -51,94 +51,20 @@ void nv::Quantize::BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ )
|
||||
// Simple quantization.
|
||||
void nv::Quantize::RGB16( Image * image )
|
||||
{
|
||||
nvCheck(image != NULL);
|
||||
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
|
||||
for(uint y = 0; y < h; y++) {
|
||||
for(uint x = 0; x < w; x++) {
|
||||
|
||||
Color32 pixel32 = image->pixel(x, y);
|
||||
|
||||
// Convert to 16 bit and back to 32 using regular bit expansion.
|
||||
Color32 pixel16 = toColor32( toColor16(pixel32) );
|
||||
|
||||
// Store color.
|
||||
image->pixel(x, y) = pixel16;
|
||||
}
|
||||
}
|
||||
Truncate(image, 5, 6, 5, 8);
|
||||
}
|
||||
|
||||
// Alpha quantization.
|
||||
void nv::Quantize::Alpha4( Image * image )
|
||||
{
|
||||
nvCheck(image != NULL);
|
||||
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
|
||||
for(uint y = 0; y < h; y++) {
|
||||
for(uint x = 0; x < w; x++) {
|
||||
|
||||
Color32 pixel = image->pixel(x, y);
|
||||
|
||||
// Convert to 4 bit using regular bit expansion.
|
||||
pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
|
||||
|
||||
// Store color.
|
||||
image->pixel(x, y) = pixel;
|
||||
}
|
||||
}
|
||||
Truncate(image, 8, 8, 8, 4);
|
||||
}
|
||||
|
||||
|
||||
// Error diffusion. Floyd Steinberg.
|
||||
void nv::Quantize::FloydSteinberg_RGB16( Image * image )
|
||||
{
|
||||
nvCheck(image != NULL);
|
||||
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
|
||||
// @@ Use fixed point?
|
||||
Vector3 * row0 = new Vector3[w+2];
|
||||
Vector3 * row1 = new Vector3[w+2];
|
||||
memset(row0, 0, sizeof(Vector3)*(w+2));
|
||||
memset(row1, 0, sizeof(Vector3)*(w+2));
|
||||
|
||||
for(uint y = 0; y < h; y++) {
|
||||
for(uint x = 0; x < w; x++) {
|
||||
|
||||
Color32 pixel32 = image->pixel(x, y);
|
||||
|
||||
// Add error. // @@ We shouldn't clamp here!
|
||||
pixel32.r = clamp(int(pixel32.r) + int(row0[1+x].x()), 0, 255);
|
||||
pixel32.g = clamp(int(pixel32.g) + int(row0[1+x].y()), 0, 255);
|
||||
pixel32.b = clamp(int(pixel32.b) + int(row0[1+x].z()), 0, 255);
|
||||
|
||||
// Convert to 16 bit. @@ Use regular clamp?
|
||||
Color32 pixel16 = toColor32( toColor16(pixel32) );
|
||||
|
||||
// Store color.
|
||||
image->pixel(x, y) = pixel16;
|
||||
|
||||
// Compute new error.
|
||||
Vector3 diff(float(pixel32.r - pixel16.r), float(pixel32.g - pixel16.g), float(pixel32.b - pixel16.b));
|
||||
|
||||
// Propagate new error.
|
||||
row0[1+x+1] += 7.0f / 16.0f * diff;
|
||||
row1[1+x-1] += 3.0f / 16.0f * diff;
|
||||
row1[1+x+0] += 5.0f / 16.0f * diff;
|
||||
row1[1+x+1] += 1.0f / 16.0f * diff;
|
||||
}
|
||||
|
||||
swap(row0, row1);
|
||||
memset(row1, 0, sizeof(Vector3)*(w+2));
|
||||
}
|
||||
|
||||
delete [] row0;
|
||||
delete [] row1;
|
||||
FloydSteinberg(image, 5, 6, 5, 8);
|
||||
}
|
||||
|
||||
|
||||
@ -192,34 +118,90 @@ void nv::Quantize::FloydSteinberg_BinaryAlpha( Image * image, int alpha_threshol
|
||||
|
||||
// Error diffusion. Floyd Steinberg.
|
||||
void nv::Quantize::FloydSteinberg_Alpha4( Image * image )
|
||||
{
|
||||
FloydSteinberg(image, 8, 8, 8, 4);
|
||||
}
|
||||
|
||||
|
||||
void nv::Quantize::Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
|
||||
{
|
||||
nvCheck(image != NULL);
|
||||
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
|
||||
// @@ Use fixed point?
|
||||
float * row0 = new float[(w+2)];
|
||||
float * row1 = new float[(w+2)];
|
||||
memset(row0, 0, sizeof(float)*(w+2));
|
||||
memset(row1, 0, sizeof(float)*(w+2));
|
||||
|
||||
for(uint y = 0; y < h; y++) {
|
||||
for(uint x = 0; x < w; x++) {
|
||||
|
||||
Color32 pixel = image->pixel(x, y);
|
||||
|
||||
// Add error.
|
||||
int alpha = int(pixel.a) + int(row0[1+x]);
|
||||
|
||||
// Convert to 4 bit using regular bit expansion.
|
||||
pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
|
||||
|
||||
|
||||
// Convert to our desired size, and reconstruct.
|
||||
pixel.r = PixelFormat::convert(pixel.r, 8, rsize);
|
||||
pixel.r = PixelFormat::convert(pixel.r, rsize, 8);
|
||||
|
||||
pixel.g = PixelFormat::convert(pixel.g, 8, gsize);
|
||||
pixel.g = PixelFormat::convert(pixel.g, gsize, 8);
|
||||
|
||||
pixel.b = PixelFormat::convert(pixel.b, 8, bsize);
|
||||
pixel.b = PixelFormat::convert(pixel.b, bsize, 8);
|
||||
|
||||
pixel.a = PixelFormat::convert(pixel.a, 8, asize);
|
||||
pixel.a = PixelFormat::convert(pixel.a, asize, 8);
|
||||
|
||||
// Store color.
|
||||
image->pixel(x, y) = pixel;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Error diffusion. Floyd Steinberg.
|
||||
void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
|
||||
{
|
||||
nvCheck(image != NULL);
|
||||
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
|
||||
Vector4 * row0 = new Vector4[w+2];
|
||||
Vector4 * row1 = new Vector4[w+2];
|
||||
memset(row0, 0, sizeof(Vector4)*(w+2));
|
||||
memset(row1, 0, sizeof(Vector4)*(w+2));
|
||||
|
||||
for (uint y = 0; y < h; y++) {
|
||||
for (uint x = 0; x < w; x++) {
|
||||
|
||||
Color32 pixel = image->pixel(x, y);
|
||||
|
||||
// Add error.
|
||||
pixel.r = clamp(int(pixel.r) + int(row0[1+x].x()), 0, 255);
|
||||
pixel.g = clamp(int(pixel.g) + int(row0[1+x].y()), 0, 255);
|
||||
pixel.b = clamp(int(pixel.b) + int(row0[1+x].z()), 0, 255);
|
||||
pixel.a = clamp(int(pixel.a) + int(row0[1+x].w()), 0, 255);
|
||||
|
||||
int r = pixel.r;
|
||||
int g = pixel.g;
|
||||
int b = pixel.b;
|
||||
int a = pixel.a;
|
||||
|
||||
// Convert to our desired size, and reconstruct.
|
||||
r = PixelFormat::convert(r, 8, rsize);
|
||||
r = PixelFormat::convert(r, rsize, 8);
|
||||
|
||||
g = PixelFormat::convert(g, 8, gsize);
|
||||
g = PixelFormat::convert(g, gsize, 8);
|
||||
|
||||
b = PixelFormat::convert(b, 8, bsize);
|
||||
b = PixelFormat::convert(b, bsize, 8);
|
||||
|
||||
a = PixelFormat::convert(a, 8, asize);
|
||||
a = PixelFormat::convert(a, asize, 8);
|
||||
|
||||
// Store color.
|
||||
image->pixel(x, y) = Color32(r, g, b, a);
|
||||
|
||||
// Compute new error.
|
||||
float diff = float(alpha - pixel.a);
|
||||
Vector4 diff(float(int(pixel.r) - r), float(int(pixel.g) - g), float(int(pixel.b) - b), float(int(pixel.a) - a));
|
||||
|
||||
// Propagate new error.
|
||||
row0[1+x+1] += 7.0f / 16.0f * diff;
|
||||
@ -229,10 +211,9 @@ void nv::Quantize::FloydSteinberg_Alpha4( Image * image )
|
||||
}
|
||||
|
||||
swap(row0, row1);
|
||||
memset(row1, 0, sizeof(float)*(w+2));
|
||||
memset(row1, 0, sizeof(Vector4)*(w+2));
|
||||
}
|
||||
|
||||
delete [] row0;
|
||||
delete [] row1;
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,9 @@
|
||||
#ifndef NV_IMAGE_QUANTIZE_H
|
||||
#define NV_IMAGE_QUANTIZE_H
|
||||
|
||||
#include <nvimage/nvimage.h>
|
||||
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Image;
|
||||
@ -17,6 +20,9 @@ namespace nv
|
||||
void FloydSteinberg_BinaryAlpha(Image * img, int alpha_threshold = 127);
|
||||
void FloydSteinberg_Alpha4(Image * img);
|
||||
|
||||
void Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
|
||||
void FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
|
||||
|
||||
// @@ Add palette quantization algorithms!
|
||||
}
|
||||
}
|
||||
|
@ -49,7 +49,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
|
||||
return;
|
||||
}
|
||||
}
|
||||
normal = nv::normalize(normal, epsilon);
|
||||
normal = ::normalize(normal, epsilon);
|
||||
|
||||
tangent -= normal * dot(normal, tangent);
|
||||
bitangent -= normal * dot(normal, bitangent);
|
||||
@ -68,8 +68,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
|
||||
}
|
||||
else
|
||||
{
|
||||
#if 0
|
||||
tangent = nv::normalize(tangent, epsilon);
|
||||
tangent = ::normalize(tangent, epsilon);
|
||||
bitangent -= tangent * dot(tangent, bitangent);
|
||||
|
||||
if (length(bitangent) < epsilon)
|
||||
@ -79,47 +78,11 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
|
||||
}
|
||||
else
|
||||
{
|
||||
bitangent = nv::normalize(bitangent, epsilon);
|
||||
tangent = ::normalize(tangent, epsilon);
|
||||
}
|
||||
#else
|
||||
if (length(bitangent) < epsilon)
|
||||
{
|
||||
bitangent = cross(tangent, normal);
|
||||
nvCheck(isNormalized(bitangent));
|
||||
}
|
||||
else
|
||||
{
|
||||
tangent = nv::normalize(tangent);
|
||||
bitangent = nv::normalize(bitangent);
|
||||
|
||||
Vector3 bisector = nv::normalize(tangent + bitangent);
|
||||
Vector3 axis = cross(bisector, normal);
|
||||
|
||||
nvDebugCheck(isNormalized(axis, epsilon));
|
||||
nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
|
||||
|
||||
if (dot(axis, tangent) > 0)
|
||||
{
|
||||
tangent = nv::normalize(bisector + axis);
|
||||
bitangent = nv::normalize(bisector - axis);
|
||||
}
|
||||
else
|
||||
{
|
||||
tangent = nv::normalize(bisector - axis);
|
||||
bitangent = nv::normalize(bisector + axis);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*// Check vector lengths.
|
||||
if (!isNormalized(normal, epsilon))
|
||||
{
|
||||
nvDebug("%f %f %f\n", normal.x(), normal.y(), normal.z());
|
||||
nvDebug("%f %f %f\n", tangent.x(), tangent.y(), tangent.z());
|
||||
nvDebug("%f %f %f\n", bitangent.x(), bitangent.y(), bitangent.z());
|
||||
}*/
|
||||
|
||||
// Check vector lengths.
|
||||
nvCheck(isNormalized(normal, epsilon));
|
||||
nvCheck(isNormalized(tangent, epsilon));
|
||||
nvCheck(isNormalized(bitangent, epsilon));
|
||||
@ -162,18 +125,9 @@ void Basis::buildFrameForDirection(Vector3::Arg d)
|
||||
bitangent = cross(normal, tangent);
|
||||
}
|
||||
|
||||
bool Basis::isValid() const
|
||||
{
|
||||
if (equal(normal, Vector3(zero))) return false;
|
||||
if (equal(tangent, Vector3(zero))) return false;
|
||||
if (equal(bitangent, Vector3(zero))) return false;
|
||||
|
||||
if (equal(determinant(), 0.0f)) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
/// Transform by this basis. (From this basis to object space).
|
||||
Vector3 Basis::transform(Vector3::Arg v) const
|
||||
{
|
||||
@ -190,31 +144,30 @@ Vector3 Basis::transformT(Vector3::Arg v)
|
||||
}
|
||||
|
||||
/// Transform by the inverse. (From object space to this basis).
|
||||
/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
|
||||
/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned.
|
||||
Vector3 Basis::transformI(Vector3::Arg v) const
|
||||
{
|
||||
const float det = determinant();
|
||||
nvDebugCheck(!equal(det, 0.0f, 0.0f));
|
||||
nvCheck(!equalf(det, 0.0f));
|
||||
|
||||
const float idet = 1.0f / det;
|
||||
|
||||
// Rows of the inverse matrix.
|
||||
Vector3 r0(
|
||||
(bitangent.y() * normal.z() - bitangent.z() * normal.y()),
|
||||
-(bitangent.x() * normal.z() - bitangent.z() * normal.x()),
|
||||
(bitangent.x() * normal.y() - bitangent.y() * normal.x()));
|
||||
Vector3 r0, r1, r2;
|
||||
r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet;
|
||||
r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet;
|
||||
r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet;
|
||||
|
||||
Vector3 r1(
|
||||
-(tangent.y() * normal.z() - tangent.z() * normal.y()),
|
||||
(tangent.x() * normal.z() - tangent.z() * normal.x()),
|
||||
-(tangent.x() * normal.y() - tangent.y() * normal.x()));
|
||||
r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet;
|
||||
r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet;
|
||||
r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet;
|
||||
|
||||
Vector3 r2(
|
||||
(tangent.y() * bitangent.z() - tangent.z() * bitangent.y()),
|
||||
-(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()),
|
||||
(tangent.x() * bitangent.y() - tangent.y() * bitangent.x()));
|
||||
r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet;
|
||||
r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet;
|
||||
r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet;
|
||||
|
||||
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet;
|
||||
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2));
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
@ -54,8 +54,7 @@ namespace nv
|
||||
tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y();
|
||||
}
|
||||
|
||||
bool isValid() const;
|
||||
|
||||
/*
|
||||
// Get transform matrix for this basis.
|
||||
NVMATH_API Matrix matrix() const;
|
||||
|
||||
@ -67,7 +66,7 @@ namespace nv
|
||||
|
||||
// Transform by the inverse. (From object space to this basis).
|
||||
NVMATH_API Vector3 transformI(Vector3::Arg v) const;
|
||||
|
||||
*/
|
||||
|
||||
Vector3 tangent;
|
||||
Vector3 bitangent;
|
||||
|
@ -9,7 +9,6 @@
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Stream;
|
||||
|
||||
/// Axis Aligned Bounding Box.
|
||||
class Box
|
||||
@ -28,13 +27,11 @@ public:
|
||||
// Cast operators.
|
||||
operator const float * () const { return reinterpret_cast<const float *>(this); }
|
||||
|
||||
// Min corner of the box.
|
||||
Vector3 minCorner() const { return m_mins; }
|
||||
Vector3 & minCorner() { return m_mins; }
|
||||
/// Min corner of the box.
|
||||
Vector3 mins() const { return m_mins; }
|
||||
|
||||
// Max corner of the box.
|
||||
Vector3 maxCorner() const { return m_maxs; }
|
||||
Vector3 & maxCorner() { return m_maxs; }
|
||||
/// Max corner of the box.
|
||||
Vector3 maxs() const { return m_maxs; }
|
||||
|
||||
/// Clear the bounds.
|
||||
void clearBounds()
|
||||
@ -129,8 +126,6 @@ public:
|
||||
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
|
||||
}
|
||||
|
||||
friend Stream & operator<< (Stream & s, Box & box);
|
||||
|
||||
private:
|
||||
|
||||
Vector3 m_mins;
|
||||
|
@ -5,19 +5,13 @@ SET(MATH_SRCS
|
||||
Vector.h
|
||||
Matrix.h
|
||||
Quaternion.h
|
||||
Plane.h Plane.cpp
|
||||
Box.h
|
||||
Color.h
|
||||
Montecarlo.h Montecarlo.cpp
|
||||
Random.h Random.cpp
|
||||
SphericalHarmonic.h SphericalHarmonic.cpp
|
||||
Basis.h Basis.cpp
|
||||
Triangle.h Triangle.cpp TriBox.cpp
|
||||
Polygon.h Polygon.cpp
|
||||
TypeSerialization.h TypeSerialization.cpp
|
||||
Sparse.h Sparse.cpp
|
||||
Solver.h Solver.cpp
|
||||
KahanSum.h)
|
||||
Triangle.h Triangle.cpp TriBox.cpp)
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
|
@ -1,38 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_MATH_KAHANSUM_H
|
||||
#define NV_MATH_KAHANSUM_H
|
||||
|
||||
#include <nvmath/nvmath.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
/// Compensated floating point accumulator.
///
/// Besides the running total, the object keeps the low-order part that was
/// lost when the previous value was added, and feeds it back into the next
/// addition.
class KahanSum
{
public:
    /// Start with an empty sum and no pending correction.
    KahanSum() : accum(0.0f), err(0.0f) {}

    /// Add @a f to the running sum.
    void add(float f)
    {
        // Fold the correction carried over from the previous call into the input.
        const float corrected = f + err;
        const float total = accum + corrected;

        // (accum - total) is minus the amount that was actually added; adding
        // 'corrected' back leaves what rounding dropped in this step.
        err = (accum - total) + corrected;

        accum = total;
    }

    /// Current value of the sum (the pending correction is not included).
    float sum() const
    {
        return accum;
    }

private:
    float accum; ///< Running total.
    float err;   ///< Part of the last addend that did not fit into accum.
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_MATH_KAHANSUM_H
|
@ -332,7 +332,7 @@ inline Matrix transpose(Matrix::Arg m)
|
||||
Matrix r;
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
for (int j = 0; j < 4; i++)
|
||||
for (int j = 0; j < 4; j++)
|
||||
{
|
||||
r(i, j) = m(j, i);
|
||||
}
|
||||
|
@ -1,168 +0,0 @@
|
||||
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
|
||||
|
||||
#include <nvmath/Polygon.h>
|
||||
|
||||
#include <nvmath/Triangle.h>
|
||||
#include <nvmath/Plane.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
|
||||
/// Default constructor. Adds no vertices; pointArray is left default-constructed.
Polygon::Polygon()
|
||||
{
|
||||
}
|
||||
|
||||
/// Build a three-vertex polygon from the corners t.v[0], t.v[1] and t.v[2].
Polygon::Polygon(const Triangle & t)
{
    const uint cornerCount = 3;
    pointArray.resize(cornerCount);

    for (uint corner = 0; corner < cornerCount; corner++)
    {
        pointArray[corner] = t.v[corner];
    }
}
|
||||
|
||||
/// Build a polygon by copying the first @a vertexCount entries of @a points, in order.
Polygon::Polygon(const Vector3 * points, uint vertexCount)
{
    pointArray.resize(vertexCount);

    const Vector3 * source = points;
    uint slot = 0;
    while (slot != vertexCount)
    {
        pointArray[slot] = *source;
        ++source;
        ++slot;
    }
}
|
||||
|
||||
|
||||
/// Compute polygon area.
|
||||
float Polygon::area() const
|
||||
{
|
||||
float total = 0;
|
||||
|
||||
const uint pointCount = pointArray.count();
|
||||
for (uint i = 2; i < pointCount; i++)
|
||||
{
|
||||
Vector3 v1 = pointArray[i-1] - pointArray[0];
|
||||
Vector3 v2 = pointArray[i] - pointArray[0];
|
||||
|
||||
total += 0.5f * length(cross(v1, v2));
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
/// Get the bounds of the polygon.
|
||||
Box Polygon::bounds() const
|
||||
{
|
||||
Box bounds;
|
||||
bounds.clearBounds();
|
||||
foreach(p, pointArray)
|
||||
{
|
||||
bounds.addPointToBounds(pointArray[p]);
|
||||
}
|
||||
return bounds;
|
||||
}
|
||||
|
||||
|
||||
/// Get the plane of the polygon.
|
||||
Plane Polygon::plane() const
|
||||
{
|
||||
// @@ Do something better than this?
|
||||
Vector3 n = cross(pointArray[1] - pointArray[0], pointArray[2] - pointArray[0]);
|
||||
return Vector4(n, dot(n, pointArray[0]));
|
||||
}
|
||||
|
||||
|
||||
/// Clip polygon to box.
|
||||
uint Polygon::clipTo(const Box & box)
|
||||
{
|
||||
const Plane posX( 1, 0, 0, box.maxCorner().x());
|
||||
const Plane negX(-1, 0, 0,-box.minCorner().x());
|
||||
const Plane posY( 0, 1, 0, box.maxCorner().y());
|
||||
const Plane negY( 0,-1, 0,-box.minCorner().y());
|
||||
const Plane posZ( 0, 0, 1, box.maxCorner().z());
|
||||
const Plane negZ( 0, 0,-1,-box.minCorner().z());
|
||||
|
||||
if (clipTo(posX) == 0) return 0;
|
||||
if (clipTo(negX) == 0) return 0;
|
||||
if (clipTo(posY) == 0) return 0;
|
||||
if (clipTo(negY) == 0) return 0;
|
||||
if (clipTo(posZ) == 0) return 0;
|
||||
if (clipTo(negZ) == 0) return 0;
|
||||
|
||||
return pointArray.count();
|
||||
}
|
||||
|
||||
|
||||
/// Clip polygon to plane.
|
||||
uint Polygon::clipTo(const Plane & plane)
|
||||
{
|
||||
int count = 0;
|
||||
|
||||
const uint pointCount = pointArray.count();
|
||||
|
||||
Array<Vector3> newPointArray(pointCount + 1); // @@ Do not create copy every time.
|
||||
|
||||
Vector3 prevPoint = pointArray[pointCount - 1];
|
||||
float prevDist = dot(plane.vector(), prevPoint) - plane.offset();
|
||||
|
||||
for (uint i = 0; i < pointCount; i++)
|
||||
{
|
||||
const Vector3 point = pointArray[i];
|
||||
float dist = dot(plane.vector(), point) - plane.offset();
|
||||
|
||||
// @@ Handle points on plane better.
|
||||
|
||||
if (dist <= 0) // interior.
|
||||
{
|
||||
if (prevDist > 0) // exterior
|
||||
{
|
||||
// Add segment intersection point.
|
||||
Vector3 dp = point - prevPoint;
|
||||
|
||||
float t = dist / prevDist;
|
||||
newPointArray.append(point - dp * t);
|
||||
}
|
||||
|
||||
// Add interior point.
|
||||
newPointArray.append(point);
|
||||
}
|
||||
else if (dist > 0 && prevDist < 0)
|
||||
{
|
||||
// Add segment intersection point.
|
||||
Vector3 dp = point - prevPoint;
|
||||
|
||||
float t = dist / prevDist;
|
||||
newPointArray.append(point - dp * t);
|
||||
}
|
||||
|
||||
prevPoint = point;
|
||||
prevDist = dist;
|
||||
}
|
||||
|
||||
swap(pointArray, newPointArray);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
void Polygon::removeColinearPoints()
|
||||
{
|
||||
const uint pointCount = pointArray.count();
|
||||
|
||||
Array<Vector3> newPointArray(pointCount);
|
||||
|
||||
for (uint i = 0 ; i < pointCount; i++)
|
||||
{
|
||||
int j = (i + 1) % pointCount;
|
||||
int k = (i + pointCount - 1) % pointCount;
|
||||
|
||||
Vector3 v1 = normalize(pointArray[j] - pointArray[i]);
|
||||
Vector3 v2 = normalize(pointArray[i] - pointArray[k]);
|
||||
|
||||
if (dot(v1, v2) < 0.999)
|
||||
{
|
||||
newPointArray.append(pointArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
swap(pointArray, newPointArray);
|
||||
}
|
||||
|
@ -1,45 +0,0 @@
|
||||
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
|
||||
|
||||
#ifndef NV_MATH_POLYGON_H
|
||||
#define NV_MATH_POLYGON_H
|
||||
|
||||
#include <nvcore/Containers.h>
|
||||
|
||||
#include <nvmath/nvmath.h>
|
||||
#include <nvmath/Vector.h>
|
||||
#include <nvmath/Box.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Box;
|
||||
class Plane;
|
||||
class Triangle;
|
||||
|
||||
|
||||
/// Polygon stored as an ordered list of 3D vertices.
class Polygon
|
||||
{
|
||||
// NOTE(review): macro defined elsewhere; presumably disables copying - confirm.
NV_FORBID_COPY(Polygon);
|
||||
public:
|
||||
|
||||
/// Default constructor; adds no vertices.
Polygon();
|
||||
/// Build a polygon from the three vertices of triangle @a t.
Polygon(const Triangle & t);
|
||||
/// Build a polygon by copying @a vertexCount vertices from @a points.
Polygon(const Vector3 * points, uint vertexCount);
|
||||
|
||||
/// Area, summed over the triangle fan anchored at the first vertex.
float area() const;
|
||||
/// Box that results from adding every vertex to cleared bounds.
Box bounds() const;
|
||||
/// Plane computed from the first three vertices.
Plane plane() const;
|
||||
|
||||
/// Clip against the six faces of @a box; returns 0 as soon as a step reports 0,
/// otherwise the number of vertices left.
uint clipTo(const Box & box);
|
||||
/// Clip against @a plane, replacing the stored vertex list.
uint clipTo(const Plane & plane);
|
||||
|
||||
/// Remove vertices whose adjacent edges point in nearly the same direction.
void removeColinearPoints();
|
||||
|
||||
private:
|
||||
|
||||
/// Polygon vertices, in order.
Array<Vector3> pointArray;
|
||||
};
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_MATH_POLYGON_H
|
@ -51,6 +51,7 @@ namespace nv
|
||||
|
||||
inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
|
||||
{
|
||||
// @@ Efficient SIMD implementation?
|
||||
return Quaternion(
|
||||
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
|
||||
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(),
|
||||
@ -58,40 +59,6 @@ namespace nv
|
||||
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w());
|
||||
}
|
||||
|
||||
/// Product of quaternion @a a and vector @a b. The vector only contributes
/// x, y and z terms; it has no w term in any of the four sums.
inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
{
    const float ax = a.x(), ay = a.y(), az = a.z(), aw = a.w();
    const float bx = b.x(), by = b.y(), bz = b.z();

    const float rx = + ay*bz - az*by + aw*bx;
    const float ry = - ax*bz + az*bx + aw*by;
    const float rz = + ax*by - ay*bx + aw*bz;
    const float rw = - ax*bx - ay*by - az*bz;

    return Quaternion(rx, ry, rz, rw);
}
|
||||
|
||||
/// Product of vector @a a and quaternion @a b. The vector only contributes
/// x, y and z terms; it has no w term in any of the four sums.
inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
{
    const float ax = a.x(), ay = a.y(), az = a.z();
    const float bx = b.x(), by = b.y(), bz = b.z(), bw = b.w();

    const float rx = + ax*bw + ay*bz - az*by;
    const float ry = - ax*bz + ay*bw + az*bx;
    const float rz = + ax*by - ay*bx + az*bw;
    const float rw = - ax*bx - ay*by - az*bz;

    return Quaternion(rx, ry, rz, rw);
}
|
||||
|
||||
/// Operator form of mul(a, b) for two quaternions.
inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
{
    Quaternion product = mul(a, b);
    return product;
}
|
||||
|
||||
/// Operator form of mul(a, b) for a quaternion times a vector.
inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
{
    Quaternion product = mul(a, b);
    return product;
}
|
||||
|
||||
/// Operator form of mul(a, b) for a vector times a quaternion.
inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
{
    Quaternion product = mul(a, b);
    return product;
}
|
||||
|
||||
|
||||
inline Quaternion scale(Quaternion::Arg q, float s)
|
||||
{
|
||||
return scale(q.asVector(), s);
|
||||
@ -155,24 +122,6 @@ namespace nv
|
||||
return Quaternion(Vector4(v * s, c));
|
||||
}
|
||||
|
||||
/// Return the x, y, z components of @a q as a vector.
inline Vector3 imag(Quaternion::Arg q)
{
    Vector3 vectorPart = q.asVector().xyz();
    return vectorPart;
}
|
||||
|
||||
/// Return the w component of @a q.
inline float real(Quaternion::Arg q)
{
    const float scalarPart = q.w();
    return scalarPart;
}
|
||||
|
||||
|
||||
/// Transform vector.
|
||||
inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
|
||||
{
|
||||
Quaternion t = q * v * conjugate(q);
|
||||
return imag(t);
|
||||
}
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
@ -1,726 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include <nvmath/Solver.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
namespace
|
||||
{
|
||||
class Preconditioner
|
||||
{
|
||||
public:
|
||||
// Virtual dtor.
|
||||
virtual ~Preconditioner() { }
|
||||
|
||||
// Apply preconditioning step.
|
||||
virtual void apply(const FullVector & x, FullVector & y) const = 0;
|
||||
};
|
||||
|
||||
|
||||
// Jacobi preconditioner.
|
||||
class JacobiPreconditioner : public Preconditioner
|
||||
{
|
||||
public:
|
||||
|
||||
JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
|
||||
{
|
||||
nvCheck(M.isSquare());
|
||||
|
||||
for(uint x = 0; x < M.width(); x++)
|
||||
{
|
||||
float elem = M.getCoefficient(x, x);
|
||||
nvDebugCheck( elem != 0.0f );
|
||||
|
||||
if (symmetric)
|
||||
{
|
||||
m_inverseDiagonal[x] = 1.0f / sqrt(fabs(elem));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_inverseDiagonal[x] = 1.0f / elem;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void apply(const FullVector & x, FullVector & y) const
|
||||
{
|
||||
y *= x;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
FullVector m_inverseDiagonal;
|
||||
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
static int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
|
||||
static int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
|
||||
|
||||
|
||||
// Solve the symmetric system: At<41>A<EFBFBD>x = At<41>b
|
||||
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
|
||||
{
|
||||
nvDebugCheck(A.width() == x.dimension());
|
||||
nvDebugCheck(A.height() == b.dimension());
|
||||
nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
|
||||
|
||||
const uint D = A.width();
|
||||
|
||||
FullVector Atb(D);
|
||||
mult(Transposed, A, b, Atb);
|
||||
|
||||
SparseMatrix AtA(D);
|
||||
mult(Transposed, A, NoTransposed, A, AtA);
|
||||
|
||||
SymmetricSolver(AtA, Atb, x, epsilon);
|
||||
}
|
||||
|
||||
|
||||
// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
|
||||
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
|
||||
{
|
||||
nvDebugCheck(A.width() == x.dimension());
|
||||
nvDebugCheck(A.height() == b.dimension());
|
||||
nvDebugCheck(A.height() >= A.width() - lockedCount);
|
||||
|
||||
// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
|
||||
|
||||
const uint D = A.width() - lockedCount;
|
||||
nvDebugCheck(D > 0);
|
||||
|
||||
// Compute: b - Al * xl
|
||||
FullVector b_Alxl(b);
|
||||
|
||||
for (uint y = 0; y < A.height(); y++)
|
||||
{
|
||||
const uint count = A.getRow(y).count();
|
||||
for (uint e = 0; e < count; e++)
|
||||
{
|
||||
uint column = A.getRow(y)[e].x;
|
||||
|
||||
bool isFree = true;
|
||||
for (uint i = 0; i < lockedCount; i++)
|
||||
{
|
||||
isFree &= (lockedParameters[i] != column);
|
||||
}
|
||||
|
||||
if (!isFree)
|
||||
{
|
||||
b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove locked columns from A.
|
||||
SparseMatrix Af(D, A.height());
|
||||
|
||||
for (uint y = 0; y < A.height(); y++)
|
||||
{
|
||||
const uint count = A.getRow(y).count();
|
||||
for (uint e = 0; e < count; e++)
|
||||
{
|
||||
uint column = A.getRow(y)[e].x;
|
||||
uint ix = column;
|
||||
|
||||
bool isFree = true;
|
||||
for (uint i = 0; i < lockedCount; i++)
|
||||
{
|
||||
isFree &= (lockedParameters[i] != column);
|
||||
if (column > lockedParameters[i]) ix--; // shift columns
|
||||
}
|
||||
|
||||
if (isFree)
|
||||
{
|
||||
Af.setCoefficient(ix, y, A.getRow(y)[e].v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove elements from x
|
||||
FullVector xf(D);
|
||||
|
||||
for (uint i = 0, j = 0; i < A.width(); i++)
|
||||
{
|
||||
bool isFree = true;
|
||||
for (uint l = 0; l < lockedCount; l++)
|
||||
{
|
||||
isFree &= (lockedParameters[l] != i);
|
||||
}
|
||||
|
||||
if (isFree)
|
||||
{
|
||||
xf[j++] = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Solve reduced system.
|
||||
LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
|
||||
|
||||
// Copy results back to x.
|
||||
for (uint i = 0, j = 0; i < A.width(); i++)
|
||||
{
|
||||
bool isFree = true;
|
||||
for (uint l = 0; l < lockedCount; l++)
|
||||
{
|
||||
isFree &= (lockedParameters[l] != i);
|
||||
}
|
||||
|
||||
if (isFree)
|
||||
{
|
||||
x[i] = xf[j++];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
|
||||
{
|
||||
nvDebugCheck(A.height() == A.width());
|
||||
nvDebugCheck(A.height() == b.dimension());
|
||||
nvDebugCheck(b.dimension() == x.dimension());
|
||||
|
||||
// JacobiPreconditioner jacobi(A, true);
|
||||
|
||||
// ConjugateGradientSolver(jacobi, A, b, x, epsilon);
|
||||
ConjugateGradientSolver(A, b, x, epsilon);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compute the solution of the sparse linear system Ab=x using the Conjugate
|
||||
* Gradient method.
|
||||
*
|
||||
* Solving sparse linear systems:
|
||||
* (1) A<>x = b
|
||||
*
|
||||
* The conjugate gradient algorithm solves (1) only in the case that A is
|
||||
* symmetric and positive definite. It is based on the idea of minimizing the
|
||||
* function
|
||||
*
|
||||
* (2) f(x) = 1/2<>x<EFBFBD>A<EFBFBD>x - b<>x
|
||||
*
|
||||
* This function is minimized when its gradient
|
||||
*
|
||||
* (3) df = A<>x - b
|
||||
*
|
||||
* is zero, which is equivalent to (1). The minimization is carried out by
|
||||
* generating a succession of search directions p.k and improved minimizers x.k.
|
||||
* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k<>p.k),
|
||||
* and x.k+1 is set equal to the new point x.k + alfa.k<>p.k. The p.k and x.k are
|
||||
* built up in such a way that x.k+1 is also the minimizer of f over the whole
|
||||
* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N
|
||||
* iterations you arrive at the minimizer over the entire vector space, i.e., the
|
||||
* solution to (1).
|
||||
*
|
||||
* For a really good explanation of the method see:
|
||||
*
|
||||
* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
|
||||
* Jonhathan Richard Shewchuk.
|
||||
*
|
||||
**/
|
||||
/*static*/ int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
|
||||
{
|
||||
nvDebugCheck( A.isSquare() );
|
||||
nvDebugCheck( A.width() == b.dimension() );
|
||||
nvDebugCheck( A.width() == x.dimension() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.width();
|
||||
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
|
||||
|
||||
FullVector r(D); // residual
|
||||
FullVector p(D); // search direction
|
||||
FullVector q(D); //
|
||||
float delta_0;
|
||||
float delta_old;
|
||||
float delta_new;
|
||||
float alpha;
|
||||
float beta;
|
||||
|
||||
// r = b - A<>x;
|
||||
copy(b, r);
|
||||
sgemv(-1, A, x, 1, r);
|
||||
|
||||
// p = r;
|
||||
copy(r, p);
|
||||
|
||||
delta_new = dot( r, r );
|
||||
delta_0 = delta_new;
|
||||
|
||||
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
|
||||
{
|
||||
i++;
|
||||
|
||||
// q = A<>p
|
||||
mult(A, p, q);
|
||||
|
||||
// alpha = delta_new / p<>q
|
||||
alpha = delta_new / dot( p, q );
|
||||
|
||||
// x = alfa<66>p + x
|
||||
saxpy(alpha, p, x);
|
||||
|
||||
if ((i & 31) == 0) // recompute r after 32 steps
|
||||
{
|
||||
// r = b - A<>x
|
||||
copy(b, r);
|
||||
sgemv(-1, A, x, 1, r);
|
||||
}
|
||||
else
|
||||
{
|
||||
// r = r - alpha<68>q
|
||||
saxpy(-alpha, q, r);
|
||||
}
|
||||
|
||||
delta_old = delta_new;
|
||||
delta_new = dot( r, r );
|
||||
|
||||
beta = delta_new / delta_old;
|
||||
|
||||
// p = r + beta<74>p
|
||||
copy(r, p);
|
||||
saxpy(beta, p, r);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
// Conjugate gradient with preconditioner.
|
||||
/*static*/ int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
|
||||
{
|
||||
nvDebugCheck( A.isSquare() );
|
||||
nvDebugCheck( A.width() == b.dimension() );
|
||||
nvDebugCheck( A.width() == x.dimension() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.width();
|
||||
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
|
||||
|
||||
FullVector r(D); // residual
|
||||
FullVector p(D); // search direction
|
||||
FullVector q(D); //
|
||||
FullVector s(D); // preconditioned
|
||||
float delta_0;
|
||||
float delta_old;
|
||||
float delta_new;
|
||||
float alpha;
|
||||
float beta;
|
||||
|
||||
// r = b - A<>x
|
||||
copy(b, r);
|
||||
sgemv(-1, A, x, 1, r);
|
||||
|
||||
|
||||
// p = M^-1 <20> r
|
||||
preconditioner.apply(r, p);
|
||||
//copy(r, p);
|
||||
|
||||
|
||||
delta_new = dot(r, p);
|
||||
delta_0 = delta_new;
|
||||
|
||||
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
|
||||
{
|
||||
i++;
|
||||
|
||||
// q = A<>p
|
||||
mult(A, p, q);
|
||||
|
||||
// alpha = delta_new / p<>q
|
||||
alpha = delta_new / dot(p, q);
|
||||
|
||||
// x = alfa<66>p + x
|
||||
saxpy(alpha, p, x);
|
||||
|
||||
if ((i & 31) == 0) // recompute r after 32 steps
|
||||
{
|
||||
// r = b - A<>x
|
||||
copy(b, r);
|
||||
sgemv(-1, A, x, 1, r);
|
||||
}
|
||||
else
|
||||
{
|
||||
// r = r - alfa<66>q
|
||||
saxpy(-alpha, q, r);
|
||||
}
|
||||
|
||||
// s = M^-1 <20> r
|
||||
preconditioner.apply(r, s);
|
||||
//copy(r, s);
|
||||
|
||||
delta_old = delta_new;
|
||||
delta_new = dot( r, s );
|
||||
|
||||
beta = delta_new / delta_old;
|
||||
|
||||
// p = s + beta<74>p
|
||||
copy(s, p);
|
||||
saxpy(beta, p, s);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
#if 0 // Nonsymmetric solvers
|
||||
|
||||
/** Bi-conjugate gradient method. */
|
||||
MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
|
||||
piDebugCheck( A.IsSquare() );
|
||||
piDebugCheck( A.Width() == b.Dim() );
|
||||
piDebugCheck( A.Width() == x.Dim() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.Width();
|
||||
const int i_max = 4 * D;
|
||||
|
||||
float resid;
|
||||
float rho_1 = 0;
|
||||
float rho_2 = 0;
|
||||
float alpha;
|
||||
float beta;
|
||||
|
||||
DenseVector r(D);
|
||||
DenseVector rtilde(D);
|
||||
DenseVector p(D);
|
||||
DenseVector ptilde(D);
|
||||
DenseVector q(D);
|
||||
DenseVector qtilde(D);
|
||||
DenseVector tmp(D); // temporal vector.
|
||||
|
||||
// r = b - A*x;
|
||||
A.Product( x, tmp );
|
||||
r.Sub( b, tmp );
|
||||
|
||||
// rtilde = r
|
||||
rtilde.Set( r );
|
||||
|
||||
// p = r;
|
||||
p.Set( r );
|
||||
|
||||
// ptilde = rtilde
|
||||
ptilde.Set( rtilde );
|
||||
|
||||
|
||||
|
||||
float normb = b.Norm();
|
||||
if( normb == 0.0 ) normb = 1;
|
||||
|
||||
// test convergence
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// method converges?
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
while( i < i_max ) {
|
||||
|
||||
i++;
|
||||
|
||||
rho_1 = DenseVectorDotProduct( r, rtilde );
|
||||
|
||||
if( rho_1 == 0 ) {
|
||||
// method fails.
|
||||
return -i;
|
||||
}
|
||||
|
||||
if (i == 1) {
|
||||
p.Set( r );
|
||||
ptilde.Set( rtilde );
|
||||
}
|
||||
else {
|
||||
beta = rho_1 / rho_2;
|
||||
|
||||
// p = r + beta * p;
|
||||
p.Mad( r, p, beta );
|
||||
|
||||
// ptilde = rtilde + beta * ptilde;
|
||||
ptilde.Mad( rtilde, ptilde, beta );
|
||||
}
|
||||
|
||||
// q = A * p;
|
||||
A.Product( p, q );
|
||||
|
||||
// qtilde = A^t * ptilde;
|
||||
A.TransProduct( ptilde, qtilde );
|
||||
|
||||
alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
|
||||
|
||||
// x += alpha * p;
|
||||
x.Mad( x, p, alpha );
|
||||
|
||||
// r -= alpha * q;
|
||||
r.Mad( r, q, -alpha );
|
||||
|
||||
// rtilde -= alpha * qtilde;
|
||||
rtilde.Mad( rtilde, qtilde, -alpha );
|
||||
|
||||
rho_2 = rho_1;
|
||||
|
||||
// test convergence
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// method converges
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
/** Bi-conjugate gradient stabilized method. */
|
||||
int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
|
||||
piDebugCheck( A.IsSquare() );
|
||||
piDebugCheck( A.Width() == b.Dim() );
|
||||
piDebugCheck( A.Width() == x.Dim() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.Width();
|
||||
const int i_max = 2 * D;
|
||||
|
||||
|
||||
float resid;
|
||||
float rho_1 = 0;
|
||||
float rho_2 = 0;
|
||||
float alpha = 0;
|
||||
float beta = 0;
|
||||
float omega = 0;
|
||||
|
||||
DenseVector p(D);
|
||||
DenseVector phat(D);
|
||||
DenseVector s(D);
|
||||
DenseVector shat(D);
|
||||
DenseVector t(D);
|
||||
DenseVector v(D);
|
||||
|
||||
DenseVector r(D);
|
||||
DenseVector rtilde(D);
|
||||
|
||||
DenseVector tmp(D);
|
||||
|
||||
// r = b - A*x;
|
||||
A.Product( x, tmp );
|
||||
r.Sub( b, tmp );
|
||||
|
||||
// rtilde = r
|
||||
rtilde.Set( r );
|
||||
|
||||
|
||||
float normb = b.Norm();
|
||||
if( normb == 0.0 ) normb = 1;
|
||||
|
||||
// test convergence
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// method converges?
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
while( i<i_max ) {
|
||||
|
||||
i++;
|
||||
|
||||
rho_1 = DenseVectorDotProduct( rtilde, r );
|
||||
if( rho_1 == 0 ) {
|
||||
// method fails
|
||||
return -i;
|
||||
}
|
||||
|
||||
|
||||
if( i == 1 ) {
|
||||
p.Set( r );
|
||||
}
|
||||
else {
|
||||
beta = (rho_1 / rho_2) * (alpha / omega);
|
||||
|
||||
// p = r + beta * (p - omega * v);
|
||||
p.Mad( p, v, -omega );
|
||||
p.Mad( r, p, beta );
|
||||
}
|
||||
|
||||
//phat = M.solve(p);
|
||||
phat.Set( p );
|
||||
//Precond( &phat, p );
|
||||
|
||||
//v = A * phat;
|
||||
A.Product( phat, v );
|
||||
|
||||
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
|
||||
|
||||
// s = r - alpha * v;
|
||||
s.Mad( r, v, -alpha );
|
||||
|
||||
|
||||
resid = s.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// x += alpha * phat;
|
||||
x.Mad( x, phat, alpha );
|
||||
return i;
|
||||
}
|
||||
|
||||
//shat = M.solve(s);
|
||||
shat.Set( s );
|
||||
//Precond( &shat, s );
|
||||
|
||||
//t = A * shat;
|
||||
A.Product( shat, t );
|
||||
|
||||
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
|
||||
|
||||
// x += alpha * phat + omega * shat;
|
||||
x.Mad( x, shat, omega );
|
||||
x.Mad( x, phat, alpha );
|
||||
|
||||
//r = s - omega * t;
|
||||
r.Mad( s, t, -omega );
|
||||
|
||||
rho_2 = rho_1;
|
||||
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
return i;
|
||||
}
|
||||
|
||||
if( omega == 0 ) {
|
||||
return -i; // ???
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
/** Bi-conjugate gradient stabilized method. */
|
||||
int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
|
||||
piDebugCheck( A.IsSquare() );
|
||||
piDebugCheck( A.Width() == b.Dim() );
|
||||
piDebugCheck( A.Width() == x.Dim() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.Width();
|
||||
const int i_max = D;
|
||||
// const int i_max = 1000;
|
||||
|
||||
|
||||
float resid;
|
||||
float rho_1 = 0;
|
||||
float rho_2 = 0;
|
||||
float alpha = 0;
|
||||
float beta = 0;
|
||||
float omega = 0;
|
||||
|
||||
DenseVector p(D);
|
||||
DenseVector phat(D);
|
||||
DenseVector s(D);
|
||||
DenseVector shat(D);
|
||||
DenseVector t(D);
|
||||
DenseVector v(D);
|
||||
|
||||
DenseVector r(D);
|
||||
DenseVector rtilde(D);
|
||||
|
||||
DenseVector tmp(D);
|
||||
|
||||
// r = b - A*x;
|
||||
A.Product( x, tmp );
|
||||
r.Sub( b, tmp );
|
||||
|
||||
// rtilde = r
|
||||
rtilde.Set( r );
|
||||
|
||||
|
||||
float normb = b.Norm();
|
||||
if( normb == 0.0 ) normb = 1;
|
||||
|
||||
// test convergence
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// method converges?
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
while( i<i_max ) {
|
||||
|
||||
i++;
|
||||
|
||||
rho_1 = DenseVectorDotProduct( rtilde, r );
|
||||
if( rho_1 == 0 ) {
|
||||
// method fails
|
||||
return -i;
|
||||
}
|
||||
|
||||
|
||||
if( i == 1 ) {
|
||||
p.Set( r );
|
||||
}
|
||||
else {
|
||||
beta = (rho_1 / rho_2) * (alpha / omega);
|
||||
|
||||
// p = r + beta * (p - omega * v);
|
||||
p.Mad( p, v, -omega );
|
||||
p.Mad( r, p, beta );
|
||||
}
|
||||
|
||||
//phat = M.solve(p);
|
||||
//phat.Set( p );
|
||||
M.Precond( &phat, p );
|
||||
|
||||
//v = A * phat;
|
||||
A.Product( phat, v );
|
||||
|
||||
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
|
||||
|
||||
// s = r - alpha * v;
|
||||
s.Mad( r, v, -alpha );
|
||||
|
||||
|
||||
resid = s.Norm() / normb;
|
||||
|
||||
//printf( "--- Iteration %d: residual = %f\n", i, resid );
|
||||
|
||||
if( resid < epsilon ) {
|
||||
// x += alpha * phat;
|
||||
x.Mad( x, phat, alpha );
|
||||
return i;
|
||||
}
|
||||
|
||||
//shat = M.solve(s);
|
||||
//shat.Set( s );
|
||||
M.Precond( &shat, s );
|
||||
|
||||
//t = A * shat;
|
||||
A.Product( shat, t );
|
||||
|
||||
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
|
||||
|
||||
// x += alpha * phat + omega * shat;
|
||||
x.Mad( x, shat, omega );
|
||||
x.Mad( x, phat, alpha );
|
||||
|
||||
//r = s - omega * t;
|
||||
r.Mad( s, t, -omega );
|
||||
|
||||
rho_2 = rho_1;
|
||||
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
return i;
|
||||
}
|
||||
|
||||
if( omega == 0 ) {
|
||||
return -i; // ???
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
#endif
|
@ -1,20 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_MATH_SOLVER_H
|
||||
#define NV_MATH_SOLVER_H
|
||||
|
||||
#include <nvmath/Sparse.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
// Linear solvers.
|
||||
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
|
||||
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
|
||||
NVMATH_API void SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
|
||||
// NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_MATH_SOLVER_H
|
@ -1,831 +0,0 @@
|
||||
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
|
||||
|
||||
#include <nvmath/Sparse.h>
|
||||
#include <nvmath/KahanSum.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
|
||||
/// Ctor.
|
||||
FullVector::FullVector(uint dim)
|
||||
{
|
||||
m_array.resize(dim);
|
||||
}
|
||||
|
||||
/// Copy ctor.
|
||||
FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
|
||||
{
|
||||
}
|
||||
|
||||
/// Copy operator
|
||||
const FullVector & FullVector::operator=(const FullVector & v)
|
||||
{
|
||||
nvCheck(dimension() == v.dimension());
|
||||
|
||||
m_array = v.m_array;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
void FullVector::fill(float f)
|
||||
{
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] = f;
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator+= (const FullVector & v)
|
||||
{
|
||||
nvDebugCheck(dimension() == v.dimension());
|
||||
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] += v.m_array[i];
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator-= (const FullVector & v)
|
||||
{
|
||||
nvDebugCheck(dimension() == v.dimension());
|
||||
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] -= v.m_array[i];
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator*= (const FullVector & v)
|
||||
{
|
||||
nvDebugCheck(dimension() == v.dimension());
|
||||
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] *= v.m_array[i];
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator+= (float f)
|
||||
{
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] += f;
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator-= (float f)
|
||||
{
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] -= f;
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator*= (float f)
|
||||
{
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] *= f;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::saxpy(float a, const FullVector & x, FullVector & y)
|
||||
{
|
||||
nvDebugCheck(x.dimension() == y.dimension());
|
||||
|
||||
const uint dim = x.dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
y[i] += a * x[i];
|
||||
}
|
||||
}
|
||||
|
||||
void nv::copy(const FullVector & x, FullVector & y)
|
||||
{
|
||||
nvDebugCheck(x.dimension() == y.dimension());
|
||||
|
||||
const uint dim = x.dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
y[i] = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
void nv::scal(float a, FullVector & x)
|
||||
{
|
||||
const uint dim = x.dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
x[i] *= a;
|
||||
}
|
||||
}
|
||||
|
||||
float nv::dot(const FullVector & x, const FullVector & y)
|
||||
{
|
||||
nvDebugCheck(x.dimension() == y.dimension());
|
||||
|
||||
const uint dim = x.dimension();
|
||||
|
||||
/*float sum = 0;
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
sum += x[i] * y[i];
|
||||
}
|
||||
return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
kahan.add(x[i] * y[i]);
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
|
||||
/// Construct a d x d matrix with all coefficients set to zero.
FullMatrix::FullMatrix(uint d)
    : m_width(d), m_height(d)
{
    m_array.resize(d*d, 0.0f);
}
|
||||
|
||||
/// Construct a matrix of width w and height h with all coefficients set to zero.
FullMatrix::FullMatrix(uint w, uint h)
    : m_width(w), m_height(h)
{
    m_array.resize(w*h, 0.0f);
}
|
||||
|
||||
/// Copy constructor: takes the dimensions and coefficients of @a m.
FullMatrix::FullMatrix(const FullMatrix & m)
    : m_width(m.m_width), m_height(m.m_height)
{
    m_array = m.m_array;
}
|
||||
|
||||
/// Assignment. Only the coefficients are copied; both matrices must already
/// have the same width and height (enforced with nvCheck).
const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
{
    nvCheck(m.width() == width());
    nvCheck(m.height() == height());

    m_array = m.m_array;
    return *this;
}
|
||||
|
||||
|
||||
/// Read the coefficient at column x, row y (row-major storage).
float FullMatrix::getCoefficient(uint x, uint y) const
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    const uint index = y * width() + x;
    return m_array[index];
}
|
||||
|
||||
/// Overwrite the coefficient at column x, row y with f.
void FullMatrix::setCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    const uint index = y * width() + x;
    m_array[index] = f;
}
|
||||
|
||||
/// Add f to the coefficient at column x, row y.
void FullMatrix::addCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    const uint index = y * width() + x;
    m_array[index] += f;
}
|
||||
|
||||
/// Multiply the coefficient at column x, row y by f.
void FullMatrix::mulCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    const uint index = y * width() + x;
    m_array[index] *= f;
}
|
||||
|
||||
/// Dot product of row y of this matrix with the vector v.
/// v must have as many elements as the matrix has columns.
float FullMatrix::dotRow(uint y, const FullVector & v) const
{
    nvDebugCheck( v.dimension() == width() );
    nvDebugCheck( y < height() );

    const uint n = v.dimension();
    const uint rowStart = y * n;

    float acc = 0;
    for (uint k = 0; k < n; k++)
    {
        acc += m_array[rowStart + k] * v[k];
    }

    return acc;
}
|
||||
|
||||
/// Multiply-add a row into a vector: v += alpha * (row y of this matrix).
/// v must have as many elements as the matrix has columns.
void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
{
    nvDebugCheck( v.dimension() == width() );
    nvDebugCheck( y < height() );

    const uint count = v.dimension();
    for (uint i = 0; i < count; i++)
    {
        // The row must be scaled by alpha, as SparseMatrix::madRow does;
        // without it the transposed mult/sgemv paths ignore their input vector.
        v[i] += alpha * m_array[y * count + i];
    }
}
|
||||
|
||||
|
||||
// y = M * x
|
||||
void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
|
||||
{
|
||||
mult(NoTransposed, M, x, y);
|
||||
}
|
||||
|
||||
// y = M * x, or y = M^T * x when TM == Transposed.
void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
{
    const uint columns = M.width();
    const uint rows = M.height();

    if (TM != Transposed)
    {
        nvDebugCheck( columns == x.dimension() );
        nvDebugCheck( rows == y.dimension() );

        // Each output element is one row of M dotted with x.
        for (uint r = 0; r < rows; r++)
        {
            y[r] = M.dotRow(r, x);
        }
    }
    else
    {
        nvDebugCheck( rows == x.dimension() );
        nvDebugCheck( columns == y.dimension() );

        // Accumulate the rows of M, each weighted by the matching element of x.
        y.fill(0.0f);

        for (uint r = 0; r < rows; r++)
        {
            M.madRow(r, x[r], y);
        }
    }
}
|
||||
|
||||
// y = alpha*A*x + beta*y
|
||||
void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
|
||||
{
|
||||
sgemv(alpha, NoTransposed, A, x, beta, y);
|
||||
}
|
||||
|
||||
void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
|
||||
{
|
||||
const uint w = A.width();
|
||||
const uint h = A.height();
|
||||
|
||||
if (TA == Transposed)
|
||||
{
|
||||
nvDebugCheck( h == x.dimension() );
|
||||
nvDebugCheck( w == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
A.madRow(i, alpha * x[i], y);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nvDebugCheck( w == x.dimension() );
|
||||
nvDebugCheck( h == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Multiply a row of A by a column of B.
|
||||
static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
|
||||
{
|
||||
const uint w = (TA == NoTransposed) ? A.width() : A.height();
|
||||
nvDebugCheck(w == (TB == NoTransposed) ? B.height() : A.width());
|
||||
|
||||
float sum = 0.0f;
|
||||
|
||||
for (uint k = 0; k < w; k++)
|
||||
{
|
||||
const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop?
|
||||
const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i);
|
||||
sum += a * b;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
// C = A * B
|
||||
void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
|
||||
{
|
||||
mult(NoTransposed, A, NoTransposed, B, C);
|
||||
}
|
||||
|
||||
// C = op(A) * op(B), implemented as sgemm with alpha = 1 and beta = 0.
void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
{
    sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}
|
||||
|
||||
// C = alpha*A*B + beta*C
|
||||
void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
|
||||
{
|
||||
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
|
||||
}
|
||||
|
||||
void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
|
||||
{
|
||||
const uint w = C.width();
|
||||
const uint h = C.height();
|
||||
|
||||
uint aw = (TA == NoTransposed) ? A.width() : A.height();
|
||||
uint ah = (TA == NoTransposed) ? A.height() : A.width();
|
||||
uint bw = (TB == NoTransposed) ? B.width() : B.height();
|
||||
uint bh = (TB == NoTransposed) ? B.height() : B.width();
|
||||
|
||||
nvDebugCheck(aw == bh);
|
||||
nvDebugCheck(bw == ah);
|
||||
nvDebugCheck(w == bw);
|
||||
nvDebugCheck(h == ah);
|
||||
|
||||
for (uint y = 0; y < h; y++)
|
||||
{
|
||||
for (uint x = 0; x < w; x++)
|
||||
{
|
||||
float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y);
|
||||
C.setCoefficient(x, y, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/// Ctor. Init the size of the sparse matrix.
|
||||
SparseMatrix::SparseMatrix(uint d) : m_width(d)
|
||||
{
|
||||
m_array.resize(d);
|
||||
}
|
||||
|
||||
/// Ctor. Init the size of the sparse matrix.
|
||||
SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
|
||||
{
|
||||
m_array.resize(h);
|
||||
}
|
||||
|
||||
/// Copy constructor: takes the width and all rows of @a m.
SparseMatrix::SparseMatrix(const SparseMatrix & m)
    : m_width(m.m_width)
{
    m_array = m.m_array;
}
|
||||
|
||||
/// Assignment. Only the rows are copied; both matrices must already have the
/// same width and height (enforced with nvCheck).
const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
{
    nvCheck(m.width() == width());
    nvCheck(m.height() == height());

    m_array = m.m_array;
    return *this;
}
|
||||
|
||||
|
||||
// x is column, y is row
|
||||
float SparseMatrix::getCoefficient(uint x, uint y) const
|
||||
{
|
||||
nvDebugCheck( x < width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
if (m_array[y][i].x == x) return m_array[y][i].v;
|
||||
}
|
||||
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
// Set the value at column x, row y. An existing entry is overwritten;
// otherwise a new entry is appended to the row.
void SparseMatrix::setCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    Array<Coefficient> & row = m_array[y];
    const uint entryCount = row.count();

    for (uint e = 0; e < entryCount; e++)
    {
        if (row[e].x == x)
        {
            row[e].v = f;
            return;
        }
    }

    Coefficient entry = { x, f };
    row.append( entry );
}
|
||||
|
||||
// Add f to the value at column x, row y. If the row has no entry for that
// column yet, one holding f is appended.
void SparseMatrix::addCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    Array<Coefficient> & row = m_array[y];
    const uint entryCount = row.count();

    for (uint e = 0; e < entryCount; e++)
    {
        if (row[e].x == x)
        {
            row[e].v += f;
            return;
        }
    }

    Coefficient entry = { x, f };
    row.append( entry );
}
|
||||
|
||||
// Multiply the value at column x, row y by f.
// A column with no stored entry is an implicit zero (see getCoefficient), and
// zero times f is still zero, so in that case the row is left untouched.
void SparseMatrix::mulCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    const uint count = m_array[y].count();
    for (uint i = 0; i < count; i++)
    {
        if (m_array[y][i].x == x)
        {
            m_array[y][i].v *= f;
            return;
        }
    }

    // No entry: nothing to scale. Appending { x, f } here would turn an
    // implicit zero into f.
}
|
||||
|
||||
|
||||
// Sum of all stored values in row y, accumulated with a KahanSum rather than
// a plain running float sum.
float SparseMatrix::sumRow(uint y) const
{
    nvDebugCheck( y < height() );

    const Array<Coefficient> & row = m_array[y];
    const uint entryCount = row.count();

    KahanSum accumulator;
    for (uint e = 0; e < entryCount; e++)
    {
        accumulator.add(row[e].v);
    }

    return accumulator.sum();
}
|
||||
|
||||
// Dot product of row y with the vector v. Only the stored entries of the row
// contribute; the products are accumulated with a KahanSum.
float SparseMatrix::dotRow(uint y, const FullVector & v) const
{
    nvDebugCheck( y < height() );

    const Array<Coefficient> & row = m_array[y];
    const uint entryCount = row.count();

    KahanSum accumulator;
    for (uint e = 0; e < entryCount; e++)
    {
        const Coefficient & entry = row[e];
        accumulator.add(entry.v * v[entry.x]);
    }

    return accumulator.sum();
}
|
||||
|
||||
// Multiply-add a row into a vector: v += alpha * (row y of this matrix).
// Only the columns that have a stored entry are touched.
void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
{
    nvDebugCheck(y < height());

    const Array<Coefficient> & row = m_array[y];
    const uint entryCount = row.count();

    for (uint e = 0; e < entryCount; e++)
    {
        const Coefficient & entry = row[e];
        v[entry.x] += alpha * entry.v;
    }
}
|
||||
|
||||
|
||||
// Remove every stored entry of row y.
void SparseMatrix::clearRow(uint y)
{
    nvDebugCheck( y < height() );

    m_array[y].clear();
}
|
||||
|
||||
// Multiply every stored entry of row y by f.
void SparseMatrix::scaleRow(uint y, float f)
{
    nvDebugCheck( y < height() );

    Array<Coefficient> & row = m_array[y];
    const uint entryCount = row.count();

    for (uint e = 0; e < entryCount; e++)
    {
        row[e].v *= f;
    }
}
|
||||
|
||||
void SparseMatrix::normalizeRow(uint y)
|
||||
{
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
float norm = 0.0f;
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
float f = m_array[y][i].v;
|
||||
norm += f * f;
|
||||
}
|
||||
|
||||
scaleRow(y, 1.0f / sqrtf(norm));
|
||||
}
|
||||
|
||||
|
||||
// Zero column x. In every row the first stored entry for that column, if any,
// has its value set to 0; the entry itself stays in the row.
void SparseMatrix::clearColumn(uint x)
{
    nvDebugCheck(x < width());

    for (uint r = 0; r < height(); r++)
    {
        Array<Coefficient> & row = m_array[r];
        const uint entryCount = row.count();

        for (uint e = 0; e < entryCount; e++)
        {
            if (row[e].x != x) continue;

            row[e].v = 0.0f;
            break;
        }
    }
}
|
||||
|
||||
// Multiply column x by f. In every row the first stored entry for that
// column, if any, is scaled.
void SparseMatrix::scaleColumn(uint x, float f)
{
    nvDebugCheck(x < width());

    for (uint r = 0; r < height(); r++)
    {
        Array<Coefficient> & row = m_array[r];
        const uint entryCount = row.count();

        for (uint e = 0; e < entryCount; e++)
        {
            if (row[e].x != x) continue;

            row[e].v *= f;
            break;
        }
    }
}
|
||||
|
||||
// Read-only access to the stored entries of row y. No bounds check is done here.
const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
{
    return m_array[y];
}
|
||||
|
||||
|
||||
// y = M * x
|
||||
void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
|
||||
{
|
||||
mult(NoTransposed, M, x, y);
|
||||
}
|
||||
|
||||
// y = M * x, or y = M^T * x when TM == Transposed.
void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
{
    const uint columns = M.width();
    const uint rows = M.height();

    if (TM != Transposed)
    {
        nvDebugCheck( columns == x.dimension() );
        nvDebugCheck( rows == y.dimension() );

        // Each output element is one row of M dotted with x.
        for (uint r = 0; r < rows; r++)
        {
            y[r] = M.dotRow(r, x);
        }
    }
    else
    {
        nvDebugCheck( rows == x.dimension() );
        nvDebugCheck( columns == y.dimension() );

        // Accumulate the rows of M, each weighted by the matching element of x.
        y.fill(0.0f);

        for (uint r = 0; r < rows; r++)
        {
            M.madRow(r, x[r], y);
        }
    }
}
|
||||
|
||||
// y = alpha*A*x + beta*y
|
||||
void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
|
||||
{
|
||||
sgemv(alpha, NoTransposed, A, x, beta, y);
|
||||
}
|
||||
|
||||
void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
|
||||
{
|
||||
const uint w = A.width();
|
||||
const uint h = A.height();
|
||||
|
||||
if (TA == Transposed)
|
||||
{
|
||||
nvDebugCheck( h == x.dimension() );
|
||||
nvDebugCheck( w == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
A.madRow(i, alpha * x[i], y);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nvDebugCheck( w == x.dimension() );
|
||||
nvDebugCheck( h == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// dot y-row of A by x-column of B
|
||||
static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
|
||||
{
|
||||
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
|
||||
|
||||
const uint count = row.count();
|
||||
|
||||
/*float sum = 0.0f;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
const SparseMatrix::Coefficient & c = row[i];
|
||||
sum += c.v * B.getCoefficient(x, c.x);
|
||||
}
|
||||
return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
const SparseMatrix::Coefficient & c = row[i];
|
||||
kahan.add(c.v * B.getCoefficient(x, c.x));
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
// dot y-row of A by x-row of B
|
||||
static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
|
||||
{
|
||||
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
|
||||
|
||||
const uint count = row.count();
|
||||
|
||||
/*float sum = 0.0f;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
const SparseMatrix::Coefficient & c = row[i];
|
||||
sum += c.v * B.getCoefficient(c.x, x);
|
||||
}
|
||||
//return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
const SparseMatrix::Coefficient & c = row[i];
|
||||
kahan.add(c.v * B.getCoefficient(c.x, x));
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
// dot y-column of A by x-column of B
|
||||
static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
|
||||
{
|
||||
nvDebugCheck(A.height() == B.height());
|
||||
|
||||
const uint h = A.height();
|
||||
|
||||
/*float sum = 0.0f;
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
|
||||
}
|
||||
//return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
|
||||
|
||||
// C = A * B
|
||||
void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
|
||||
{
|
||||
mult(NoTransposed, A, NoTransposed, B, C);
|
||||
}
|
||||
|
||||
// C = op(A) * op(B), expressed as sgemm with alpha = 1 and beta = 0.
void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
{
    const float alpha = 1.0f;
    const float beta = 0.0f;
    sgemm(alpha, TA, A, TB, B, beta, C);
}
|
||||
|
||||
// C = alpha*A*B + beta*C
|
||||
void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
|
||||
{
|
||||
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
|
||||
}
|
||||
|
||||
void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
|
||||
{
|
||||
const uint w = C.width();
|
||||
const uint h = C.height();
|
||||
|
||||
uint aw = (TA == NoTransposed) ? A.width() : A.height();
|
||||
uint ah = (TA == NoTransposed) ? A.height() : A.width();
|
||||
uint bw = (TB == NoTransposed) ? B.width() : B.height();
|
||||
uint bh = (TB == NoTransposed) ? B.height() : B.width();
|
||||
|
||||
nvDebugCheck(aw == bh);
|
||||
nvDebugCheck(bw == ah);
|
||||
nvDebugCheck(w == bw);
|
||||
nvDebugCheck(h == ah);
|
||||
|
||||
|
||||
for (uint y = 0; y < h; y++)
|
||||
{
|
||||
for (uint x = 0; x < w; x++)
|
||||
{
|
||||
float c = beta * C.getCoefficient(x, y);
|
||||
|
||||
if (TA == NoTransposed && TB == NoTransposed)
|
||||
{
|
||||
// dot y-row of A by x-column of B.
|
||||
c += alpha * dotRowColumn(y, A, x, B);
|
||||
}
|
||||
else if (TA == Transposed && TB == Transposed)
|
||||
{
|
||||
// dot y-column of A by x-row of B.
|
||||
c += alpha * dotRowColumn(x, B, y, A);
|
||||
}
|
||||
else if (TA == Transposed && TB == NoTransposed)
|
||||
{
|
||||
// dot y-column of A by x-column of B.
|
||||
c += alpha * dotColumnColumn(y, A, x, B);
|
||||
}
|
||||
else if (TA == NoTransposed && TB == Transposed)
|
||||
{
|
||||
// dot y-row of A by x-row of B.
|
||||
c += alpha * dotRowRow(y, A, x, B);
|
||||
}
|
||||
|
||||
if (c != 0.0f)
|
||||
{
|
||||
C.setCoefficient(x, y, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// C = At * A
|
||||
void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
|
||||
{
|
||||
// This is quite expensive...
|
||||
mult(Transposed, A, NoTransposed, A, C);
|
||||
}
|
@ -1,198 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_MATH_SPARSE_H
|
||||
#define NV_MATH_SPARSE_H
|
||||
|
||||
#include <nvcore/Containers.h>
|
||||
#include <nvmath/nvmath.h>
|
||||
|
||||
// Full and sparse vector and matrix classes. BLAS subset.
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class FullVector;
|
||||
class FullMatrix;
|
||||
class SparseMatrix;
|
||||
|
||||
|
||||
/// Fixed size vector class.
|
||||
class FullVector
|
||||
{
|
||||
public:
|
||||
|
||||
FullVector(uint dim);
|
||||
FullVector(const FullVector & v);
|
||||
|
||||
const FullVector & operator=(const FullVector & v);
|
||||
|
||||
uint dimension() const { return m_array.count(); }
|
||||
|
||||
const float & operator[]( uint index ) const { return m_array[index]; }
|
||||
float & operator[] ( uint index ) { return m_array[index]; }
|
||||
|
||||
void fill(float f);
|
||||
|
||||
void operator+= (const FullVector & v);
|
||||
void operator-= (const FullVector & v);
|
||||
void operator*= (const FullVector & v);
|
||||
|
||||
void operator+= (float f);
|
||||
void operator-= (float f);
|
||||
void operator*= (float f);
|
||||
|
||||
|
||||
private:
|
||||
|
||||
Array<float> m_array;
|
||||
|
||||
};
|
||||
|
||||
// Pseudo-BLAS interface.
|
||||
NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y
|
||||
NVMATH_API void copy(const FullVector & x, FullVector & y);
|
||||
NVMATH_API void scal(float a, FullVector & x);
|
||||
NVMATH_API float dot(const FullVector & x, const FullVector & y);
|
||||
|
||||
|
||||
/// Tells the BLAS-like routines whether a matrix operand is used as stored
/// or transposed.
enum Transpose
{
    NoTransposed = 0,   ///< Use the matrix as stored.
    Transposed = 1      ///< Use the transpose of the matrix.
};
|
||||
|
||||
/// Full matrix class.
|
||||
class FullMatrix
|
||||
{
|
||||
public:
|
||||
|
||||
FullMatrix(uint d);
|
||||
FullMatrix(uint w, uint h);
|
||||
FullMatrix(const FullMatrix & m);
|
||||
|
||||
const FullMatrix & operator=(const FullMatrix & m);
|
||||
|
||||
uint width() const { return m_width; }
|
||||
uint height() const { return m_height; }
|
||||
bool isSquare() const { return m_width == m_height; }
|
||||
|
||||
float getCoefficient(uint x, uint y) const;
|
||||
|
||||
void setCoefficient(uint x, uint y, float f);
|
||||
void addCoefficient(uint x, uint y, float f);
|
||||
void mulCoefficient(uint x, uint y, float f);
|
||||
|
||||
float dotRow(uint y, const FullVector & v) const;
|
||||
void madRow(uint y, float alpha, FullVector & v) const;
|
||||
|
||||
protected:
|
||||
|
||||
bool isValid() const {
|
||||
return m_array.size() == (m_width * m_height);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
const uint m_width;
|
||||
const uint m_height;
|
||||
Array<float> m_array;
|
||||
|
||||
};
|
||||
|
||||
NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
|
||||
NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
|
||||
|
||||
// y = alpha*A*x + beta*y
|
||||
NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
|
||||
NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
|
||||
|
||||
NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
|
||||
NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
|
||||
|
||||
// C = alpha*A*B + beta*C
|
||||
NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
|
||||
NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
|
||||
|
||||
|
||||
/**
|
||||
* Sparse matrix class. The matrix is assumed to be sparse and to have
|
||||
* very few non-zero elements, for this reason it's stored in indexed
|
||||
* format. To multiply column vectors efficiently, the matrix stores
|
||||
* the elements in indexed-column order, there is a list of indexed
|
||||
* elements for each row of the matrix. As with the FullVector the
|
||||
* dimension of the matrix is constant.
|
||||
**/
|
||||
class SparseMatrix
|
||||
{
|
||||
friend class FullMatrix;
|
||||
public:
|
||||
|
||||
// An element of the sparse array.
|
||||
struct Coefficient {
|
||||
uint x; // column
|
||||
float v; // value
|
||||
};
|
||||
|
||||
|
||||
public:
|
||||
|
||||
SparseMatrix(uint d);
|
||||
SparseMatrix(uint w, uint h);
|
||||
SparseMatrix(const SparseMatrix & m);
|
||||
|
||||
const SparseMatrix & operator=(const SparseMatrix & m);
|
||||
|
||||
|
||||
uint width() const { return m_width; }
|
||||
uint height() const { return m_array.count(); }
|
||||
bool isSquare() const { return width() == height(); }
|
||||
|
||||
float getCoefficient(uint x, uint y) const; // x is column, y is row
|
||||
|
||||
void setCoefficient(uint x, uint y, float f);
|
||||
void addCoefficient(uint x, uint y, float f);
|
||||
void mulCoefficient(uint x, uint y, float f);
|
||||
|
||||
float sumRow(uint y) const;
|
||||
float dotRow(uint y, const FullVector & v) const;
|
||||
void madRow(uint y, float alpha, FullVector & v) const;
|
||||
|
||||
void clearRow(uint y);
|
||||
void scaleRow(uint y, float f);
|
||||
void normalizeRow(uint y);
|
||||
|
||||
void clearColumn(uint x);
|
||||
void scaleColumn(uint x, float f);
|
||||
|
||||
const Array<Coefficient> & getRow(uint y) const;
|
||||
|
||||
private:
|
||||
|
||||
/// Number of columns.
|
||||
const uint m_width;
|
||||
|
||||
/// Array of matrix elements.
|
||||
Array< Array<Coefficient> > m_array;
|
||||
|
||||
};
|
||||
|
||||
NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
|
||||
NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
|
||||
|
||||
// y = alpha*A*x + beta*y
|
||||
NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
|
||||
NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
|
||||
|
||||
NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
|
||||
NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
|
||||
|
||||
// C = alpha*A*B + beta*C
|
||||
NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
|
||||
NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
|
||||
|
||||
// C = At * A
|
||||
NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_MATH_SPARSE_H
|
@ -3,7 +3,6 @@
|
||||
#ifndef NV_MATH_SPHERICALHARMONIC_H
|
||||
#define NV_MATH_SPHERICALHARMONIC_H
|
||||
|
||||
#include <string.h> // memcpy
|
||||
#include <nvmath/Vector.h>
|
||||
|
||||
namespace nv
|
||||
|
@ -96,7 +96,7 @@ static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg
|
||||
if(min>rad || max<-rad) return false;
|
||||
|
||||
|
||||
bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
|
||||
bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
|
||||
{
|
||||
// use separating axis theorem to test overlap between triangle and box
|
||||
// need to test for overlap in these directions:
|
||||
@ -170,7 +170,7 @@ bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const T
|
||||
}
|
||||
|
||||
|
||||
bool nv::triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
|
||||
bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
|
||||
{
|
||||
// use separating axis theorem to test overlap between triangle and box
|
||||
// need to test for overlap in these directions:
|
||||
|
@ -1,82 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include <nvcore/Stream.h>
|
||||
|
||||
#include <nvmath/Vector.h>
|
||||
#include <nvmath/Matrix.h>
|
||||
#include <nvmath/Quaternion.h>
|
||||
#include <nvmath/Basis.h>
|
||||
#include <nvmath/Box.h>
|
||||
|
||||
#include <nvmath/TypeSerialization.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
// Streams the two components of a Vector2 through s.
// The same operator is used in both directions: the components are copied
// to locals, passed through the stream, and written back to v only when the
// stream reports that it is loading.
Stream & nv::operator<< (Stream & s, Vector2 & v)
{
    float vx = v.x();
    float vy = v.y();

    s << vx << vy;

    if (s.isLoading()) v.set(vx, vy);

    return s;
}
|
||||
|
||||
// Streams the three components of a Vector3 through s.
// The same operator is used in both directions: the components are copied
// to locals, passed through the stream, and written back to v only when the
// stream reports that it is loading.
Stream & nv::operator<< (Stream & s, Vector3 & v)
{
    float vx = v.x();
    float vy = v.y();
    float vz = v.z();

    s << vx << vy << vz;

    if (s.isLoading()) v.set(vx, vy, vz);

    return s;
}
|
||||
|
||||
// Streams the four components of a Vector4 through s.
// The same operator is used in both directions: the components are copied
// to locals, passed through the stream, and written back to v only when the
// stream reports that it is loading.
Stream & nv::operator<< (Stream & s, Vector4 & v)
{
    float vx = v.x();
    float vy = v.y();
    float vz = v.z();
    float vw = v.w();

    s << vx << vy << vz << vw;

    if (s.isLoading()) v.set(vx, vy, vz, vw);

    return s;
}
|
||||
|
||||
// Stream operator for Matrix.
// NOTE: not implemented. The stream is returned untouched and m is neither
// read nor written, so matrices are silently skipped when saving or loading.
Stream & nv::operator<< (Stream & s, Matrix & m)
|
||||
{
|
||||
return s;
|
||||
}
|
||||
|
||||
// Streams a Quaternion by streaming the vector returned by asVector().
Stream & nv::operator<< (Stream & s, Quaternion & q)
{
    s << q.asVector();
    return s;
}
|
||||
|
||||
// Streams a Basis as its tangent, bitangent and normal, in that order.
Stream & nv::operator<< (Stream & s, Basis & basis)
{
    s << basis.tangent;
    s << basis.bitangent;
    s << basis.normal;
    return s;
}
|
||||
|
||||
// Streams a Box as its minimum corner followed by its maximum corner.
Stream & nv::operator<< (Stream & s, Box & box)
{
    s << box.m_mins;
    s << box.m_maxs;
    return s;
}
|
||||
|
@ -1,32 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_MATH_TYPESERIALIZATION_H
|
||||
#define NV_MATH_TYPESERIALIZATION_H
|
||||
|
||||
#include <nvmath/nvmath.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Stream;
|
||||
|
||||
class Vector2;
|
||||
class Vector3;
|
||||
class Vector4;
|
||||
|
||||
class Matrix;
|
||||
class Quaternion;
|
||||
struct Basis;
|
||||
class Box;
|
||||
|
||||
NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
|
||||
|
||||
NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Box & obj);
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_MATH_TYPESERIALIZATION_H
|
@ -4,7 +4,7 @@
|
||||
#define NV_MATH_VECTOR_H
|
||||
|
||||
#include <nvmath/nvmath.h>
|
||||
#include <nvcore/Algorithms.h> // min, max
|
||||
#include <nvcore/Containers.h> // min, max
|
||||
|
||||
namespace nv
|
||||
{
|
||||
@ -71,7 +71,6 @@ public:
|
||||
const Vector2 & xy() const;
|
||||
|
||||
scalar component(uint idx) const;
|
||||
void setComponent(uint idx, scalar f);
|
||||
|
||||
const scalar * ptr() const;
|
||||
|
||||
@ -240,21 +239,13 @@ inline const Vector2 & Vector3::xy() const
|
||||
inline scalar Vector3::component(uint idx) const
|
||||
{
|
||||
nvDebugCheck(idx < 3);
|
||||
if (idx == 0) return m_x;
|
||||
if (idx == 1) return m_y;
|
||||
if (idx == 2) return m_z;
|
||||
if (idx == 0) return x();
|
||||
if (idx == 1) return y();
|
||||
if (idx == 2) return z();
|
||||
nvAssume(false);
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
inline void Vector3::setComponent(uint idx, float f)
|
||||
{
|
||||
nvDebugCheck(idx < 3);
|
||||
if (idx == 0) m_x = f;
|
||||
else if (idx == 1) m_y = f;
|
||||
else if (idx == 2) m_z = f;
|
||||
}
|
||||
|
||||
inline const scalar * Vector3::ptr() const
|
||||
{
|
||||
return &m_x;
|
||||
@ -486,35 +477,6 @@ inline scalar length(Vector2::Arg v)
|
||||
return sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline scalar inverse_length(Vector2::Arg v)
|
||||
{
|
||||
return 1.0f / sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
|
||||
{
|
||||
return equal(length(v), 1, epsilon);
|
||||
}
|
||||
|
||||
inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
|
||||
{
|
||||
float l = length(v);
|
||||
nvDebugCheck(!isZero(l, epsilon));
|
||||
Vector2 n = scale(v, 1.0f / l);
|
||||
nvDebugCheck(isNormalized(n));
|
||||
return n;
|
||||
}
|
||||
|
||||
inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
|
||||
{
|
||||
float l = length(v);
|
||||
if (isZero(l, epsilon)) {
|
||||
return fallback;
|
||||
}
|
||||
return scale(v, 1.0f / l);
|
||||
}
|
||||
|
||||
|
||||
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
|
||||
{
|
||||
return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
|
||||
@ -633,11 +595,6 @@ inline scalar length(Vector3::Arg v)
|
||||
return sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline scalar inverse_length(Vector3::Arg v)
|
||||
{
|
||||
return 1.0f / sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
|
||||
{
|
||||
return equal(length(v), 1, epsilon);
|
||||
@ -759,11 +716,6 @@ inline scalar length(Vector4::Arg v)
|
||||
return sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline scalar inverse_length(Vector4::Arg v)
|
||||
{
|
||||
return 1.0f / sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
|
||||
{
|
||||
return equal(length(v), 1, epsilon);
|
||||
|
@ -48,19 +48,37 @@
|
||||
#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
|
||||
*/
|
||||
|
||||
inline float sqrt_assert(const float f)
|
||||
inline double sqrt_assert(const double f)
|
||||
{
|
||||
nvDebugCheck(f >= 0.0f);
|
||||
return sqrt(f);
|
||||
}
|
||||
|
||||
inline float sqrtf_assert(const float f)
|
||||
{
|
||||
nvDebugCheck(f >= 0.0f);
|
||||
return sqrtf(f);
|
||||
}
|
||||
|
||||
inline float acos_assert(const float f)
|
||||
inline double acos_assert(const double f)
|
||||
{
|
||||
nvDebugCheck(f >= -1.0f && f <= 1.0f);
|
||||
return acos(f);
|
||||
}
|
||||
|
||||
inline float acosf_assert(const float f)
|
||||
{
|
||||
nvDebugCheck(f >= -1.0f && f <= 1.0f);
|
||||
return acosf(f);
|
||||
}
|
||||
|
||||
inline float asin_assert(const float f)
|
||||
inline double asin_assert(const double f)
|
||||
{
|
||||
nvDebugCheck(f >= -1.0f && f <= 1.0f);
|
||||
return asin(f);
|
||||
}
|
||||
|
||||
inline float asinf_assert(const float f)
|
||||
{
|
||||
nvDebugCheck(f >= -1.0f && f <= 1.0f);
|
||||
return asinf(f);
|
||||
@ -68,11 +86,11 @@ inline float asin_assert(const float f)
|
||||
|
||||
// Replace default functions with asserting ones.
|
||||
#define sqrt sqrt_assert
|
||||
#define sqrtf sqrt_assert
|
||||
#define sqrtf sqrtf_assert
|
||||
#define acos acos_assert
|
||||
#define acosf acos_assert
|
||||
#define acosf acosf_assert
|
||||
#define asin asin_assert
|
||||
#define asinf asin_assert
|
||||
#define asinf asinf_assert
|
||||
|
||||
#if NV_OS_WIN32
|
||||
#include <float.h>
|
||||
|
@ -13,10 +13,10 @@ SET(NVTT_SRCS
|
||||
CompressDXT.cpp
|
||||
CompressRGB.h
|
||||
CompressRGB.cpp
|
||||
FastCompressDXT.h
|
||||
FastCompressDXT.cpp
|
||||
QuickCompressDXT.h
|
||||
QuickCompressDXT.cpp
|
||||
OptimalCompressDXT.h
|
||||
OptimalCompressDXT.cpp
|
||||
SingleColorLookup.h
|
||||
CompressionOptions.h
|
||||
CompressionOptions.cpp
|
||||
@ -43,8 +43,9 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
ADD_DEFINITIONS(-DNVTT_EXPORTS)
|
||||
|
||||
IF(NVTT_SHARED)
|
||||
ADD_LIBRARY(nvtt SHARED ${DXT_SRCS})
|
||||
IF(NVTT_SHARED)
|
||||
ADD_DEFINITIONS(-DNVTT_SHARED=1)
|
||||
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
|
||||
ELSE(NVTT_SHARED)
|
||||
ADD_LIBRARY(nvtt ${NVTT_SRCS})
|
||||
ENDIF(NVTT_SHARED)
|
||||
@ -79,15 +80,12 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
|
||||
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
|
||||
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
|
||||
|
||||
ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
|
||||
TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
|
||||
|
||||
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
|
||||
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
|
||||
|
||||
INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin)
|
||||
|
||||
# UI tools
|
||||
# UI tools
|
||||
IF(QT4_FOUND AND NOT MSVC)
|
||||
SET(QT_USE_QTOPENGL TRUE)
|
||||
INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
@ -109,7 +107,7 @@ IF(QT4_FOUND AND NOT MSVC)
|
||||
|
||||
ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
|
||||
TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
|
||||
|
||||
|
||||
ENDIF(QT4_FOUND AND NOT MSVC)
|
||||
|
||||
|
||||
|
@ -29,8 +29,8 @@
|
||||
|
||||
#include "nvtt.h"
|
||||
#include "CompressDXT.h"
|
||||
#include "FastCompressDXT.h"
|
||||
#include "QuickCompressDXT.h"
|
||||
#include "OptimalCompressDXT.h"
|
||||
#include "CompressionOptions.h"
|
||||
#include "OutputOptions.h"
|
||||
|
||||
@ -57,26 +57,33 @@ using namespace nv;
|
||||
using namespace nvtt;
|
||||
|
||||
|
||||
void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
|
||||
nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
}
|
||||
|
||||
nv::FastCompressor::~FastCompressor()
|
||||
{
|
||||
}
|
||||
|
||||
void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
|
||||
{
|
||||
m_image = image;
|
||||
m_alphaMode = alphaMode;
|
||||
}
|
||||
|
||||
void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT1 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
QuickCompress::compressDXT1(rgba.color(0), &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT1(rgba, &block);
|
||||
}
|
||||
QuickCompress::compressDXT1(rgba, &block);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
@ -86,27 +93,19 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
||||
}
|
||||
|
||||
|
||||
void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & outputOptions)
|
||||
void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT1 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
// @@ We could do better here: check for single RGB, but varying alpha.
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
QuickCompress::compressDXT1a(rgba.color(0), &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT1a(rgba, &block);
|
||||
}
|
||||
QuickCompress::compressDXT1a(rgba, &block);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
@ -116,17 +115,18 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & o
|
||||
}
|
||||
|
||||
|
||||
void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
|
||||
void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT3 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
QuickCompress::compressDXT3(rgba, &block);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
@ -137,19 +137,19 @@ void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Privat
|
||||
}
|
||||
|
||||
|
||||
void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
|
||||
void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT5 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(image, x, y);
|
||||
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
|
||||
nv::compressBlock_BoundsRange(rgba, &block);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
QuickCompress::compressDXT5(rgba, &block, 0);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
@ -159,23 +159,21 @@ void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Privat
|
||||
}
|
||||
|
||||
|
||||
void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
|
||||
void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT5 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
// copy X coordinate to alpha channel and Y coordinate to green channel.
|
||||
rgba.swizzleDXT5n();
|
||||
|
||||
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
|
||||
nv::compressBlock_BoundsRange(rgba, &block);
|
||||
QuickCompress::compressDXT5(rgba, &block, 0);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
@ -185,59 +183,45 @@ void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Priva
|
||||
}
|
||||
|
||||
|
||||
void nv::fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
|
||||
nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
|
||||
{
|
||||
// @@ TODO
|
||||
// compress red channel (X)
|
||||
}
|
||||
|
||||
|
||||
void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
|
||||
nv::SlowCompressor::~SlowCompressor()
|
||||
{
|
||||
// @@ TODO
|
||||
// compress red, green channels (X,Y)
|
||||
}
|
||||
|
||||
|
||||
void nv::doPrecomputation()
|
||||
void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
|
||||
{
|
||||
static bool done = false; // @@ Stop using statics for reentrancy. Although the worst that could happen is that this stuff is precomputed multiple times.
|
||||
|
||||
if (!done)
|
||||
{
|
||||
done = true;
|
||||
squish::FastClusterFit::DoPrecomputation();
|
||||
}
|
||||
m_image = image;
|
||||
m_alphaMode = alphaMode;
|
||||
}
|
||||
|
||||
|
||||
void nv::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT1 block;
|
||||
|
||||
doPrecomputation();
|
||||
|
||||
//squish::WeightedClusterFit fit;
|
||||
squish::WeightedClusterFit fit;
|
||||
//squish::ClusterFit fit;
|
||||
squish::FastClusterFit fit;
|
||||
//squish::FastClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
QuickCompress::compressDXT1(rgba.color(0), &block);
|
||||
OptimalCompress::compressDXT1(rgba.color(0), &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
|
||||
squish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
|
||||
fit.SetColourSet(&colours, squish::kDxt1);
|
||||
fit.Compress(&block);
|
||||
}
|
||||
@ -250,10 +234,10 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
|
||||
}
|
||||
|
||||
|
||||
void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT1 block;
|
||||
@ -264,11 +248,20 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
if (rgba.isSingleColor())
|
||||
bool anyAlpha = false;
|
||||
bool allAlpha = true;
|
||||
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
QuickCompress::compressDXT1a(rgba.color(0), &block);
|
||||
if (rgba.color(i).a < 128) anyAlpha = true;
|
||||
else allAlpha = false;
|
||||
}
|
||||
|
||||
if ((!anyAlpha && rgba.isSingleColor() || allAlpha))
|
||||
{
|
||||
OptimalCompress::compressDXT1a(rgba.color(0), &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -285,29 +278,37 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
|
||||
}
|
||||
|
||||
|
||||
void nv::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT3 block;
|
||||
|
||||
squish::WeightedClusterFit fit;
|
||||
//squish::FastClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
// Compress explicit alpha.
|
||||
QuickCompress::compressDXT3A(rgba, &block.alpha);
|
||||
|
||||
OptimalCompress::compressDXT3A(rgba, &block.alpha);
|
||||
|
||||
// Compress color.
|
||||
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block.color);
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
|
||||
}
|
||||
else
|
||||
{
|
||||
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block.color);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
@ -316,10 +317,10 @@ void nv::compressDXT3(const Image * image, const OutputOptions::Private & output
|
||||
}
|
||||
}
|
||||
|
||||
void nv::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT5 block;
|
||||
@ -330,22 +331,29 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
// Compress alpha.
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
compressBlock_BruteForce(rgba, &block.alpha);
|
||||
OptimalCompress::compressDXT5A(rgba, &block.alpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT5A(rgba, &block.alpha);
|
||||
}
|
||||
|
||||
|
||||
// Compress color.
|
||||
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block.color);
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
|
||||
}
|
||||
else
|
||||
{
|
||||
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block.color);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
@ -355,28 +363,25 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
|
||||
}
|
||||
|
||||
|
||||
void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT5 block;
|
||||
|
||||
doPrecomputation();
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
// copy X coordinate to green channel and Y coordinate to alpha channel.
|
||||
rgba.swizzleDXT5n();
|
||||
|
||||
// Compress X.
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
compressBlock_BruteForce(rgba, &block.alpha);
|
||||
OptimalCompress::compressDXT5A(rgba, &block.alpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -384,7 +389,7 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
|
||||
}
|
||||
|
||||
// Compress Y.
|
||||
QuickCompress::compressDXT1G(rgba, &block.color);
|
||||
OptimalCompress::compressDXT1G(rgba, &block.color);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
@ -394,10 +399,10 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
|
||||
}
|
||||
|
||||
|
||||
void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
AlphaBlockDXT5 block;
|
||||
@ -405,11 +410,11 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(image, x, y);
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
compressBlock_BruteForce(rgba, &block);
|
||||
OptimalCompress::compressDXT5A(rgba, &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -424,10 +429,10 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
|
||||
}
|
||||
|
||||
|
||||
void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock xcolor;
|
||||
ColorBlock ycolor;
|
||||
@ -437,16 +442,16 @@ void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & o
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
xcolor.init(image, x, y);
|
||||
xcolor.init(m_image, x, y);
|
||||
xcolor.splatX();
|
||||
|
||||
ycolor.init(image, x, y);
|
||||
ycolor.init(m_image, x, y);
|
||||
ycolor.splatY();
|
||||
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
compressBlock_BruteForce(xcolor, &block.x);
|
||||
compressBlock_BruteForce(ycolor, &block.y);
|
||||
OptimalCompress::compressDXT5A(xcolor, &block.x);
|
||||
OptimalCompress::compressDXT5A(ycolor, &block.y);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -32,26 +32,46 @@ namespace nv
|
||||
class Image;
|
||||
class FloatImage;
|
||||
|
||||
void doPrecomputation();
|
||||
|
||||
// Fast compressors.
|
||||
void fastCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void fastCompressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
class FastCompressor
|
||||
{
|
||||
public:
|
||||
FastCompressor();
|
||||
~FastCompressor();
|
||||
|
||||
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
|
||||
|
||||
void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
||||
private:
|
||||
const Image * m_image;
|
||||
nvtt::AlphaMode m_alphaMode;
|
||||
};
|
||||
|
||||
class SlowCompressor
|
||||
{
|
||||
public:
|
||||
SlowCompressor();
|
||||
~SlowCompressor();
|
||||
|
||||
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
|
||||
|
||||
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
||||
private:
|
||||
const Image * m_image;
|
||||
nvtt::AlphaMode m_alphaMode;
|
||||
};
|
||||
|
||||
// Normal compressors.
|
||||
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void compressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void compressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
|
||||
// External compressors.
|
||||
#if defined(HAVE_S3QUANT)
|
||||
void s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
@ -123,7 +123,7 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
|
||||
}
|
||||
|
||||
// Zero padding.
|
||||
for (uint x = w; x < pitch; x++)
|
||||
for (uint x = w * byteCount; x < pitch; x++)
|
||||
{
|
||||
*(dst + x) = 0;
|
||||
}
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <nvimage/Filter.h>
|
||||
#include <nvimage/Quantize.h>
|
||||
#include <nvimage/NormalMap.h>
|
||||
#include <nvimage/PixelFormat.h>
|
||||
|
||||
#include "Compressor.h"
|
||||
#include "InputOptions.h"
|
||||
@ -41,7 +42,6 @@
|
||||
#include "OutputOptions.h"
|
||||
|
||||
#include "CompressDXT.h"
|
||||
#include "FastCompressDXT.h"
|
||||
#include "CompressRGB.h"
|
||||
#include "cuda/CudaUtils.h"
|
||||
#include "cuda/CudaCompressDXT.h"
|
||||
@ -53,10 +53,10 @@ using namespace nvtt;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
|
||||
static int blockSize(Format format)
|
||||
{
|
||||
if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) {
|
||||
if (format == Format_DXT1 || format == Format_DXT1a) {
|
||||
return 8;
|
||||
}
|
||||
else if (format == Format_DXT3) {
|
||||
@ -71,9 +71,6 @@ namespace
|
||||
else if (format == Format_BC5) {
|
||||
return 16;
|
||||
}
|
||||
else if (format == Format_CTX1) {
|
||||
return 8;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -124,7 +121,7 @@ namespace nvtt
|
||||
m_fixedImage = NULL;
|
||||
m_floatImage = image;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Convert linear float image to fixed image ready for compression.
|
||||
void toFixedImage(const InputOptions::Private & inputOptions)
|
||||
@ -156,7 +153,7 @@ namespace nvtt
|
||||
if (inputOptions.isNormalMap)
|
||||
{
|
||||
// Expand normals to [-1, 1] range.
|
||||
// floatImage->expandNormals(0);
|
||||
// floatImage->expandNormals(0);
|
||||
}
|
||||
else if (inputOptions.inputGamma != 1.0f)
|
||||
{
|
||||
@ -196,36 +193,29 @@ namespace nvtt
|
||||
return m_fixedImage.ptr();
|
||||
}
|
||||
|
||||
|
||||
|
||||
private:
|
||||
const Image * m_inputImage;
|
||||
AutoPtr<Image> m_fixedImage;
|
||||
AutoPtr<FloatImage> m_floatImage;
|
||||
};
|
||||
|
||||
}
|
||||
} // nvtt namespace
|
||||
|
||||
|
||||
Compressor::Compressor() : m(*new Compressor::Private())
|
||||
{
|
||||
// CUDA initialization.
|
||||
m.cudaSupported = cuda::isHardwarePresent();
|
||||
m.cudaEnabled = m.cudaSupported;
|
||||
m.cudaEnabled = false;
|
||||
m.cudaDevice = -1;
|
||||
|
||||
if (m.cudaEnabled)
|
||||
{
|
||||
m.cuda = new CudaCompressor();
|
||||
|
||||
if (!m.cuda->isValid())
|
||||
{
|
||||
m.cudaEnabled = false;
|
||||
m.cuda = NULL;
|
||||
}
|
||||
}
|
||||
enableCudaAcceleration(m.cudaSupported);
|
||||
}
|
||||
|
||||
Compressor::~Compressor()
|
||||
{
|
||||
enableCudaAcceleration(false);
|
||||
delete &m;
|
||||
}
|
||||
|
||||
@ -235,17 +225,33 @@ void Compressor::enableCudaAcceleration(bool enable)
|
||||
{
|
||||
if (m.cudaSupported)
|
||||
{
|
||||
m.cudaEnabled = enable;
|
||||
}
|
||||
|
||||
if (m.cudaEnabled && m.cuda == NULL)
|
||||
{
|
||||
m.cuda = new CudaCompressor();
|
||||
|
||||
if (!m.cuda->isValid())
|
||||
if (m.cudaEnabled && !enable)
|
||||
{
|
||||
m.cudaEnabled = false;
|
||||
m.cuda = NULL;
|
||||
|
||||
if (m.cudaDevice != -1)
|
||||
{
|
||||
// Exit device.
|
||||
cuda::exitDevice();
|
||||
}
|
||||
}
|
||||
else if (!m.cudaEnabled && enable)
|
||||
{
|
||||
// Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
|
||||
m.cudaEnabled = cuda::initDevice(&m.cudaDevice);
|
||||
|
||||
if (m.cudaEnabled)
|
||||
{
|
||||
// Create compressor if initialization succeeds.
|
||||
m.cuda = new CudaCompressor();
|
||||
|
||||
// But cleanup if failed.
|
||||
if (!m.cuda->isValid())
|
||||
{
|
||||
enableCudaAcceleration(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -286,9 +292,9 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
|
||||
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
inputOptions.computeTargetExtents();
|
||||
|
||||
|
||||
// Output DDS header.
|
||||
if (!outputHeader(inputOptions, compressionOptions, outputOptions))
|
||||
{
|
||||
@ -304,7 +310,7 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
|
||||
}
|
||||
|
||||
outputOptions.closeFile();
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -319,15 +325,15 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
|
||||
}
|
||||
|
||||
DDSHeader header;
|
||||
|
||||
|
||||
header.setWidth(inputOptions.targetWidth);
|
||||
header.setHeight(inputOptions.targetHeight);
|
||||
|
||||
|
||||
int mipmapCount = inputOptions.realMipmapCount();
|
||||
nvDebugCheck(mipmapCount > 0);
|
||||
|
||||
|
||||
header.setMipmapCount(mipmapCount);
|
||||
|
||||
|
||||
if (inputOptions.textureType == TextureType_2D) {
|
||||
header.setTexture2D();
|
||||
}
|
||||
@ -335,10 +341,10 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
|
||||
header.setTextureCube();
|
||||
}
|
||||
/*else if (inputOptions.textureType == TextureType_3D) {
|
||||
header.setTexture3D();
|
||||
header.setDepth(inputOptions.targetDepth);
|
||||
header.setTexture3D();
|
||||
header.setDepth(inputOptions.targetDepth);
|
||||
}*/
|
||||
|
||||
|
||||
if (compressionOptions.format == Format_RGBA)
|
||||
{
|
||||
header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
|
||||
@ -347,8 +353,8 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
|
||||
else
|
||||
{
|
||||
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
|
||||
|
||||
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
|
||||
|
||||
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
|
||||
header.setFourCC('D', 'X', 'T', '1');
|
||||
if (inputOptions.isNormalMap) header.setNormalFlag(true);
|
||||
}
|
||||
@ -369,15 +375,11 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
|
||||
header.setFourCC('A', 'T', 'I', '2');
|
||||
if (inputOptions.isNormalMap) header.setNormalFlag(true);
|
||||
}
|
||||
else if (compressionOptions.format == Format_CTX1) {
|
||||
header.setFourCC('C', 'T', 'X', '1');
|
||||
if (inputOptions.isNormalMap) header.setNormalFlag(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Swap bytes if necessary.
|
||||
header.swapBytes();
|
||||
|
||||
|
||||
uint headerSize = 128;
|
||||
if (header.hasDX10Header())
|
||||
{
|
||||
@ -390,7 +392,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
|
||||
{
|
||||
outputOptions.errorHandler->error(Error_FileWrite);
|
||||
}
|
||||
|
||||
|
||||
return writeSucceed;
|
||||
}
|
||||
|
||||
@ -426,7 +428,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
quantizeMipmap(mipmap, compressionOptions);
|
||||
|
||||
compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
|
||||
@ -436,7 +438,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
|
||||
h = max(1U, h / 2);
|
||||
d = max(1U, d / 2);
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -478,11 +480,6 @@ bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Privat
|
||||
// Convert linear float image to fixed image ready for compression.
|
||||
mipmap.toFixedImage(inputOptions);
|
||||
|
||||
if (inputOptions.premultiplyAlpha)
|
||||
{
|
||||
premultiplyAlphaMipmap(mipmap, inputOptions);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -492,7 +489,7 @@ int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOpti
|
||||
{
|
||||
int idx = f * inputOptions.mipmapCount + m;
|
||||
const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx];
|
||||
|
||||
|
||||
if (inputImage.width == int(w) && inputImage.height == int(h) && inputImage.depth == int(d))
|
||||
{
|
||||
if (inputImage.data != NULL)
|
||||
@ -547,7 +544,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::
|
||||
mipmap.toFloatImage(inputOptions);
|
||||
|
||||
const FloatImage * floatImage = mipmap.asFloatImage();
|
||||
|
||||
|
||||
if (inputOptions.mipmapFilter == MipmapFilter_Box)
|
||||
{
|
||||
// Use fast downsample.
|
||||
@ -565,7 +562,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::
|
||||
filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
|
||||
mipmap.setImage(floatImage->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode));
|
||||
}
|
||||
|
||||
|
||||
// Normalize mipmap.
|
||||
if ((inputOptions.isNormalMap || inputOptions.convertToNormalMap) && inputOptions.normalizeMipmaps)
|
||||
{
|
||||
@ -583,40 +580,17 @@ void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Priva
|
||||
|
||||
// Resize image.
|
||||
BoxFilter boxFilter;
|
||||
mipmap.setImage(mipmap.asFloatImage()->downSample(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
|
||||
mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
|
||||
}
|
||||
|
||||
|
||||
void Compressor::Private::premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
|
||||
{
|
||||
nvDebugCheck(mipmap.asFixedImage() != NULL);
|
||||
|
||||
Image * image = mipmap.asMutableFixedImage();
|
||||
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
|
||||
const uint count = w * h;
|
||||
|
||||
for (uint i = 0; i < count; ++i)
|
||||
{
|
||||
Color32 c = image->pixel(i);
|
||||
|
||||
c.r = (uint(c.r) * uint(c.a)) >> 8;
|
||||
c.g = (uint(c.g) * uint(c.a)) >> 8;
|
||||
c.b = (uint(c.b) * uint(c.a)) >> 8;
|
||||
|
||||
image->pixel(i) = c;
|
||||
}
|
||||
}
|
||||
|
||||
// Process an input image: Convert to normal map, normalize, or convert to linear space.
|
||||
void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
|
||||
{
|
||||
if (inputOptions.convertToNormalMap)
|
||||
{
|
||||
mipmap.toFixedImage(inputOptions);
|
||||
|
||||
|
||||
Vector4 heightScale = inputOptions.heightFactors;
|
||||
mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale));
|
||||
}
|
||||
@ -653,13 +627,6 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
|
||||
{
|
||||
nvDebugCheck(mipmap.asFixedImage() != NULL);
|
||||
|
||||
if (compressionOptions.enableColorDithering)
|
||||
{
|
||||
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
|
||||
{
|
||||
Quantize::FloydSteinberg_RGB16(mipmap.asMutableFixedImage());
|
||||
}
|
||||
}
|
||||
if (compressionOptions.binaryAlpha)
|
||||
{
|
||||
if (compressionOptions.enableAlphaDithering)
|
||||
@ -671,19 +638,50 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
|
||||
Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
if (compressionOptions.enableColorDithering || compressionOptions.enableAlphaDithering)
|
||||
{
|
||||
uint rsize = 8;
|
||||
uint gsize = 8;
|
||||
uint bsize = 8;
|
||||
uint asize = 8;
|
||||
|
||||
if (compressionOptions.enableColorDithering)
|
||||
{
|
||||
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
|
||||
{
|
||||
rsize = 5;
|
||||
gsize = 6;
|
||||
bsize = 5;
|
||||
}
|
||||
else if (compressionOptions.format == Format_RGB)
|
||||
{
|
||||
uint rshift, gshift, bshift;
|
||||
PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize);
|
||||
PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize);
|
||||
PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize);
|
||||
}
|
||||
}
|
||||
|
||||
if (compressionOptions.enableAlphaDithering)
|
||||
{
|
||||
if (compressionOptions.format == Format_DXT3)
|
||||
{
|
||||
Quantize::Alpha4(mipmap.asMutableFixedImage());
|
||||
asize = 4;
|
||||
}
|
||||
else if (compressionOptions.format == Format_DXT1a)
|
||||
else if (compressionOptions.format == Format_RGB)
|
||||
{
|
||||
Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
|
||||
uint ashift;
|
||||
PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize);
|
||||
}
|
||||
}
|
||||
|
||||
if (compressionOptions.binaryAlpha)
|
||||
{
|
||||
asize = 8; // Already quantized.
|
||||
}
|
||||
|
||||
Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
|
||||
}
|
||||
}
|
||||
|
||||
@ -692,9 +690,16 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
|
||||
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
|
||||
{
|
||||
const Image * image = mipmap.asFixedImage();
|
||||
|
||||
nvDebugCheck(image != NULL);
|
||||
|
||||
FastCompressor fast;
|
||||
fast.setImage(image, inputOptions.alphaMode);
|
||||
|
||||
SlowCompressor slow;
|
||||
slow.setImage(image, inputOptions.alphaMode);
|
||||
|
||||
const bool useCuda = cudaEnabled && image->width() * image->height() >= 512;
|
||||
|
||||
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
|
||||
{
|
||||
compressRGB(image, outputOptions, compressionOptions);
|
||||
@ -710,76 +715,66 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_ATITC)
|
||||
if (compressionOptions.externalCompressor == "ati")
|
||||
{
|
||||
atiCompressDXT1(image, outputOptions);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fastCompressDXT1(image, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cudaEnabled)
|
||||
if (compressionOptions.externalCompressor == "ati")
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->compressDXT1(image, compressionOptions, outputOptions);
|
||||
atiCompressDXT1(image, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
compressDXT1(image, outputOptions, compressionOptions);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fast.compressDXT1(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->setImage(image, inputOptions.alphaMode);
|
||||
cuda->compressDXT1(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
slow.compressDXT1(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (compressionOptions.format == Format_DXT1a)
|
||||
{
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fastCompressDXT1a(image, outputOptions);
|
||||
fast.compressDXT1a(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cudaEnabled)
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
/*cuda*/compressDXT1a(image, outputOptions, compressionOptions);
|
||||
/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
compressDXT1a(image, outputOptions, compressionOptions);
|
||||
slow.compressDXT1a(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (compressionOptions.format == Format_DXT1n)
|
||||
{
|
||||
if (cudaEnabled)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->compressDXT1n(image, compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
|
||||
}
|
||||
}
|
||||
else if (compressionOptions.format == Format_DXT3)
|
||||
{
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fastCompressDXT3(image, outputOptions);
|
||||
fast.compressDXT3(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cudaEnabled)
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->compressDXT3(image, inputOptions, compressionOptions, outputOptions);
|
||||
cuda->setImage(image, inputOptions.alphaMode);
|
||||
cuda->compressDXT3(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
compressDXT3(image, outputOptions, compressionOptions);
|
||||
slow.compressDXT3(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -787,18 +782,19 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
|
||||
{
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fastCompressDXT5(image, outputOptions);
|
||||
fast.compressDXT5(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cudaEnabled)
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->compressDXT5(image, inputOptions, compressionOptions, outputOptions);
|
||||
cuda->setImage(image, inputOptions.alphaMode);
|
||||
cuda->compressDXT5(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
compressDXT5(image, outputOptions, compressionOptions);
|
||||
slow.compressDXT5(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -806,32 +802,20 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
|
||||
{
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fastCompressDXT5n(image, outputOptions);
|
||||
fast.compressDXT5n(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
compressDXT5n(image, outputOptions, compressionOptions);
|
||||
slow.compressDXT5n(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
else if (compressionOptions.format == Format_BC4)
|
||||
{
|
||||
compressBC4(image, outputOptions, compressionOptions);
|
||||
slow.compressBC4(compressionOptions, outputOptions);
|
||||
}
|
||||
else if (compressionOptions.format == Format_BC5)
|
||||
{
|
||||
compressBC5(image, outputOptions, compressionOptions);
|
||||
}
|
||||
else if (compressionOptions.format == Format_CTX1)
|
||||
{
|
||||
if (cudaEnabled)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->compressCTX1(image, compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
|
||||
}
|
||||
slow.compressBC5(compressionOptions, outputOptions);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -844,27 +828,27 @@ int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions
|
||||
const uint bitCount = compressionOptions.bitcount;
|
||||
|
||||
inputOptions.computeTargetExtents();
|
||||
|
||||
|
||||
uint mipmapCount = inputOptions.realMipmapCount();
|
||||
|
||||
|
||||
int size = 0;
|
||||
|
||||
|
||||
for (uint f = 0; f < inputOptions.faceCount; f++)
|
||||
{
|
||||
uint w = inputOptions.targetWidth;
|
||||
uint h = inputOptions.targetHeight;
|
||||
uint d = inputOptions.targetDepth;
|
||||
|
||||
|
||||
for (uint m = 0; m < mipmapCount; m++)
|
||||
{
|
||||
size += computeImageSize(w, h, d, bitCount, format);
|
||||
|
||||
|
||||
// Compute extents of next mipmap:
|
||||
w = max(1U, w / 2);
|
||||
h = max(1U, h / 2);
|
||||
d = max(1U, d / 2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return size;
|
||||
}
|
||||
|
@ -58,16 +58,17 @@ namespace nvtt
|
||||
|
||||
void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
|
||||
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
|
||||
void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
|
||||
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
|
||||
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
|
||||
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
|
||||
|
||||
|
||||
|
||||
public:
|
||||
|
||||
bool cudaSupported;
|
||||
bool cudaEnabled;
|
||||
int cudaDevice;
|
||||
|
||||
nv::AutoPtr<nv::CudaCompressor> cuda;
|
||||
|
||||
|
@ -1,456 +0,0 @@
|
||||
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <nvmath/Color.h>
|
||||
#include <nvimage/ColorBlock.h>
|
||||
#include <nvimage/BlockDXT.h>
|
||||
|
||||
#include "FastCompressDXT.h"
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(__SSE__)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(__MMX__)
|
||||
#include <mmintrin.h>
|
||||
#endif
|
||||
|
||||
#undef __VEC__
|
||||
#if defined(__VEC__)
|
||||
#include <altivec.h>
|
||||
#undef bool
|
||||
#endif
|
||||
// Online Resources:
|
||||
// - http://www.jasondorie.com/ImageLib.zip
|
||||
// - http://homepage.hispeed.ch/rscheidegger/dri_experimental/s3tc_index.html
|
||||
// - http://www.sjbrown.co.uk/?article=dxt
|
||||
|
||||
using namespace nv;
|
||||
|
||||
|
||||
#if defined(__SSE2__) && 0
|
||||
|
||||
// @@ TODO
|
||||
|
||||
typedef __m128i VectorColor;
|
||||
|
||||
// SSE2 color load -- unfinished placeholder. The enclosing
// `#if defined(__SSE2__) && 0` keeps this branch from ever being compiled,
// and `...` below is not valid C++ (see the "@@ TODO" marker above).
inline static __m128i loadColor(Color32 c)
|
||||
{
|
||||
return ...;
|
||||
}
|
||||
|
||||
// SSE2 per-lane absolute difference -- unfinished placeholder in a branch
// that is disabled by `&& 0`; the `...` body is not valid C++.
inline static __m128i absoluteDifference(__m128i a, __m128i b)
|
||||
{
|
||||
return ...;
|
||||
}
|
||||
|
||||
// SSE2 color distance -- stub that always reports a distance of 0. It lives
// in a branch disabled by `&& 0`, so it is never compiled.
inline uint colorDistance(__m128i a, __m128i b)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(__MMX__) && 0
|
||||
|
||||
typedef __m64 VectorColor;
|
||||
|
||||
// MMX color load: moves the 32-bit color into an MMX register and interleaves
// its low four bytes with zero bytes, giving one 16-bit lane per channel.
inline static __m64 loadColor(Color32 c)
{
    const __m64 packedChannels = _mm_cvtsi32_si64(c);
    const __m64 zeroes = _mm_setzero_si64();
    return _mm_unpacklo_pi8(packedChannels, zeroes);
}
|
||||
|
||||
// Per-lane |a - b| on unsigned 16-bit lanes.
inline static __m64 absoluteDifference(__m64 a, __m64 b)
{
    // Unsigned saturating subtraction clamps the "negative" direction to zero,
    // so exactly one of the two differences is non-zero in each lane and the
    // OR of both is the absolute difference.
    const __m64 aMinusB = _mm_subs_pu16(a, b);
    const __m64 bMinusA = _mm_subs_pu16(b, a);
    return _mm_or_si64(aMinusB, bMinusA);
}
|
||||
|
||||
// Sum of the per-lane absolute differences between two loaded colors.
inline uint colorDistance(__m64 a, __m64 b)
{
    // The union gives scalar access to the four 16-bit lanes of the result.
    union {
        __m64 v;
        uint16 part[4];
    } lanes;

    lanes.v = absoluteDifference(a, b);

    // @@ This is very slow! (horizontal add done through memory, lane by lane)
    uint sum = 0;
    for (int lane = 0; lane < 4; lane++)
    {
        sum += lanes.part[lane];
    }
    return sum;
}
|
||||
|
||||
#define vectorEnd _mm_empty
|
||||
|
||||
#elif defined(__VEC__)
|
||||
|
||||
typedef vector signed int VectorColor;
|
||||
|
||||
// AltiVec color load: builds a vector of four signed ints from the r, g, b and
// a channels of `c`. Note that `__VEC__` is #undef'ed earlier in this file, so
// this branch is currently never compiled.
inline static vector signed int loadColor(Color32 c)
|
||||
{
|
||||
return (vector signed int) (c.r, c.g, c.b, c.a);
|
||||
}
|
||||
|
||||
// Get the absolute distance between the given colors: the per-lane absolute
// differences of c0 and c1 are summed across the vector (all four lanes take
// part, so the alpha lane is included).
|
||||
inline static uint colorDistance(vector signed int c0, vector signed int c1)
|
||||
{
|
||||
int result;
|
||||
// presumably vec_sums leaves the across-vector sum in element 3 -- confirm against the AltiVec PIM
vector signed int v = vec_sums(vec_abs(vec_sub(c0, c1)), (vector signed int)0);
|
||||
// Replicate element 3 and store a single element into `result`.
vec_ste(vec_splat(v, 3), 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// No-op counterpart of the MMX branch's `vectorEnd` (which maps to _mm_empty);
// the AltiVec path needs no cleanup after the vector loop.
inline void vectorEnd()
|
||||
{
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
typedef Color32 VectorColor;
|
||||
|
||||
// Scalar fallback: VectorColor is plain Color32 here, so "loading" a color is
// the identity.
inline static Color32 loadColor(Color32 c)
|
||||
{
|
||||
return c;
|
||||
}
|
||||
|
||||
// Returns `c` with its r, g and b channels scaled by alpha; alpha itself is
// copied unchanged. The product is divided by 256 (>> 8), not 255, so a
// fully opaque 255 channel comes out as 254.
inline static Color32 premultiplyAlpha(Color32 c)
{
    const int alpha = c.a;

    Color32 scaled;
    scaled.a = c.a;
    scaled.b = (c.b * alpha) >> 8;
    scaled.g = (c.g * alpha) >> 8;
    scaled.r = (c.r * alpha) >> 8;
    return scaled;
}
|
||||
|
||||
// Square of `s` in unsigned arithmetic (wraps modulo 2^N on overflow, so a
// negative int difference converted to uint still squares to the right value).
inline static uint sqr(uint s)
|
||||
{
|
||||
return s*s;
|
||||
}
|
||||
|
||||
// Get the absolute distance between the given colors.
|
||||
inline static uint colorDistance(Color32 c0, Color32 c1)
|
||||
{
|
||||
return sqr(c0.r - c1.r) + sqr(c0.g - c1.g) + sqr(c0.b - c1.b);
|
||||
//return abs(c0.r - c1.r) + abs(c0.g - c1.g) + abs(c0.b - c1.b);
|
||||
}
|
||||
|
||||
// No-op: the scalar fallback has no vector state to reset after the loop.
inline void vectorEnd()
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Builds the 32-bit selector word for a 4x4 block: for every texel the
// distances to the four palette entries are compared pairwise and folded into
// a 2-bit selector, stored at bits [2*i, 2*i+1] for texel i.
inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4])
{
    const VectorColor entry0 = loadColor(palette[0]);
    const VectorColor entry1 = loadColor(palette[1]);
    const VectorColor entry2 = loadColor(palette[2]);
    const VectorColor entry3 = loadColor(palette[3]);

    uint packed = 0;
    for (int texel = 0; texel < 16; texel++)
    {
        const VectorColor pixel = loadColor(rgba.color(texel));

        const uint dist0 = colorDistance(entry0, pixel);
        const uint dist1 = colorDistance(entry1, pixel);
        const uint dist2 = colorDistance(entry2, pixel);
        const uint dist3 = colorDistance(entry3, pixel);

        // Branch-free selection from pairwise comparisons.
        const uint gt03 = dist0 > dist3;
        const uint gt12 = dist1 > dist2;
        const uint gt02 = dist0 > dist2;
        const uint gt13 = dist1 > dist3;
        const uint gt23 = dist2 > dist3;

        const uint lowBit  = gt03 & gt23;
        const uint highBit = (gt12 & gt02) | (gt03 & gt13);

        packed |= (lowBit | (highBit << 1)) << (2 * texel);
    }

    // Lets the SIMD back end reset its state (no-op for the scalar path).
    vectorEnd();
    return packed;
}
|
||||
|
||||
|
||||
// Compressor that uses bounding box.
|
||||
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
|
||||
{
|
||||
Color32 c0, c1;
|
||||
rgba.boundsRange(&c1, &c0);
|
||||
|
||||
block->col0 = toColor16(c0);
|
||||
block->col1 = toColor16(c1);
|
||||
|
||||
nvDebugCheck(block->col0.u > block->col1.u);
|
||||
|
||||
// Use 4 color mode only.
|
||||
//if (block->col0.u < block->col1.u) {
|
||||
// swap(block->col0.u, block->col1.u);
|
||||
//}
|
||||
|
||||
Color32 palette[4];
|
||||
block->evaluatePalette4(palette);
|
||||
|
||||
block->indices = computeIndices(rgba, palette);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Encode DXT3 block.
|
||||
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block)
|
||||
{
|
||||
compressBlock_BoundsRange(rgba, &block->color);
|
||||
compressBlock(rgba, &block->alpha);
|
||||
}
|
||||
|
||||
// Encode DXT3 alpha block.
|
||||
void nv::compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block)
|
||||
{
|
||||
block->alpha0 = rgba.color(0).a >> 4;
|
||||
block->alpha1 = rgba.color(1).a >> 4;
|
||||
block->alpha2 = rgba.color(2).a >> 4;
|
||||
block->alpha3 = rgba.color(3).a >> 4;
|
||||
block->alpha4 = rgba.color(4).a >> 4;
|
||||
block->alpha5 = rgba.color(5).a >> 4;
|
||||
block->alpha6 = rgba.color(6).a >> 4;
|
||||
block->alpha7 = rgba.color(7).a >> 4;
|
||||
block->alpha8 = rgba.color(8).a >> 4;
|
||||
block->alpha9 = rgba.color(9).a >> 4;
|
||||
block->alphaA = rgba.color(10).a >> 4;
|
||||
block->alphaB = rgba.color(11).a >> 4;
|
||||
block->alphaC = rgba.color(12).a >> 4;
|
||||
block->alphaD = rgba.color(13).a >> 4;
|
||||
block->alphaE = rgba.color(14).a >> 4;
|
||||
block->alphaF = rgba.color(15).a >> 4;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Assigns every texel the palette entry with the smallest squared alpha error
// (the first such entry wins ties) and returns the summed squared error. The
// 8-entry palette is evaluated once from the block's current endpoints before
// any selector is written.
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
    uint8 palette[8];
    block->evaluatePalette(palette);

    uint errorSum = 0;

    for (uint texel = 0; texel < 16; texel++)
    {
        const int target = rgba.color(texel).a;

        // 256*256 exceeds the largest possible squared error (255*255), so
        // the first entry examined always replaces the sentinel below.
        uint closestError = 256*256;
        uint closest = 8;

        for (uint entry = 0; entry < 8; entry++)
        {
            const int delta = palette[entry] - target;
            const uint squared = delta * delta;

            if (squared >= closestError)
            {
                continue;
            }
            closestError = squared;
            closest = entry;
        }
        nvDebugCheck(closest < 8);

        errorSum += closestError;
        block->setIndex(texel, closest);
    }

    return errorSum;
}
|
||||
|
||||
// Returns the summed squared alpha error the block's current endpoints would
// produce if every texel used its best palette entry. The block itself is not
// modified (no selectors are written).
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
    uint8 alphas[8];
    block->evaluatePalette(alphas);

    uint totalError = 0;

    for (uint i = 0; i < 16; i++)
    {
        const uint8 alpha = rgba.color(i).a;

        // Larger than any reachable squared error (255*255).
        uint besterror = 256*256;
        for (uint p = 0; p < 8; p++)
        {
            const int d = alphas[p] - alpha;
            const uint error = d * d;

            // Only the error matters here; the winning index is not needed.
            if (error < besterror)
            {
                besterror = error;
            }
        }

        totalError += besterror;
    }

    return totalError;
}
|
||||
|
||||
|
||||
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block)
|
||||
{
|
||||
Color32 c0, c1;
|
||||
rgba.boundsRangeAlpha(&c1, &c0);
|
||||
|
||||
block->color.col0 = toColor16(c0);
|
||||
block->color.col1 = toColor16(c1);
|
||||
|
||||
nvDebugCheck(block->color.col0.u > block->color.col1.u);
|
||||
|
||||
Color32 palette[4];
|
||||
block->color.evaluatePalette4(palette);
|
||||
|
||||
block->color.indices = computeIndices(rgba, palette);
|
||||
|
||||
nvDebugCheck(c0.a <= c1.a);
|
||||
|
||||
block->alpha.alpha0 = c0.a;
|
||||
block->alpha.alpha1 = c1.a;
|
||||
|
||||
computeAlphaIndices(rgba, &block->alpha);
|
||||
}
|
||||
|
||||
|
||||
// Fast DXT5 alpha compressor. Takes the min/max alpha of the block, insets
// both ends by 1/32 of the range, then tries the endpoints in both orders and
// keeps whichever yields the smaller summed squared error.
// Returns the error of the block that was written to `block`.
uint nv::compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
    uint8 alpha0 = 0;
    uint8 alpha1 = 255;

    // Get min/max alpha.
    for (uint i = 0; i < 16; i++)
    {
        uint8 alpha = rgba.color(i).a;
        alpha0 = max(alpha0, alpha);
        alpha1 = min(alpha1, alpha);
    }

    // Compute the inset once from the untouched range so that both ends move
    // by the same amount. Deriving the second inset from the already reduced
    // alpha0 made the two ends shrink unevenly (e.g. range 64 -> 2 and 1).
    const uint8 inset = (alpha0 - alpha1) / 32;
    alpha0 = alpha0 - inset;
    alpha1 = alpha1 + inset;

    // Candidate with alpha0 >= alpha1.
    AlphaBlockDXT5 block0;
    block0.alpha0 = alpha0;
    block0.alpha1 = alpha1;
    uint error0 = computeAlphaIndices(rgba, &block0);

    // Candidate with the endpoints swapped.
    AlphaBlockDXT5 block1;
    block1.alpha0 = alpha1;
    block1.alpha1 = alpha0;
    uint error1 = computeAlphaIndices(rgba, &block1);

    // On a tie the swapped candidate is kept.
    if (error0 < error1)
    {
        *block = block0;
        return error0;
    }
    else
    {
        *block = block1;
        return error1;
    }
}
|
||||
|
||||
// Brute-force DXT5 alpha compressor. Starts from the (max, min) alpha pair
// and, when the range is wider than 8, scans endpoint pairs strictly inside
// that range that are more than 8 apart, keeping the pair with the lowest
// computeAlphaError(). Returns the error reported by computeAlphaIndices()
// for the final endpoints.
uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
    uint8 lowest = 255;
    uint8 highest = 0;

    // Alpha range of the block.
    for (uint texel = 0; texel < 16; texel++)
    {
        const uint8 value = rgba.color(texel).a;
        if (value < lowest) lowest = value;
        if (value > highest) highest = value;
    }

    block->alpha0 = highest;
    block->alpha1 = lowest;

    // Narrow ranges are encoded with the plain min/max endpoints.
    if (highest - lowest <= 8)
    {
        return computeAlphaIndices(rgba, block);
    }

    int lowestError = computeAlphaError(rgba, block);
    int chosen0 = highest;
    int chosen1 = lowest;

    for (int a0 = lowest + 9; a0 < highest; a0++)
    {
        for (int a1 = lowest; a1 < a0 - 8; a1++)
        {
            // Pruning: skip pairs whose total inset from the min/max range
            // already exceeds the best error so far. (A centroid-based variant
            // of this bound existed in the original as commented-out code.)
            const int inset = (highest - a0) + (a1 - lowest);
            if (inset <= lowestError)
            {
                block->alpha0 = a0;
                block->alpha1 = a1;
                const int candidateError = computeAlphaError(rgba, block);

                if (candidateError < lowestError)
                {
                    lowestError = candidateError;
                    chosen0 = a0;
                    chosen1 = a1;
                }
            }
        }
    }

    block->alpha0 = chosen0;
    block->alpha1 = chosen1;

    return computeAlphaIndices(rgba, block);
}
|
||||
|
||||
|
||||
|
@ -1,84 +0,0 @@
|
||||
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#ifndef NV_TT_FASTCOMPRESSDXT_H
|
||||
#define NV_TT_FASTCOMPRESSDXT_H
|
||||
|
||||
#include <nvimage/nvimage.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
struct ColorBlock;
|
||||
struct BlockDXT1;
|
||||
struct BlockDXT3;
|
||||
struct BlockDXT5;
|
||||
struct AlphaBlockDXT3;
|
||||
struct AlphaBlockDXT5;
|
||||
|
||||
// Color compression:
|
||||
|
||||
// Compressor that uses the extremes of the luminance axis.
|
||||
// void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
// Compressor that uses the extremes of the luminance axis.
|
||||
// void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
// Compressor that uses bounding box.
|
||||
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
// Compressor that uses bounding box and takes alpha into account.
|
||||
// void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
|
||||
// Simple, but slow compressor that tests all color pairs.
|
||||
// void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
// Brute force 6d search along the best fit axis.
|
||||
// void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
// Spatial greedy search.
|
||||
// void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
// void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
// void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
// Brute force compressor for DXT5n
|
||||
// void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
// Minimize error of the endpoints.
|
||||
// void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
// uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
|
||||
// uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
|
||||
|
||||
// Alpha compression:
|
||||
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
|
||||
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block);
|
||||
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block);
|
||||
|
||||
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
|
||||
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
|
||||
// uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_TT_FASTCOMPRESSDXT_H
|
@ -23,8 +23,6 @@
|
||||
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#include <nvcore/Containers.h> // nextPowerOfTwo
|
||||
|
||||
#include <nvcore/Memory.h>
|
||||
|
||||
#include "nvtt.h"
|
||||
@ -96,7 +94,7 @@ void InputOptions::reset()
|
||||
m.textureType = TextureType_2D;
|
||||
m.inputFormat = InputFormat_BGRA_8UB;
|
||||
|
||||
m.alphaMode = AlphaMode_Transparency;
|
||||
m.alphaMode = AlphaMode_None;
|
||||
|
||||
m.inputGamma = 2.2f;
|
||||
m.outputGamma = 2.2f;
|
||||
@ -120,8 +118,6 @@ void InputOptions::reset()
|
||||
|
||||
m.maxExtent = 0;
|
||||
m.roundMode = RoundMode_None;
|
||||
|
||||
m.premultiplyAlpha = false;
|
||||
}
|
||||
|
||||
|
||||
@ -309,19 +305,6 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2,
|
||||
//m.linearTransform.setRow(channel, w);
|
||||
}
|
||||
|
||||
// Validates a channel swizzle request. Each argument must lie in [0, 3); the
// assignments that would store the selection are commented out, so the call
// currently has no effect beyond these checks.
// NOTE(review): the bound `< 3` rejects index 3 -- confirm whether four source
// channels (`< 4`) were intended.
void InputOptions::setSwizzleTransform(int x, int y, int z, int w)
|
||||
{
|
||||
nvCheck(x >= 0 && x < 3);
|
||||
nvCheck(y >= 0 && y < 3);
|
||||
nvCheck(z >= 0 && z < 3);
|
||||
nvCheck(w >= 0 && w < 3);
|
||||
|
||||
// m.xswizzle = x;
|
||||
// m.yswizzle = y;
|
||||
// m.zswizzle = z;
|
||||
// m.wswizzle = w;
|
||||
}
|
||||
|
||||
void InputOptions::setMaxExtents(int e)
|
||||
{
|
||||
nvDebugCheck(e > 0);
|
||||
@ -333,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode)
|
||||
m.roundMode = mode;
|
||||
}
|
||||
|
||||
// Stores the premultiply-alpha flag in the private options (`m`). This setter
// only records the value; nothing is processed here.
void InputOptions::setPremultiplyAlpha(bool b)
|
||||
{
|
||||
m.premultiplyAlpha = b;
|
||||
}
|
||||
|
||||
void InputOptions::Private::computeTargetExtents() const
|
||||
{
|
||||
|
@ -78,8 +78,6 @@ namespace nvtt
|
||||
uint maxExtent;
|
||||
RoundMode roundMode;
|
||||
|
||||
bool premultiplyAlpha;
|
||||
|
||||
// @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere...
|
||||
mutable uint targetWidth;
|
||||
mutable uint targetHeight;
|
||||
@ -91,7 +89,7 @@ namespace nvtt
|
||||
int realMipmapCount() const;
|
||||
|
||||
const nv::Image * image(uint face, uint mipmap) const;
|
||||
const nv::Image * image(uint idx) const;
|
||||
const nv::Image * image(uint idx) const;
|
||||
|
||||
};
|
||||
|
||||
|
368
src/nvtt/OptimalCompressDXT.cpp
Normal file
368
src/nvtt/OptimalCompressDXT.cpp
Normal file
@ -0,0 +1,368 @@
|
||||
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <nvcore/Containers.h> // swap
|
||||
|
||||
#include <nvmath/Color.h>
|
||||
|
||||
#include <nvimage/ColorBlock.h>
|
||||
#include <nvimage/BlockDXT.h>
|
||||
|
||||
#include "OptimalCompressDXT.h"
|
||||
#include "SingleColorLookup.h"
|
||||
|
||||
|
||||
using namespace nv;
|
||||
using namespace OptimalCompress;
|
||||
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
|
||||
{
|
||||
nvDebugCheck(block != NULL);
|
||||
|
||||
int palette[4];
|
||||
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
|
||||
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
|
||||
palette[2] = (2 * palette[0] + palette[1]) / 3;
|
||||
palette[3] = (2 * palette[1] + palette[0]) / 3;
|
||||
|
||||
int totalError = 0;
|
||||
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
const int green = rgba.color(i).g;
|
||||
|
||||
int error = abs(green - palette[0]);
|
||||
error = min(error, abs(green - palette[1]));
|
||||
error = min(error, abs(green - palette[2]));
|
||||
error = min(error, abs(green - palette[3]));
|
||||
|
||||
totalError += error;
|
||||
}
|
||||
|
||||
return totalError;
|
||||
}
|
||||
|
||||
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
|
||||
{
|
||||
const int color0 = palette[0].g;
|
||||
const int color1 = palette[1].g;
|
||||
const int color2 = palette[2].g;
|
||||
const int color3 = palette[3].g;
|
||||
|
||||
uint indices = 0;
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
const int color = rgba.color(i).g;
|
||||
|
||||
uint d0 = abs(color0 - color);
|
||||
uint d1 = abs(color1 - color);
|
||||
uint d2 = abs(color2 - color);
|
||||
uint d3 = abs(color3 - color);
|
||||
|
||||
uint b0 = d0 > d3;
|
||||
uint b1 = d1 > d2;
|
||||
uint b2 = d0 > d2;
|
||||
uint b3 = d1 > d3;
|
||||
uint b4 = d2 > d3;
|
||||
|
||||
uint x0 = b1 & b2;
|
||||
uint x1 = b0 & b3;
|
||||
uint x2 = b0 & b4;
|
||||
|
||||
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
|
||||
}
|
||||
|
||||
return indices;
|
||||
}
|
||||
|
||||
// Choose quantized color that produces less error. Used by DXT3 compressor.
|
||||
inline static uint quantize4(uint8 a)
|
||||
{
|
||||
int q0 = (a >> 4) - 1;
|
||||
int q1 = (a >> 4);
|
||||
int q2 = (a >> 4) + 1;
|
||||
|
||||
q0 = (q0 << 4) | q0;
|
||||
q1 = (q1 << 4) | q1;
|
||||
q2 = (q2 << 4) | q2;
|
||||
|
||||
int d0 = abs(q0 - a);
|
||||
int d1 = abs(q1 - a);
|
||||
int d2 = abs(q2 - a);
|
||||
|
||||
if (d0 < d1 && d0 < d2) return q0 >> 4;
|
||||
if (d1 < d2) return q1 >> 4;
|
||||
return q2 >> 4;
|
||||
}
|
||||
|
||||
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
|
||||
{
|
||||
uint8 alphas[8];
|
||||
block->evaluatePalette(alphas);
|
||||
|
||||
uint totalError = 0;
|
||||
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
uint8 alpha = rgba.color(i).a;
|
||||
|
||||
uint besterror = 256*256;
|
||||
uint best;
|
||||
for (uint p = 0; p < 8; p++)
|
||||
{
|
||||
int d = alphas[p] - alpha;
|
||||
uint error = d * d;
|
||||
|
||||
if (error < besterror)
|
||||
{
|
||||
besterror = error;
|
||||
best = p;
|
||||
}
|
||||
}
|
||||
|
||||
totalError += besterror;
|
||||
}
|
||||
|
||||
return totalError;
|
||||
}
|
||||
|
||||
static void computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
|
||||
{
|
||||
uint8 alphas[8];
|
||||
block->evaluatePalette(alphas);
|
||||
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
uint8 alpha = rgba.color(i).a;
|
||||
|
||||
uint besterror = 256*256;
|
||||
uint best = 8;
|
||||
for(uint p = 0; p < 8; p++)
|
||||
{
|
||||
int d = alphas[p] - alpha;
|
||||
uint error = d * d;
|
||||
|
||||
if (error < besterror)
|
||||
{
|
||||
besterror = error;
|
||||
best = p;
|
||||
}
|
||||
}
|
||||
nvDebugCheck(best < 8);
|
||||
|
||||
block->setIndex(i, best);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Single color compressor, based on:
|
||||
// https://mollyrocket.com/forums/viewtopic.php?t=392
|
||||
void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
|
||||
{
|
||||
dxtBlock->col0.r = OMatch5[c.r][0];
|
||||
dxtBlock->col0.g = OMatch6[c.g][0];
|
||||
dxtBlock->col0.b = OMatch5[c.b][0];
|
||||
dxtBlock->col1.r = OMatch5[c.r][1];
|
||||
dxtBlock->col1.g = OMatch6[c.g][1];
|
||||
dxtBlock->col1.b = OMatch5[c.b][1];
|
||||
dxtBlock->indices = 0xaaaaaaaa;
|
||||
|
||||
if (dxtBlock->col0.u < dxtBlock->col1.u)
|
||||
{
|
||||
swap(dxtBlock->col0.u, dxtBlock->col1.u);
|
||||
dxtBlock->indices ^= 0x55555555;
|
||||
}
|
||||
}
|
||||
|
||||
// Single color DXT1a compressor. Colors with alpha >= 128 are encoded exactly
// like compressDXT1(); anything below that threshold produces a block with
// both endpoints zero and every selector set to 3.
void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
    if (rgba.a >= 128)
    {
        compressDXT1(rgba, dxtBlock);
        return;
    }

    dxtBlock->col0.u = 0;
    dxtBlock->col1.u = 0;
    dxtBlock->indices = 0xFFFFFFFF;
}
|
||||
|
||||
|
||||
// Brute force green channel compressor
|
||||
void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
|
||||
{
|
||||
nvDebugCheck(block != NULL);
|
||||
|
||||
uint8 ming = 63;
|
||||
uint8 maxg = 0;
|
||||
|
||||
// Get min/max green.
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
uint8 green = rgba.color(i).g >> 2;
|
||||
ming = min(ming, green);
|
||||
maxg = max(maxg, green);
|
||||
}
|
||||
|
||||
block->col0.r = 31;
|
||||
block->col1.r = 31;
|
||||
block->col0.g = maxg;
|
||||
block->col1.g = ming;
|
||||
block->col0.b = 0;
|
||||
block->col1.b = 0;
|
||||
|
||||
if (maxg - ming > 4)
|
||||
{
|
||||
int besterror = computeGreenError(rgba, block);
|
||||
int bestg0 = maxg;
|
||||
int bestg1 = ming;
|
||||
|
||||
for (int g0 = ming+5; g0 < maxg; g0++)
|
||||
{
|
||||
for (int g1 = ming; g1 < g0-4; g1++)
|
||||
{
|
||||
if ((maxg-g0) + (g1-ming) > besterror)
|
||||
continue;
|
||||
|
||||
block->col0.g = g0;
|
||||
block->col1.g = g1;
|
||||
int error = computeGreenError(rgba, block);
|
||||
|
||||
if (error < besterror)
|
||||
{
|
||||
besterror = error;
|
||||
bestg0 = g0;
|
||||
bestg1 = g1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
block->col0.g = bestg0;
|
||||
block->col1.g = bestg1;
|
||||
}
|
||||
|
||||
Color32 palette[4];
|
||||
block->evaluatePalette(palette);
|
||||
block->indices = computeGreenIndices(rgba, palette);
|
||||
}
|
||||
|
||||
void OptimalCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
|
||||
{
|
||||
dxtBlock->alpha0 = quantize4(rgba.color(0).a);
|
||||
dxtBlock->alpha1 = quantize4(rgba.color(1).a);
|
||||
dxtBlock->alpha2 = quantize4(rgba.color(2).a);
|
||||
dxtBlock->alpha3 = quantize4(rgba.color(3).a);
|
||||
dxtBlock->alpha4 = quantize4(rgba.color(4).a);
|
||||
dxtBlock->alpha5 = quantize4(rgba.color(5).a);
|
||||
dxtBlock->alpha6 = quantize4(rgba.color(6).a);
|
||||
dxtBlock->alpha7 = quantize4(rgba.color(7).a);
|
||||
dxtBlock->alpha8 = quantize4(rgba.color(8).a);
|
||||
dxtBlock->alpha9 = quantize4(rgba.color(9).a);
|
||||
dxtBlock->alphaA = quantize4(rgba.color(10).a);
|
||||
dxtBlock->alphaB = quantize4(rgba.color(11).a);
|
||||
dxtBlock->alphaC = quantize4(rgba.color(12).a);
|
||||
dxtBlock->alphaD = quantize4(rgba.color(13).a);
|
||||
dxtBlock->alphaE = quantize4(rgba.color(14).a);
|
||||
dxtBlock->alphaF = quantize4(rgba.color(15).a);
|
||||
}
|
||||
|
||||
|
||||
void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
|
||||
{
|
||||
uint8 mina = 255;
|
||||
uint8 maxa = 0;
|
||||
|
||||
// Get min/max alpha.
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
uint8 alpha = rgba.color(i).a;
|
||||
mina = min(mina, alpha);
|
||||
maxa = max(maxa, alpha);
|
||||
}
|
||||
|
||||
dxtBlock->alpha0 = maxa;
|
||||
dxtBlock->alpha1 = mina;
|
||||
|
||||
/*int centroidDist = 256;
|
||||
int centroid;
|
||||
|
||||
// Get the closest to the centroid.
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
uint8 alpha = rgba.color(i).a;
|
||||
int dist = abs(alpha - (maxa + mina) / 2);
|
||||
if (dist < centroidDist)
|
||||
{
|
||||
centroidDist = dist;
|
||||
centroid = alpha;
|
||||
}
|
||||
}*/
|
||||
|
||||
if (maxa - mina > 8)
|
||||
{
|
||||
int besterror = computeAlphaError(rgba, dxtBlock);
|
||||
int besta0 = maxa;
|
||||
int besta1 = mina;
|
||||
|
||||
for (int a0 = mina+9; a0 < maxa; a0++)
|
||||
{
|
||||
for (int a1 = mina; a1 < a0-8; a1++)
|
||||
//for (int a1 = mina; a1 < maxa; a1++)
|
||||
{
|
||||
//nvCheck(abs(a1-a0) > 8);
|
||||
|
||||
//if (abs(a0 - a1) < 8) continue;
|
||||
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
|
||||
if ((maxa-a0) + (a1-mina) > besterror)
|
||||
continue;
|
||||
|
||||
dxtBlock->alpha0 = a0;
|
||||
dxtBlock->alpha1 = a1;
|
||||
int error = computeAlphaError(rgba, dxtBlock);
|
||||
|
||||
if (error < besterror)
|
||||
{
|
||||
besterror = error;
|
||||
besta0 = a0;
|
||||
besta1 = a1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dxtBlock->alpha0 = besta0;
|
||||
dxtBlock->alpha1 = besta1;
|
||||
}
|
||||
|
||||
computeAlphaIndices(rgba, dxtBlock);
|
||||
}
|
||||
|
49
src/nvtt/OptimalCompressDXT.h
Normal file
49
src/nvtt/OptimalCompressDXT.h
Normal file
@ -0,0 +1,49 @@
|
||||
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#ifndef NV_TT_OPTIMALCOMPRESSDXT_H
|
||||
#define NV_TT_OPTIMALCOMPRESSDXT_H
|
||||
|
||||
#include <nvimage/nvimage.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
struct ColorBlock;
|
||||
struct BlockDXT1;
|
||||
struct BlockDXT3;
|
||||
struct BlockDXT5;
|
||||
struct AlphaBlockDXT3;
|
||||
struct AlphaBlockDXT5;
|
||||
|
||||
namespace OptimalCompress
|
||||
{
|
||||
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
|
||||
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
|
||||
|
||||
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
|
||||
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
|
||||
}
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_TT_OPTIMALCOMPRESSDXT_H
|
@ -21,15 +21,13 @@
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <nvcore/Containers.h> // swap
|
||||
|
||||
#include <nvmath/Color.h>
|
||||
|
||||
#include <nvimage/ColorBlock.h>
|
||||
#include <nvimage/BlockDXT.h>
|
||||
|
||||
#include "QuickCompressDXT.h"
|
||||
#include "SingleColorLookup.h"
|
||||
#include "OptimalCompressDXT.h"
|
||||
|
||||
|
||||
using namespace nv;
|
||||
@ -290,70 +288,6 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
|
||||
dxtBlock->indices = computeIndices3(block, a, b);
|
||||
}*/
|
||||
|
||||
namespace
|
||||
{
|
||||
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
|
||||
{
|
||||
nvDebugCheck(block != NULL);
|
||||
|
||||
int palette[4];
|
||||
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
|
||||
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
|
||||
palette[2] = (2 * palette[0] + palette[1]) / 3;
|
||||
palette[3] = (2 * palette[1] + palette[0]) / 3;
|
||||
|
||||
int totalError = 0;
|
||||
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
const int green = rgba.color(i).g;
|
||||
|
||||
int error = abs(green - palette[0]);
|
||||
error = min(error, abs(green - palette[1]));
|
||||
error = min(error, abs(green - palette[2]));
|
||||
error = min(error, abs(green - palette[3]));
|
||||
|
||||
totalError += error;
|
||||
}
|
||||
|
||||
return totalError;
|
||||
}
|
||||
|
||||
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
|
||||
{
|
||||
const int color0 = palette[0].g;
|
||||
const int color1 = palette[1].g;
|
||||
const int color2 = palette[2].g;
|
||||
const int color3 = palette[3].g;
|
||||
|
||||
uint indices = 0;
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
const int color = rgba.color(i).g;
|
||||
|
||||
uint d0 = abs(color0 - color);
|
||||
uint d1 = abs(color1 - color);
|
||||
uint d2 = abs(color2 - color);
|
||||
uint d3 = abs(color3 - color);
|
||||
|
||||
uint b0 = d0 > d3;
|
||||
uint b1 = d1 > d2;
|
||||
uint b2 = d0 > d2;
|
||||
uint b3 = d1 > d3;
|
||||
uint b4 = d2 > d3;
|
||||
|
||||
uint x0 = b1 & b2;
|
||||
uint x1 = b0 & b3;
|
||||
uint x2 = b0 & b4;
|
||||
|
||||
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
|
||||
}
|
||||
|
||||
return indices;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -505,79 +439,63 @@ namespace
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Single color compressor, based on:
|
||||
// https://mollyrocket.com/forums/viewtopic.php?t=392
|
||||
void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
    BlockDXT1 & out = *dxtBlock;

    // For each channel, column 0 of the lookup row becomes the col0 component
    // and column 1 becomes the col1 component.
    out.col0.r = OMatch5[c.r][0];
    out.col1.r = OMatch5[c.r][1];

    out.col0.g = OMatch6[c.g][0];
    out.col1.g = OMatch6[c.g][1];

    out.col0.b = OMatch5[c.b][0];
    out.col1.b = OMatch5[c.b][1];

    // Every texel gets the 2-bit index 2.
    out.indices = 0xaaaaaaaa;

    // Nothing more to do when the endpoints are already ordered col0 >= col1.
    if (out.col0.u >= out.col1.u)
    {
        return;
    }

    // Otherwise exchange the endpoints and flip the low bit of every index
    // (2 becomes 3) to match the new ordering.
    swap(out.col0.u, out.col1.u);
    out.indices ^= 0x55555555;
}
|
||||
|
||||
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
|
||||
{
|
||||
// read block
|
||||
Vector3 block[16];
|
||||
extractColorBlockRGB(rgba, block);
|
||||
|
||||
// find min and max colors
|
||||
Vector3 maxColor, minColor;
|
||||
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
|
||||
|
||||
selectDiagonal(block, 16, &maxColor, &minColor);
|
||||
|
||||
insetBBox(&maxColor, &minColor);
|
||||
|
||||
uint16 color0 = roundAndExpand(&maxColor);
|
||||
uint16 color1 = roundAndExpand(&minColor);
|
||||
|
||||
if (color0 < color1)
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
swap(maxColor, minColor);
|
||||
swap(color0, color1);
|
||||
}
|
||||
|
||||
dxtBlock->col0 = Color16(color0);
|
||||
dxtBlock->col1 = Color16(color1);
|
||||
dxtBlock->indices = computeIndices4(block, maxColor, minColor);
|
||||
|
||||
optimizeEndPoints4(block, dxtBlock);
|
||||
}
|
||||
|
||||
|
||||
void QuickCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
|
||||
{
|
||||
if (rgba.a == 0)
|
||||
{
|
||||
dxtBlock->col0.u = 0;
|
||||
dxtBlock->col1.u = 0;
|
||||
dxtBlock->indices = 0xFFFFFFFF;
|
||||
OptimalCompress::compressDXT1(rgba.color(0), dxtBlock);
|
||||
}
|
||||
else
|
||||
{
|
||||
compressDXT1(rgba, dxtBlock);
|
||||
// read block
|
||||
Vector3 block[16];
|
||||
extractColorBlockRGB(rgba, block);
|
||||
|
||||
// find min and max colors
|
||||
Vector3 maxColor, minColor;
|
||||
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
|
||||
|
||||
selectDiagonal(block, 16, &maxColor, &minColor);
|
||||
|
||||
insetBBox(&maxColor, &minColor);
|
||||
|
||||
uint16 color0 = roundAndExpand(&maxColor);
|
||||
uint16 color1 = roundAndExpand(&minColor);
|
||||
|
||||
if (color0 < color1)
|
||||
{
|
||||
swap(maxColor, minColor);
|
||||
swap(color0, color1);
|
||||
}
|
||||
|
||||
dxtBlock->col0 = Color16(color0);
|
||||
dxtBlock->col1 = Color16(color1);
|
||||
dxtBlock->indices = computeIndices4(block, maxColor, minColor);
|
||||
|
||||
optimizeEndPoints4(block, dxtBlock);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
|
||||
{
|
||||
if (!rgba.hasAlpha())
|
||||
bool hasAlpha = false;
|
||||
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
if (rgba.color(i).a < 128) {
|
||||
hasAlpha = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasAlpha)
|
||||
{
|
||||
compressDXT1(rgba, dxtBlock);
|
||||
}
|
||||
else
|
||||
// @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode.
|
||||
//else if (rgba.isSingleColorNoAlpha()) { ... }
|
||||
else
|
||||
{
|
||||
// read block
|
||||
Vector3 block[16];
|
||||
@ -609,95 +527,14 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
|
||||
}
|
||||
|
||||
|
||||
// Brute force green channel compressor
|
||||
void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
|
||||
{
|
||||
nvDebugCheck(block != NULL);
|
||||
|
||||
uint8 ming = 63;
|
||||
uint8 maxg = 0;
|
||||
|
||||
// Get min/max green.
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
uint8 green = rgba.color(i).g >> 2;
|
||||
ming = min(ming, green);
|
||||
maxg = max(maxg, green);
|
||||
}
|
||||
|
||||
block->col0.r = 31;
|
||||
block->col1.r = 31;
|
||||
block->col0.g = maxg;
|
||||
block->col1.g = ming;
|
||||
block->col0.b = 0;
|
||||
block->col1.b = 0;
|
||||
|
||||
if (maxg - ming > 4)
|
||||
{
|
||||
int besterror = computeGreenError(rgba, block);
|
||||
int bestg0 = maxg;
|
||||
int bestg1 = ming;
|
||||
|
||||
for (int g0 = ming+5; g0 < maxg; g0++)
|
||||
{
|
||||
for (int g1 = ming; g1 < g0-4; g1++)
|
||||
{
|
||||
if ((maxg-g0) + (g1-ming) > besterror)
|
||||
continue;
|
||||
|
||||
block->col0.g = g0;
|
||||
block->col1.g = g1;
|
||||
int error = computeGreenError(rgba, block);
|
||||
|
||||
if (error < besterror)
|
||||
{
|
||||
besterror = error;
|
||||
bestg0 = g0;
|
||||
bestg1 = g1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
block->col0.g = bestg0;
|
||||
block->col1.g = bestg1;
|
||||
}
|
||||
|
||||
Color32 palette[4];
|
||||
block->evaluatePalette(palette);
|
||||
block->indices = computeGreenIndices(rgba, palette);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
|
||||
{
|
||||
// @@ Round instead of truncate. When rounding take into account bit expansion.
|
||||
dxtBlock->alpha0 = rgba.color(0).a >> 4;
|
||||
dxtBlock->alpha1 = rgba.color(1).a >> 4;
|
||||
dxtBlock->alpha2 = rgba.color(2).a >> 4;
|
||||
dxtBlock->alpha3 = rgba.color(3).a >> 4;
|
||||
dxtBlock->alpha4 = rgba.color(4).a >> 4;
|
||||
dxtBlock->alpha5 = rgba.color(5).a >> 4;
|
||||
dxtBlock->alpha6 = rgba.color(6).a >> 4;
|
||||
dxtBlock->alpha7 = rgba.color(7).a >> 4;
|
||||
dxtBlock->alpha8 = rgba.color(8).a >> 4;
|
||||
dxtBlock->alpha9 = rgba.color(9).a >> 4;
|
||||
dxtBlock->alphaA = rgba.color(10).a >> 4;
|
||||
dxtBlock->alphaB = rgba.color(11).a >> 4;
|
||||
dxtBlock->alphaC = rgba.color(12).a >> 4;
|
||||
dxtBlock->alphaD = rgba.color(13).a >> 4;
|
||||
dxtBlock->alphaE = rgba.color(14).a >> 4;
|
||||
dxtBlock->alphaF = rgba.color(15).a >> 4;
|
||||
}
|
||||
|
||||
void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
|
||||
{
|
||||
compressDXT1(rgba, &dxtBlock->color);
|
||||
compressDXT3A(rgba, &dxtBlock->alpha);
|
||||
OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha);
|
||||
}
|
||||
|
||||
|
||||
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
|
||||
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/)
|
||||
{
|
||||
uint8 alpha0 = 0;
|
||||
uint8 alpha1 = 255;
|
||||
@ -716,8 +553,8 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
|
||||
uint besterror = computeAlphaIndices(rgba, &block);
|
||||
|
||||
AlphaBlockDXT5 bestblock = block;
|
||||
|
||||
while(true)
|
||||
|
||||
for (int i = 0; i < iterationCount; i++)
|
||||
{
|
||||
optimizeAlpha8(rgba, &block);
|
||||
uint error = computeAlphaIndices(rgba, &block);
|
||||
@ -741,9 +578,8 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
|
||||
*dxtBlock = bestblock;
|
||||
}
|
||||
|
||||
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock)
|
||||
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
|
||||
{
|
||||
compressDXT1(rgba, &dxtBlock->color);
|
||||
compressDXT5A(rgba, &dxtBlock->alpha);
|
||||
compressDXT5A(rgba, &dxtBlock->alpha, iterationCount);
|
||||
}
|
||||
|
||||
|
@ -37,17 +37,13 @@ namespace nv
|
||||
|
||||
namespace QuickCompress
|
||||
{
|
||||
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
|
||||
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
|
||||
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
|
||||
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
|
||||
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
|
||||
|
||||
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
|
||||
void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
|
||||
|
||||
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
|
||||
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock);
|
||||
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8);
|
||||
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8);
|
||||
}
|
||||
} // nv namespace
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -159,7 +159,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16])
|
||||
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
const int idx = threadIdx.x;
|
||||
@ -189,6 +189,11 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
colorSums(colors, sums);
|
||||
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
|
||||
|
||||
*sameColor = (axis == make_float3(0, 0, 0));
|
||||
|
||||
// Single color compressor needs unweighted colors.
|
||||
if (*sameColor) colors[idx] = rawColors[idx];
|
||||
|
||||
dps[idx] = dot(rawColors[idx], axis);
|
||||
|
||||
#if __DEVICE_EMULATION__
|
||||
@ -205,43 +210,6 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
}
|
||||
}
|
||||
|
||||
// Loads the 16 texels of the block selected by blockIdx.x into 'colors' as
// two-component values, computes their sum into 'sums', and reorders 'colors'
// according to the ranking produced by sortColors() on the projections onto
// the best fit line. Only the first 16 threads take part.
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16])
{
    const int tid = threadIdx.x;
    const int blockIndex = blockIdx.x;

    __shared__ float projections[16];

    if (tid < 16)
    {
        // Read color and copy to shared mem: bits 16..23 go to x, bits 8..15 to y,
        // both scaled by 1/255.
        const uint texel = image[blockIndex * 16 + tid];

        colors[tid].x = ((texel >> 16) & 0xFF) * (1.0f / 255.0f);
        colors[tid].y = ((texel >> 8) & 0xFF) * (1.0f / 255.0f);

        // No need to synchronize, 16 < warp size.
#if __DEVICE_EMULATION__
        } __debugsync(); if (tid < 16) {
#endif

        // Sort colors along the best fit line.
        colorSums(colors, sums);
        float2 axis = bestFitLine(colors, sums[0]);

        projections[tid] = dot(colors[tid], axis);

#if __DEVICE_EMULATION__
        } __debugsync(); if (tid < 16) {
#endif

        sortColors(projections, xrefs);

        // Each thread moves its own color to the slot given by xrefs.
        float2 mine = colors[tid];
        colors[xrefs[tid]] = mine;
    }
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Round color to RGB565 and expand
|
||||
@ -258,26 +226,6 @@ inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
|
||||
return v;
|
||||
}
|
||||
|
||||
// Quantizes v.x to 31 steps and v.y to 63 steps (after clamping to [0,1]),
// packs the two integers into *w (x shifted left by 11, y by 5), and returns
// the quantized values scaled back to floats.
inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
{
    const float qx = rintf(__saturatef(v.x) * 31.0f);
    const float qy = rintf(__saturatef(v.y) * 63.0f);

    *w = ((ushort)qx << 11) | ((ushort)qy << 5);

    // approximate integer bit expansion.
    return make_float2(qx * 0.03227752766457f, qy * 0.01583151765563f);
}
|
||||
|
||||
// Quantizes both components of v to 255 steps (after clamping to [0,1]),
// packs them into *w (x in the high byte, y in the low byte), and returns the
// quantized values divided by 255.
inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
{
    const float qx = rintf(__saturatef(v.x) * 255.0f);
    const float qy = rintf(__saturatef(v.y) * 255.0f);

    *w = ((ushort)qx << 8) | ((ushort)qy);

    return make_float2(qx * (1.0f / 255.0f), qy * (1.0f / 255.0f));
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Evaluate permutations
|
||||
@ -521,114 +469,6 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights,
|
||||
}
|
||||
*/
|
||||
|
||||
// Evaluates one index permutation for a two-component block: solves for the
// least squares endpoints, quantizes them with roundAndExpand56 (writing the
// packed endpoints to *start and *end), and returns the resulting error.
__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
    // Compute endpoints using least squares.
    float2 weightedSum = make_float2(0.0f, 0.0f);
    uint packedProducts = 0;

    // Accumulate the per-texel weights selected by each 2-bit index.
    #pragma unroll
    for (int i = 0; i < 16; i++)
    {
        const uint selector = (permutation >> (2*i)) & 3;

        weightedSum += alphaTable4[selector] * colors[i];
        packedProducts += prods4[selector];
    }

    // The three accumulated products are unpacked from one integer:
    // bits 16 and up, bits 8..15, and bits 0..7.
    const float aa = float(packedProducts >> 16);
    const float bb = float((packedProducts >> 8) & 0xff);
    const float ab = float(packedProducts & 0xff);
    const float2 complementSum = 9.0f * color_sum - weightedSum;

    const float invDet = 1.0f / (aa * bb - ab * ab);

    float2 a = (weightedSum * bb - complementSum * ab) * invDet;
    float2 b = (complementSum * aa - weightedSum * ab) * invDet;

    // Round a, b to the closest 5-6 color and expand...
    a = roundAndExpand56(a, start);
    b = roundAndExpand56(b, end);

    // compute the error
    float2 e = a * a * aa + b * b * bb + 2.0f * (a * b * ab - a * weightedSum - b * complementSum);

    return (1.0f / 9.0f) * (e.x + e.y);
}
|
||||
|
||||
// Same evaluation as the four-level variant, but using the alphaTable3 /
// prods3 tables and a scale of 4 instead of 9. Writes the packed quantized
// endpoints to *start and *end and returns the error of this permutation.
__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
    // Compute endpoints using least squares.
    float2 weightedSum = make_float2(0.0f, 0.0f);
    uint packedProducts = 0;

    // Accumulate the per-texel weights selected by each 2-bit index.
    #pragma unroll
    for (int i = 0; i < 16; i++)
    {
        const uint selector = (permutation >> (2*i)) & 3;

        weightedSum += alphaTable3[selector] * colors[i];
        packedProducts += prods3[selector];
    }

    // The three accumulated products are unpacked from one integer:
    // bits 16 and up, bits 8..15, and bits 0..7.
    const float aa = float(packedProducts >> 16);
    const float bb = float((packedProducts >> 8) & 0xff);
    const float ab = float(packedProducts & 0xff);
    const float2 complementSum = 4.0f * color_sum - weightedSum;

    const float invDet = 1.0f / (aa * bb - ab * ab);

    float2 a = (weightedSum * bb - complementSum * ab) * invDet;
    float2 b = (complementSum * aa - weightedSum * ab) * invDet;

    // Round a, b to the closest 5-6 color and expand...
    a = roundAndExpand56(a, start);
    b = roundAndExpand56(b, end);

    // compute the error
    float2 e = a * a * aa + b * b * bb + 2.0f * (a * b * ab - a * weightedSum - b * complementSum);

    return (1.0f / 4.0f) * (e.x + e.y);
}
|
||||
|
||||
// Evaluates one index permutation with endpoints quantized by
// roundAndExpand88. Writes the packed endpoints to *start and *end and
// returns the error of this permutation.
__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
    // Compute endpoints using least squares.
    float2 weightedSum = make_float2(0.0f, 0.0f);
    uint packedProducts = 0;

    // Accumulate the per-texel weights selected by each 2-bit index.
    #pragma unroll
    for (int i = 0; i < 16; i++)
    {
        const uint selector = (permutation >> (2*i)) & 3;

        weightedSum += alphaTable4[selector] * colors[i];
        packedProducts += prods4[selector];
    }

    // The three accumulated products are unpacked from one integer:
    // bits 16 and up, bits 8..15, and bits 0..7.
    const float aa = float(packedProducts >> 16);
    const float bb = float((packedProducts >> 8) & 0xff);
    const float ab = float(packedProducts & 0xff);
    const float2 complementSum = 9.0f * color_sum - weightedSum;

    const float invDet = 1.0f / (aa * bb - ab * ab);

    float2 a = (weightedSum * bb - complementSum * ab) * invDet;
    float2 b = (complementSum * aa - weightedSum * ab) * invDet;

    // Round a, b to the closest 8-8 color and expand...
    a = roundAndExpand88(a, start);
    b = roundAndExpand88(b, end);

    // compute the error
    float2 e = a * a * aa + b * b * bb + 2.0f * (a * b * ab - a * weightedSum - b * complementSum);

    return (1.0f / 9.0f) * (e.x + e.y);
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Evaluate all permutations
|
||||
@ -757,14 +597,12 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
|
||||
}
|
||||
*/
|
||||
|
||||
__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
|
||||
__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
|
||||
{
|
||||
const int idx = threadIdx.x;
|
||||
|
||||
float bestError = FLT_MAX;
|
||||
|
||||
__shared__ uint s_permutations[160];
|
||||
|
||||
for(int i = 0; i < 16; i++)
|
||||
{
|
||||
int pidx = idx + NUM_THREADS * i;
|
||||
@ -772,8 +610,7 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons
|
||||
|
||||
ushort start, end;
|
||||
uint permutation = permutations[pidx];
|
||||
if (pidx < 160) s_permutations[pidx] = permutation;
|
||||
|
||||
|
||||
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
|
||||
|
||||
if (error < bestError)
|
||||
@ -791,30 +628,6 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons
|
||||
bestPermutation ^= 0x55555555; // Flip indices.
|
||||
}
|
||||
|
||||
for(int i = 0; i < 3; i++)
|
||||
{
|
||||
int pidx = idx + NUM_THREADS * i;
|
||||
if (pidx >= 160) break;
|
||||
|
||||
ushort start, end;
|
||||
uint permutation = s_permutations[pidx];
|
||||
float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
|
||||
|
||||
if (error < bestError)
|
||||
{
|
||||
bestError = error;
|
||||
bestPermutation = permutation;
|
||||
bestStart = start;
|
||||
bestEnd = end;
|
||||
|
||||
if (bestStart > bestEnd)
|
||||
{
|
||||
swap(bestEnd, bestStart);
|
||||
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
errors[idx] = bestError;
|
||||
}
|
||||
|
||||
@ -852,40 +665,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
|
||||
errors[idx] = bestError;
|
||||
}
|
||||
|
||||
// Each thread evaluates its share of the first 992 entries of 'permutations'
// (stride NUM_THREADS, at most 16 per thread), keeps its best candidate in
// bestStart / bestEnd / bestPermutation, and stores its lowest error in
// errors[threadIdx.x].
__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
    const int tid = threadIdx.x;

    float lowestError = FLT_MAX;

    for (int pass = 0, pidx = tid; pass < 16 && pidx < 992; pass++, pidx += NUM_THREADS)
    {
        ushort candidateStart, candidateEnd;
        const uint candidate = permutations[pidx];

        const float candidateError = evalPermutationCTX(colors, colorSum, candidate, &candidateStart, &candidateEnd);

        if (candidateError < lowestError)
        {
            lowestError = candidateError;
            bestPermutation = candidate;
            bestStart = candidateStart;
            bestEnd = candidateEnd;
        }
    }

    // Keep the endpoints ordered start >= end.
    if (bestStart < bestEnd)
    {
        swap(bestEnd, bestStart);
        bestPermutation ^= 0x55555555;    // Flip indices.
    }

    errors[tid] = lowestError;
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Find index with minimum error
|
||||
@ -996,11 +775,6 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr
|
||||
result[bid].y = indices;
|
||||
}
|
||||
|
||||
// Writes a block by forwarding all arguments, unchanged, to saveBlockDXT1.
__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result)
{
    saveBlockDXT1(start, end, permutation, xrefs, result);
}
|
||||
|
||||
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
@ -1062,6 +836,39 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: loads one color block, then either writes it with the single color
// path or searches the permutation table with evalLevel4Permutations and lets
// the thread with the lowest error write the result.
__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result)
{
    __shared__ float3 blockColors[16];
    __shared__ float3 blockSums[16];
    __shared__ int sortOrder[16];
    __shared__ int uniformBlock;

    loadColorBlock(image, blockColors, blockSums, sortOrder, &uniformBlock);

    __syncthreads();

    // Blocks flagged as a single color are written by thread 0 alone.
    if (uniformBlock)
    {
        if (threadIdx.x == 0)
        {
            saveSingleColorBlockDXT1(blockColors[0], result);
        }
        return;
    }

    ushort winnerStart, winnerEnd;
    uint winnerPermutation;

    __shared__ float threadErrors[NUM_THREADS];

    evalLevel4Permutations(blockColors, blockSums[0], permutations, winnerStart, winnerEnd, winnerPermutation, threadErrors);

    // Use a parallel reduction to find minimum error.
    const int winner = findMinError(threadErrors);

    // Only write the result of the winner thread.
    if (threadIdx.x == winner)
    {
        saveBlockDXT1(winnerStart, winnerEnd, winnerPermutation, sortOrder, result);
    }
}
|
||||
|
||||
__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result)
|
||||
{
|
||||
@ -1069,11 +876,18 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
|
||||
__shared__ float3 sums[16];
|
||||
__shared__ float weights[16];
|
||||
__shared__ int xrefs[16];
|
||||
__shared__ int sameColor;
|
||||
|
||||
loadColorBlock(image, colors, sums, weights, xrefs);
|
||||
loadColorBlock(image, colors, sums, weights, xrefs, &sameColor);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (sameColor)
|
||||
{
|
||||
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
|
||||
return;
|
||||
}
|
||||
|
||||
ushort bestStart, bestEnd;
|
||||
uint bestPermutation;
|
||||
|
||||
@ -1092,61 +906,6 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
|
||||
}
|
||||
|
||||
|
||||
// Kernel: loads one block as two-component colors, searches the permutation
// table with evalAllPermutations, and lets the thread with the lowest error
// write the block through saveBlockDXT1.
__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result)
{
    __shared__ float2 blockColors[16];
    __shared__ float2 blockSums[16];
    __shared__ int sortOrder[16];

    loadColorBlock(image, blockColors, blockSums, sortOrder);

    __syncthreads();

    ushort winnerStart, winnerEnd;
    uint winnerPermutation;

    __shared__ float threadErrors[NUM_THREADS];

    evalAllPermutations(blockColors, blockSums[0], permutations, winnerStart, winnerEnd, winnerPermutation, threadErrors);

    // Use a parallel reduction to find minimum error.
    const int winner = findMinError(threadErrors);

    // Only write the result of the winner thread.
    if (threadIdx.x == winner)
    {
        saveBlockDXT1(winnerStart, winnerEnd, winnerPermutation, sortOrder, result);
    }
}
|
||||
|
||||
// Kernel: loads one block as two-component colors, searches the permutation
// table with evalAllPermutationsCTX, and lets the thread with the lowest error
// write the block through saveBlockCTX1.
__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result)
{
    __shared__ float2 blockColors[16];
    __shared__ float2 blockSums[16];
    __shared__ int sortOrder[16];

    loadColorBlock(image, blockColors, blockSums, sortOrder);

    __syncthreads();

    ushort winnerStart, winnerEnd;
    uint winnerPermutation;

    __shared__ float threadErrors[NUM_THREADS];

    evalAllPermutationsCTX(blockColors, blockSums[0], permutations, winnerStart, winnerEnd, winnerPermutation, threadErrors);

    // Use a parallel reduction to find minimum error.
    const int winner = findMinError(threadErrors);

    // Only write the result of the winner thread.
    if (threadIdx.x == winner)
    {
        saveBlockCTX1(winnerStart, winnerEnd, winnerPermutation, sortOrder, result);
    }
}
|
||||
|
||||
|
||||
/*
|
||||
__device__ float computeError(const float weights[16], uchar a0, uchar a1)
|
||||
{
|
||||
@ -1352,17 +1111,12 @@ extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result
|
||||
compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
|
||||
}
|
||||
|
||||
// Host entry point: launches compressLevel4DXT1 with 'blockNum' thread blocks
// of NUM_THREADS threads each.
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
    uint2 * output = (uint2 *)d_result;
    compressLevel4DXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, output);
}
|
||||
|
||||
// Host entry point: launches compressWeightedDXT1 with 'blockNum' thread
// blocks of NUM_THREADS threads each.
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
    uint2 * output = (uint2 *)d_result;
    compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, output);
}
|
||||
|
||||
// Host entry point: launches compressNormalDXT1 with 'blockNum' thread blocks
// of NUM_THREADS threads each.
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
    uint2 * output = (uint2 *)d_result;
    compressNormalDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, output);
}
|
||||
|
||||
// Host entry point: launches compressCTX1 with 'blockNum' thread blocks of
// NUM_THREADS threads each.
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
    uint2 * output = (uint2 *)d_result;
    compressCTX1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, output);
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -39,17 +39,20 @@ namespace nv
|
||||
|
||||
bool isValid() const;
|
||||
|
||||
void compressDXT1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT3(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
|
||||
|
||||
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
||||
private:
|
||||
|
||||
uint * m_bitmapTable;
|
||||
uint * m_data;
|
||||
uint * m_result;
|
||||
|
||||
const Image * m_image;
|
||||
nvtt::AlphaMode m_alphaMode;
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
@ -87,64 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
|
||||
return a.x == b.x && a.y == b.y && a.z == b.z;
|
||||
}
|
||||
|
||||
|
||||
// float2 operators
|
||||
// Component-wise product.
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
    const float x = a.x * b.x;
    const float y = a.y * b.y;
    return make_float2(x, y);
}

// Scalar times vector.
inline __device__ __host__ float2 operator *(float f, float2 v)
{
    const float x = v.x * f;
    const float y = v.y * f;
    return make_float2(x, y);
}

// Vector times scalar.
inline __device__ __host__ float2 operator *(float2 v, float f)
{
    const float x = v.x * f;
    const float y = v.y * f;
    return make_float2(x, y);
}

// Component-wise sum.
inline __device__ __host__ float2 operator +(float2 a, float2 b)
{
    const float x = a.x + b.x;
    const float y = a.y + b.y;
    return make_float2(x, y);
}

// In-place component-wise sum.
inline __device__ __host__ void operator +=(float2 & b, float2 a)
{
    b.x = b.x + a.x;
    b.y = b.y + a.y;
}

// Component-wise difference.
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
    const float x = a.x - b.x;
    const float y = a.y - b.y;
    return make_float2(x, y);
}

// In-place component-wise difference.
inline __device__ __host__ void operator -=(float2 & b, float2 a)
{
    b.x = b.x - a.x;
    b.y = b.y - a.y;
}

// Division by a scalar, done as a multiplication by the reciprocal.
inline __device__ __host__ float2 operator /(float2 v, float f)
{
    const float reciprocal = 1.0f / f;
    return make_float2(v.x * reciprocal, v.y * reciprocal);
}

// In-place division by a scalar, done as a multiplication by the reciprocal.
inline __device__ __host__ void operator /=(float2 & b, float f)
{
    const float reciprocal = 1.0f / f;
    b.x = b.x * reciprocal;
    b.y = b.y * reciprocal;
}


// Two-component dot product.
inline __device__ __host__ float dot(float2 a, float2 b)
{
    return a.x * b.x + a.y * b.y;
}
|
||||
|
||||
inline __device__ __host__ float dot(float3 a, float3 b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
@ -206,7 +148,7 @@ inline __device__ bool singleColor(const float3 * colors)
|
||||
bool sameColor = false;
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
sameColor &= (colors[idx] == colors[0]);
|
||||
sameColor &= (colors[i] == colors[0]);
|
||||
}
|
||||
return sameColor;
|
||||
#else
|
||||
@ -301,89 +243,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
|
||||
return firstEigenVector(covariance);
|
||||
}
|
||||
|
||||
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
|
||||
// Estimates the dominant eigenvector of a symmetric 2x2 matrix by power
// iteration. 'matrix' holds the three distinct entries: [0] = xx, [1] = xy,
// [2] = yy. The estimate is rescaled by max(x, y) after every step; a zero
// maximum yields a zero vector instead of a division by zero.
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
{
    // @@ 8 iterations is probably more than enough.

    float2 estimate = make_float2(1.0f, 1.0f);

    for (int iteration = 0; iteration < 8; iteration++)
    {
        const float x = estimate.x * matrix[0] + estimate.y * matrix[1];
        const float y = estimate.x * matrix[1] + estimate.y * matrix[2];

        const float largest = max(x, y);
        const float scale = (largest == 0.0f) ? 0.0f : 1.0f / largest;

        estimate = make_float2(x * scale, y * scale);
    }

    return estimate;
}
|
||||
|
||||
// Computes the sum of the 16 two-component colors and leaves it in every
// entry of 'sums'.
//
// colors: 16 input colors.
// sums:   16 output entries; on return each one holds the total.
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
    // Emulation path: plain serial sum, then broadcast to all entries.
    // make_float2 takes exactly two components (the original passed three,
    // which does not compile).
    float2 color_sum = make_float2(0.0f, 0.0f);
    for (int i = 0; i < 16; i++)
    {
        color_sum += colors[i];
    }

    for (int i = 0; i < 16; i++)
    {
        sums[i] = color_sum;
    }
#else

    const int idx = threadIdx.x;

    // Each thread adds in the partner entry at idx^8, idx^4, idx^2 and idx^1
    // in turn, so after the four steps every entry holds the full sum.
    // NOTE(review): there is no explicit synchronization between the steps;
    // this presumably relies on the 16 participating threads running in
    // lockstep within one warp -- confirm against the callers.
    sums[idx] = colors[idx];
    sums[idx] += sums[idx^8];
    sums[idx] += sums[idx^4];
    sums[idx] += sums[idx^2];
    sums[idx] += sums[idx^1];

#endif
}
|
||||
|
||||
// Returns the direction of the best fit line through the 16 two-component
// colors: the dominant eigenvector of their covariance matrix.
//
// colors:    16 input colors.
// color_sum: sum of the 16 colors (divided by 16 here to get the mean).
//
// The covariance is kept as its three distinct entries:
// [0] = sum(dx*dx), [1] = sum(dx*dy), [2] = sum(dy*dy).
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
    // Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
    float covariance[3] = {0, 0, 0};
    for (int i = 0; i < 16; i++)
    {
        float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
        covariance[0] += a.x * a.x;
        covariance[1] += a.x * a.y;
        // The yy term belongs at index 2; the original wrote index 3, which
        // is past the end of the array and left the yy entry at zero.
        covariance[2] += a.y * a.y;
    }
#else

    const int idx = threadIdx.x;

    float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));

    __shared__ float covariance[16*3];

    // Every thread stores its own three products...
    covariance[3 * idx + 0] = diff.x * diff.x;
    covariance[3 * idx + 1] = diff.x * diff.y;
    covariance[3 * idx + 2] = diff.y * diff.y;

    // ...which are then summed pairwise, halving the active range each step,
    // so the totals end up in covariance[0..2].
    // NOTE(review): no explicit synchronization between steps; presumably
    // relies on the participating threads running in lockstep -- confirm.
    for(int d = 8; d > 0; d >>= 1)
    {
        if (idx < d)
        {
            covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
            covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
            covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
        }
    }

#endif

    // Compute first eigen vector.
    return firstEigenVector2D(covariance);
}
|
||||
|
||||
|
||||
#endif // CUDAMATH_H
|
||||
|
@ -1,128 +1,300 @@
|
||||
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <nvcore/Debug.h>
|
||||
#include "CudaUtils.h"
|
||||
|
||||
#if defined HAVE_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#endif
|
||||
|
||||
using namespace nv;
|
||||
using namespace cuda;
|
||||
|
||||
#if NV_OS_WIN32
|
||||
|
||||
#define WINDOWS_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
static bool isWindowsVista()
|
||||
{
|
||||
OSVERSIONINFO osvi;
|
||||
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
|
||||
|
||||
::GetVersionEx(&osvi);
|
||||
return osvi.dwMajorVersion >= 6;
|
||||
}
|
||||
|
||||
|
||||
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
|
||||
|
||||
static bool isWow32()
|
||||
{
|
||||
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
|
||||
|
||||
BOOL bIsWow64 = FALSE;
|
||||
|
||||
if (NULL != fnIsWow64Process)
|
||||
{
|
||||
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
|
||||
{
|
||||
// Assume 32 bits.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return !bIsWow64;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/// Determine if CUDA is available.
|
||||
bool nv::cuda::isHardwarePresent()
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
#if NV_OS_WIN32
|
||||
if (isWindowsVista()) return false;
|
||||
//if (isWindowsVista() || !isWow32()) return false;
|
||||
#endif
|
||||
int count = deviceCount();
|
||||
if (count == 1)
|
||||
{
|
||||
// Make sure it's not an emulation device.
|
||||
cudaDeviceProp deviceProp;
|
||||
cudaGetDeviceProperties(&deviceProp, 0);
|
||||
|
||||
// deviceProp.name != Device Emulation (CPU)
|
||||
if (deviceProp.major == -1 || deviceProp.minor == -1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// @@ Make sure that warp size == 32
|
||||
}
|
||||
|
||||
return count > 0;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Get number of CUDA enabled devices.
|
||||
int nv::cuda::deviceCount()
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
int gpuCount = 0;
|
||||
|
||||
cudaError_t result = cudaGetDeviceCount(&gpuCount);
|
||||
|
||||
if (result == cudaSuccess)
|
||||
{
|
||||
return gpuCount;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Activate the given devices.
|
||||
bool nv::cuda::setDevice(int i)
|
||||
{
|
||||
nvCheck(i < deviceCount());
|
||||
#if defined HAVE_CUDA
|
||||
cudaError_t result = cudaSetDevice(i);
|
||||
return result == cudaSuccess;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <nvcore/Debug.h>
|
||||
#include <nvcore/Library.h>
|
||||
#include "CudaUtils.h"
|
||||
|
||||
#if defined HAVE_CUDA
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#endif
|
||||
|
||||
using namespace nv;
|
||||
using namespace cuda;
|
||||
|
||||
/* @@ Move this to win32 utils or somewhere else.
|
||||
#if NV_OS_WIN32
|
||||
|
||||
#define WINDOWS_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
static bool isWindowsVista()
|
||||
{
|
||||
OSVERSIONINFO osvi;
|
||||
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
|
||||
|
||||
::GetVersionEx(&osvi);
|
||||
return osvi.dwMajorVersion >= 6;
|
||||
}
|
||||
|
||||
|
||||
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
|
||||
|
||||
static bool isWow32()
|
||||
{
|
||||
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
|
||||
|
||||
BOOL bIsWow64 = FALSE;
|
||||
|
||||
if (NULL != fnIsWow64Process)
|
||||
{
|
||||
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
|
||||
{
|
||||
// Assume 32 bits.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return !bIsWow64;
|
||||
}
|
||||
|
||||
#endif
|
||||
*/
|
||||
|
||||
|
||||
// Check that the CUDA driver library is present and recent enough.
//  version: required version in CUDART_VERSION form (2000 = 2.0, 2010 = 2.1, 2020 = 2.2).
// For each threshold that 'version' reaches, a driver entry point is looked up in the
// driver library; from 2020 on the driver is also asked for its version number, which
// must be at least 'version'.
// Returns true without checking anything when HAVE_CUDA is not defined.
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
	Library nvcuda("nvcuda.dll");
#else
	Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif

	if (!nvcuda.isValid())
	{
		nvDebug("*** CUDA driver not found.\n");
		return false;
	}

	if (version >= 2000)
	{
		void * address = nvcuda.bindSymbol("cuStreamCreate");
		if (address == NULL) {
			nvDebug("*** CUDA driver version < 2.0.\n");
			return false;
		}
	}

	if (version >= 2010)
	{
		void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
		if (address == NULL) {
			nvDebug("*** CUDA driver version < 2.1.\n");
			return false;
		}
	}

	if (version >= 2020)
	{
		typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);

		PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
		if (driverGetVersion == NULL) {
			nvDebug("*** CUDA driver version < 2.2.\n");
			return false;
		}

		int driverVersion = 0;
		CUresult err = driverGetVersion(&driverVersion);
		if (err != CUDA_SUCCESS) {
			// 'err' is a driver API code (CUresult). cudaGetErrorString interprets runtime
			// API codes (cudaError_t), so the raw value is reported instead of a message
			// that may describe a different error.
			nvDebug("*** Error querying driver version: CUresult %d.\n", (int)err);
			return false;
		}

		return driverVersion >= version;
	}
#endif // HAVE_CUDA

	return true;
}
|
||||
|
||||
|
||||
/// Determine if CUDA is available.
|
||||
bool nv::cuda::isHardwarePresent()
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
// Make sure that CUDA driver matches CUDA runtime.
|
||||
if (!isCudaDriverAvailable(CUDART_VERSION))
|
||||
{
|
||||
nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
|
||||
return false;
|
||||
}
|
||||
|
||||
int count = deviceCount();
|
||||
if (count == 1)
|
||||
{
|
||||
// Make sure it's not an emulation device.
|
||||
cudaDeviceProp deviceProp;
|
||||
cudaGetDeviceProperties(&deviceProp, 0);
|
||||
|
||||
// deviceProp.name != Device Emulation (CPU)
|
||||
if (deviceProp.major == -1 || deviceProp.minor == -1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// @@ Make sure that warp size == 32
|
||||
|
||||
// @@ Make sure available GPU is faster than the CPU.
|
||||
|
||||
return count > 0;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Get number of CUDA enabled devices.
|
||||
int nv::cuda::deviceCount()
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
int gpuCount = 0;
|
||||
|
||||
cudaError_t result = cudaGetDeviceCount(&gpuCount);
|
||||
|
||||
if (result == cudaSuccess)
|
||||
{
|
||||
return gpuCount;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Make sure device meets requirements:
|
||||
// - Not an emulation device.
|
||||
// - Not an integrated device?
|
||||
// - Faster than CPU.
|
||||
bool nv::cuda::isValidDevice(int i)
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
cudaDeviceProp device_properties;
|
||||
cudaGetDeviceProperties(&device_properties, i);
|
||||
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
|
||||
|
||||
if (device_properties.major == -1 || device_properties.minor == -1) {
|
||||
// Emulation device.
|
||||
return false;
|
||||
}
|
||||
|
||||
#if CUDART_VERSION >= 2030 // 2.3
|
||||
/*if (device_properties.integrated)
|
||||
{
|
||||
// Integrated devices.
|
||||
return false;
|
||||
}*/
|
||||
#endif
|
||||
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Return the index of the valid device with the largest multiProcessorCount * clockRate
// product. Returns -1 when no valid device has a positive product or when CUDA support
// is compiled out.
int nv::cuda::getFastestDevice()
{
	int best_device = -1;
#if defined HAVE_CUDA
	int best_score = 0;

	const int total = deviceCount();
	for (int i = 0; i < total; i++)
	{
		if (!isValidDevice(i))
		{
			continue;
		}

		cudaDeviceProp props;
		cudaGetDeviceProperties(&props, i);

		// Rough throughput estimate used only to rank devices against each other.
		const int score = props.multiProcessorCount * props.clockRate;
		if (score > best_score)
		{
			best_score = score;
			best_device = i;
		}
	}
#endif
	return best_device;
}
|
||||
|
||||
|
||||
/// Activate the given devices.
|
||||
bool nv::cuda::initDevice(int * device_ptr)
|
||||
{
|
||||
nvDebugCheck(device_ptr != NULL);
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
#if CUDART_VERSION >= 2030 // 2.3
|
||||
|
||||
// Set device flags to yield in order to play nice with other threads and to find out if CUDA was already active.
|
||||
cudaError_t resul = cudaSetDeviceFlags(cudaDeviceScheduleYield);
|
||||
|
||||
#endif
|
||||
|
||||
int device = getFastestDevice();
|
||||
|
||||
if (device == -1)
|
||||
{
|
||||
// No device is fast enough.
|
||||
*device_ptr = -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Select CUDA device.
|
||||
cudaError_t result = cudaSetDevice(device);
|
||||
|
||||
if (result == cudaErrorSetOnActiveProcess)
|
||||
{
|
||||
int device;
|
||||
result = cudaGetDevice(&device);
|
||||
|
||||
*device_ptr = -1; // No device to cleanup.
|
||||
return isValidDevice(device); // Return true if device is valid.
|
||||
}
|
||||
else if (result != cudaSuccess)
|
||||
{
|
||||
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
|
||||
*device_ptr = -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
*device_ptr = device;
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Call cudaThreadExit and log the error it reports, if any.
/// Does nothing when CUDA support is compiled out.
void nv::cuda::exitDevice()
{
#if defined HAVE_CUDA
	const cudaError_t status = cudaThreadExit();
	if (status == cudaSuccess)
	{
		return;
	}

	nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
#endif
}
|
||||
|
@ -31,7 +31,11 @@ namespace nv
|
||||
{
|
||||
bool isHardwarePresent();
|
||||
int deviceCount();
|
||||
bool setDevice(int i);
|
||||
int getFastestDevice();
|
||||
bool isValidDevice(int i);
|
||||
|
||||
bool initDevice(int * device_ptr);
|
||||
void exitDevice();
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
@ -1,60 +0,0 @@
|
||||
|
||||
#include "nvtt_experimental.h"
|
||||
|
||||
struct NvttImage
|
||||
{
|
||||
NvttImage() :
|
||||
m_constant(false),
|
||||
m_image(NULL),
|
||||
m_floatImage(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
~NvttImage()
|
||||
{
|
||||
if (m_constant && m_image) m_image->unwrap();
|
||||
delete m_image;
|
||||
delete m_floatImage;
|
||||
}
|
||||
|
||||
bool m_constant;
|
||||
Image * m_image;
|
||||
FloatImage * m_floatImage;
|
||||
};
|
||||
|
||||
// Allocate a new, empty image object. Release it with nvttDestroyImage.
NvttImage * nvttCreateImage()
{
	NvttImage * created = new NvttImage();
	return created;
}
|
||||
|
||||
// Destroy an image object; deleting a NULL pointer is a no-op.
void nvttDestroyImage(NvttImage * img)
{
	delete img;
}
|
||||
|
||||
// Copy caller-provided pixels into the image.
//  img:    target image, must not be NULL.
//  format: only NVTT_InputFormat_BGRA_8UB is handled; any other value fails the check.
//  w, h:   extents in pixels.
//  data:   source buffer holding w * h pixels of 4 bytes each.
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data)
{
	nvCheck(img != NULL);

	if (format == NVTT_InputFormat_BGRA_8UB)
	{
		if (img->m_image == NULL)
		{
			// A freshly created NvttImage has no Image yet; create it before use.
			img->m_image = new Image();
		}
		else if (img->m_constant)
		{
			// Detach wrapped pixels before allocating, mirroring the destructor.
			img->m_image->unwrap();
		}

		img->m_constant = false;
		img->m_image->allocate(w, h);

		// Compute the byte count in size_t so large extents do not wrap in 32 bits.
		memcpy(img->m_image->pixels(), data, (size_t)w * h * 4);
	}
	else
	{
		nvCheck(false);
	}
}
|
||||
|
||||
// Compress the image to the given format.
// Only the argument check exists so far; no compressor is invoked yet.
void nvttCompressImage(NvttImage * img, NvttFormat format)
{
	nvCheck(NULL != img);

	// 'format' is not used until a compressor is hooked up.
	(void)format;

	// @@ Invoke appropriate compressor.
}
|
||||
|
||||
|
||||
|
||||
#endif // NVTT_EXPERIMENTAL_H
|
@ -1,55 +0,0 @@
|
||||
|
||||
#ifndef NVTT_EXPERIMENTAL_H
|
||||
#define NVTT_EXPERIMENTAL_H
|
||||
|
||||
#include <nvtt/nvtt.h>
|
||||
|
||||
typedef struct NvttImage NvttImage;
|
||||
|
||||
NvttImage * nvttCreateImage();
|
||||
void nvttDestroyImage(NvttImage * img);
|
||||
|
||||
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data);
|
||||
|
||||
void nvttCompressImage(NvttImage * img, NvttFormat format);
|
||||
|
||||
// How to control the compression parameters?
|
||||
|
||||
// Using many arguments:
|
||||
// void nvttCompressImage(img, format, quality, r, g, b, a, ...);
|
||||
|
||||
// Using existing compression option class:
|
||||
// compressionOptions = nvttCreateCompressionOptions();
|
||||
// nvttSetCompressionOptionsFormat(compressionOptions, format);
|
||||
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
|
||||
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
|
||||
// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a);
|
||||
// ...
|
||||
// nvttCompressImage(img, compressionOptions);
|
||||
|
||||
// Using thread local context state:
|
||||
// void nvttSetCompressionFormat(format);
|
||||
// void nvttSetCompressionQuality(quality);
|
||||
// void nvttSetCompressionColorWeights(r, g, b, a);
|
||||
// ...
|
||||
// nvttCompressImage(img);
|
||||
|
||||
// Using thread local context state, but with GL style function arguments:
|
||||
// nvttCompressorParameteri(NVTT_FORMAT, format);
|
||||
// nvttCompressorParameteri(NVTT_QUALITY, quality);
|
||||
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r);
|
||||
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g);
|
||||
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b);
|
||||
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a);
|
||||
// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a);
|
||||
// ...
|
||||
// nvttCompressImage(img);
|
||||
|
||||
// How do we get the compressed output?
|
||||
// - Using callbacks. (via new entrypoints, or through outputOptions)
|
||||
// - Return it explicitly from nvttCompressImage.
|
||||
// - Store it along with the image, and retrieve it later explicitly with 'nvttGetCompressedData(img, ...)'
|
||||
|
||||
|
||||
|
||||
#endif // NVTT_EXPERIMENTAL_H
|
@ -73,7 +73,7 @@ namespace nvtt
|
||||
Format_DXT1a, // DXT1 with binary alpha.
|
||||
Format_DXT3,
|
||||
Format_DXT5,
|
||||
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
|
||||
Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x
|
||||
|
||||
// DX10 formats.
|
||||
Format_BC1 = Format_DXT1,
|
||||
@ -83,9 +83,6 @@ namespace nvtt
|
||||
Format_BC3n = Format_DXT5n,
|
||||
Format_BC4, // ATI1
|
||||
Format_BC5, // 3DC, ATI2
|
||||
|
||||
Format_DXT1n,
|
||||
Format_CTX1,
|
||||
};
|
||||
|
||||
/// Quality modes.
|
||||
@ -157,7 +154,6 @@ namespace nvtt
|
||||
{
|
||||
ColorTransform_None,
|
||||
ColorTransform_Linear,
|
||||
ColorTransform_Swizzle
|
||||
};
|
||||
|
||||
/// Extents rounding mode.
|
||||
@ -198,7 +194,7 @@ namespace nvtt
|
||||
// Describe the format of the input.
|
||||
NVTT_API void setFormat(InputFormat format);
|
||||
|
||||
// Set the way the input alpha channel is interpreted. @@ Not implemented!
|
||||
// Set the way the input alpha channel is interpreted.
|
||||
NVTT_API void setAlphaMode(AlphaMode alphaMode);
|
||||
|
||||
// Set gamma settings.
|
||||
@ -222,14 +218,10 @@ namespace nvtt
|
||||
// Set color transforms. @@ Not implemented!
|
||||
NVTT_API void setColorTransform(ColorTransform t);
|
||||
NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3);
|
||||
NVTT_API void setSwizzleTransform(int x, int y, int z, int w);
|
||||
|
||||
// Set resizing options.
|
||||
NVTT_API void setMaxExtents(int d);
|
||||
NVTT_API void setRoundMode(RoundMode mode);
|
||||
|
||||
// Set whether or not to premultiply color by alpha
|
||||
NVTT_API void setPremultiplyAlpha(bool b);
|
||||
};
|
||||
|
||||
|
||||
|
@ -1,13 +1,8 @@
|
||||
PROJECT(squish)
|
||||
ENABLE_TESTING()
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
SET(SQUISH_SRCS
|
||||
# alpha.cpp
|
||||
# alpha.h
|
||||
# clusterfit.cpp
|
||||
# clusterfit.h
|
||||
fastclusterfit.cpp
|
||||
fastclusterfit.h
|
||||
weightedclusterfit.cpp
|
||||
@ -21,32 +16,13 @@ SET(SQUISH_SRCS
|
||||
config.h
|
||||
maths.cpp
|
||||
maths.h
|
||||
# rangefit.cpp
|
||||
# rangefit.h
|
||||
# singlecolourfit.cpp
|
||||
# singlecolourfit.h
|
||||
# singlecolourlookup.inl
|
||||
# squish.cpp
|
||||
# squish.h
|
||||
simd.h
|
||||
simd_sse.h
|
||||
simd_ve.h)
|
||||
|
||||
ADD_LIBRARY(squish STATIC ${SQUISH_SRCS})
|
||||
|
||||
# libpng
|
||||
#FIND_PACKAGE(PNG)
|
||||
|
||||
#IF(PNG_FOUND)
|
||||
# INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
|
||||
# ADD_EXECUTABLE(squishpng extra/squishpng.cpp)
|
||||
# TARGET_LINK_LIBRARIES(squishpng squish ${PNG_LIBRARY})
|
||||
#ENDIF(PNG_FOUND)
|
||||
|
||||
##ADD_EXECUTABLE(squishgen extra/squishgen.cpp)
|
||||
|
||||
#ADD_EXECUTABLE(squishtest extra/squishtest.cpp)
|
||||
#TARGET_LINK_LIBRARIES(squishtest squish)
|
||||
|
||||
#ADD_TEST(SQUISHTEST squishtest)
|
||||
IF(CMAKE_COMPILER_IS_GNUCXX)
|
||||
SET_TARGET_PROPERTIES(squish PROPERTIES COMPILE_FLAGS -fPIC)
|
||||
ENDIF(CMAKE_COMPILER_IS_GNUCXX)
|
||||
|
||||
|
@ -29,6 +29,8 @@
|
||||
#include "colourblock.h"
|
||||
#include <cfloat>
|
||||
|
||||
#include "fastclusterlookup.inl"
|
||||
|
||||
namespace squish {
|
||||
|
||||
FastClusterFit::FastClusterFit()
|
||||
@ -97,91 +99,6 @@ void FastClusterFit::SetColourSet( ColourSet const* colours, int flags )
|
||||
}
|
||||
|
||||
|
||||
// One precomputed table entry for a given distribution of colours over the palette slots.
struct Precomp
{
	float alpha2_sum;     // Accumulated squared weight towards the first endpoint.
	float beta2_sum;      // Accumulated squared weight towards the second endpoint.
	float alphabeta_sum;  // Accumulated product of the two weights.
	float factor;         // 1 / (alpha2_sum * beta2_sum - alphabeta_sum^2).
};
|
||||
|
||||
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
|
||||
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
|
||||
|
||||
void FastClusterFit::DoPrecomputation()
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
// Three element clusters:
|
||||
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
|
||||
{
|
||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
||||
{
|
||||
int c2 = 16 - c0 - c1;
|
||||
|
||||
/*if (c2 == 16) {
|
||||
// a = b = x2 / 16
|
||||
s_threeElement[i].alpha2_sum = 0;
|
||||
s_threeElement[i].beta2_sum = 16;
|
||||
s_threeElement[i].alphabeta_sum = -16;
|
||||
s_threeElement[i].factor = 1.0f / 256.0f;
|
||||
}
|
||||
else if (c0 == 16) {
|
||||
// a = b = x0 / 16
|
||||
s_threeElement[i].alpha2_sum = 16;
|
||||
s_threeElement[i].beta2_sum = 0;
|
||||
s_threeElement[i].alphabeta_sum = -16;
|
||||
s_threeElement[i].factor = 1.0f / 256.0f;
|
||||
}
|
||||
else*/ {
|
||||
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
|
||||
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
|
||||
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
|
||||
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
//printf("%d three cluster elements\n", i);
|
||||
|
||||
// Four element clusters:
|
||||
i = 0;
|
||||
for( int c0 = 0; c0 <= 16; c0++)
|
||||
{
|
||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
||||
{
|
||||
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
|
||||
{
|
||||
int c3 = 16 - c0 - c1 - c2;
|
||||
|
||||
/*if (c3 == 16) {
|
||||
// a = b = x3 / 16
|
||||
s_fourElement[i].alpha2_sum = 16.0f;
|
||||
s_fourElement[i].beta2_sum = 0.0f;
|
||||
s_fourElement[i].alphabeta_sum = -16.0f;
|
||||
s_fourElement[i].factor = 1.0f / 256.0f;
|
||||
}
|
||||
else if (c0 == 16) {
|
||||
// a = b = x0 / 16
|
||||
s_fourElement[i].alpha2_sum = 0.0f;
|
||||
s_fourElement[i].beta2_sum = 16.0f;
|
||||
s_fourElement[i].alphabeta_sum = -16.0f;
|
||||
s_fourElement[i].factor = 1.0f / 256.0f;
|
||||
}
|
||||
else*/ {
|
||||
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
|
||||
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
|
||||
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
|
||||
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
//printf("%d four cluster elements\n", i);
|
||||
}
|
||||
|
||||
void FastClusterFit::SetMetric(float r, float g, float b)
|
||||
{
|
||||
#if SQUISH_USE_SIMD
|
||||
|
@ -44,8 +44,6 @@ public:
|
||||
void SetMetric(float r, float g, float b);
|
||||
float GetBestError() const;
|
||||
|
||||
static void DoPrecomputation();
|
||||
|
||||
// Make them public
|
||||
virtual void Compress3( void* block );
|
||||
virtual void Compress4( void* block );
|
||||
|
1135
src/nvtt/squish/fastclusterlookup.inl
Normal file
1135
src/nvtt/squish/fastclusterlookup.inl
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user