From ab316deeaa824b430a51fa3e781e8b8cb14cddfb Mon Sep 17 00:00:00 2001 From: "nathaniel.reed@gmail.com" Date: Sat, 7 Dec 2013 02:17:08 +0000 Subject: [PATCH] Add BC7 support. It's incredibly slow - ~60 seconds to compress a 512x512 image, on a Core i7 - but it works. - Added AVPCL compressor to projects and got it building with VC9 and VC10. - Removed unused command line interface & file read/write code from AVPCL. - Convert AVPCL to use NV vector math lib, asserts, etc. - Convert AVPCL to use double instead of float. - Added 4x4 symmetric eigensolver, for AVPCL; it's based on the existing 3x3 one, but I had to rewrite the Householder reduction stage. As with ZOH, using the eigensolver (instead of SVD) gives a ~25% speedup without significantly affecting RMSE. - Encapsulate ZOH and AVPCL stuff into their own namespaces to keep everything separate. - Added some missing vector operators to the nvmath lib. --- project/vc10/bc7/bc7.vcxproj | 152 ++ project/vc10/nvassemble/nvassemble.vcxproj | 3 + project/vc10/nvcompress/nvcompress.vcxproj | 3 + project/vc10/nvddsinfo/nvddsinfo.vcxproj | 3 + .../vc10/nvdecompress/nvdecompress.vcxproj | 3 + project/vc10/nvimgdiff/nvimgdiff.vcxproj | 3 + project/vc10/nvtt.sln | 18 + project/vc10/nvtt/nvtt.vcxproj | 3 + project/vc10/nvzoom/nvzoom.vcxproj | 3 + project/vc9/bc7/bc7.vcproj | 340 ++++ project/vc9/nvtt.sln | 25 + src/nvimage/BlockDXT.cpp | 37 +- src/nvimage/BlockDXT.h | 9 +- src/nvimage/DirectDrawSurface.cpp | 6 + src/nvmath/Fitting.cpp | 1456 +++++++++++------ src/nvmath/Fitting.h | 14 +- src/nvmath/Vector.h | 3 + src/nvmath/Vector.inl | 28 + src/nvtt/CompressorDX11.cpp | 41 +- src/nvtt/Context.cpp | 3 +- src/nvtt/Surface.cpp | 8 +- src/nvtt/bc6h/bits.h | 28 +- src/nvtt/bc6h/shapes_two.h | 4 +- src/nvtt/bc6h/tile.h | 11 +- src/nvtt/bc6h/utils.cpp | 1 + src/nvtt/bc6h/utils.h | 29 +- src/nvtt/bc6h/zoh.cpp | 4 +- src/nvtt/bc6h/zoh.h | 47 +- src/nvtt/bc6h/zohone.cpp | 9 +- src/nvtt/bc6h/zohtwo.cpp | 9 +- 
src/nvtt/bc7/CMakeLists.txt | 30 + src/nvtt/bc7/ImfArray.h | 261 --- src/nvtt/bc7/arvo/ArvoMath.cpp | 342 ---- src/nvtt/bc7/arvo/ArvoMath.h | 212 --- src/nvtt/bc7/arvo/Char.cpp | 420 ----- src/nvtt/bc7/arvo/Char.h | 245 --- src/nvtt/bc7/arvo/Complex.cpp | 76 - src/nvtt/bc7/arvo/Complex.h | 187 --- src/nvtt/bc7/arvo/Matrix.cpp | 1201 -------------- src/nvtt/bc7/arvo/Matrix.h | 142 -- src/nvtt/bc7/arvo/Perm.cpp | 503 ------ src/nvtt/bc7/arvo/Perm.h | 111 -- src/nvtt/bc7/arvo/Rand.cpp | 230 --- src/nvtt/bc7/arvo/Rand.h | 114 -- src/nvtt/bc7/arvo/SI_units.h | 232 --- src/nvtt/bc7/arvo/SVD.cpp | 398 ----- src/nvtt/bc7/arvo/SVD.h | 54 - src/nvtt/bc7/arvo/SphTri.cpp | 292 ---- src/nvtt/bc7/arvo/SphTri.h | 124 -- src/nvtt/bc7/arvo/Token.cpp | 913 ----------- src/nvtt/bc7/arvo/Token.h | 203 --- src/nvtt/bc7/arvo/Vec2.cpp | 94 -- src/nvtt/bc7/arvo/Vec2.h | 358 ---- src/nvtt/bc7/arvo/Vec3.cpp | 119 -- src/nvtt/bc7/arvo/Vec3.h | 517 ------ src/nvtt/bc7/arvo/Vec4.cpp | 79 - src/nvtt/bc7/arvo/Vec4.h | 238 --- src/nvtt/bc7/arvo/Vector.cpp | 366 ----- src/nvtt/bc7/arvo/Vector.h | 103 -- src/nvtt/bc7/arvo/form.h | 26 - src/nvtt/bc7/avpcl.cpp | 80 +- src/nvtt/bc7/avpcl.h | 150 +- src/nvtt/bc7/avpcl.sln | 21 - src/nvtt/bc7/avpcl.vcproj | 314 ---- src/nvtt/bc7/avpcl_mode0.cpp | 236 ++- src/nvtt/bc7/avpcl_mode1.cpp | 238 ++- src/nvtt/bc7/avpcl_mode2.cpp | 224 ++- src/nvtt/bc7/avpcl_mode3.cpp | 236 ++- src/nvtt/bc7/avpcl_mode4.cpp | 290 ++-- src/nvtt/bc7/avpcl_mode5.cpp | 290 ++-- src/nvtt/bc7/avpcl_mode6.cpp | 223 ++- src/nvtt/bc7/avpcl_mode7.cpp | 226 ++- src/nvtt/bc7/avpclc.cpp | 348 ---- src/nvtt/bc7/bits.h | 28 +- src/nvtt/bc7/endpts.h | 27 +- src/nvtt/bc7/rgba.h | 27 - src/nvtt/bc7/shapes_three.h | 4 +- src/nvtt/bc7/shapes_two.h | 4 +- src/nvtt/bc7/targa.cpp | 179 -- src/nvtt/bc7/targa.h | 30 - src/nvtt/bc7/tile.h | 41 +- src/nvtt/bc7/utils.cpp | 237 ++- src/nvtt/bc7/utils.h | 58 +- src/nvtt/nvtt.h | 2 +- src/nvtt/tests/testsuite.cpp | 12 +- src/nvtt/tools/compress.cpp | 7 +- 86 
files changed, 2944 insertions(+), 11081 deletions(-) create mode 100644 project/vc10/bc7/bc7.vcxproj create mode 100644 project/vc9/bc7/bc7.vcproj create mode 100644 src/nvtt/bc7/CMakeLists.txt delete mode 100644 src/nvtt/bc7/ImfArray.h delete mode 100644 src/nvtt/bc7/arvo/ArvoMath.cpp delete mode 100644 src/nvtt/bc7/arvo/ArvoMath.h delete mode 100644 src/nvtt/bc7/arvo/Char.cpp delete mode 100644 src/nvtt/bc7/arvo/Char.h delete mode 100644 src/nvtt/bc7/arvo/Complex.cpp delete mode 100644 src/nvtt/bc7/arvo/Complex.h delete mode 100644 src/nvtt/bc7/arvo/Matrix.cpp delete mode 100644 src/nvtt/bc7/arvo/Matrix.h delete mode 100644 src/nvtt/bc7/arvo/Perm.cpp delete mode 100644 src/nvtt/bc7/arvo/Perm.h delete mode 100644 src/nvtt/bc7/arvo/Rand.cpp delete mode 100644 src/nvtt/bc7/arvo/Rand.h delete mode 100644 src/nvtt/bc7/arvo/SI_units.h delete mode 100644 src/nvtt/bc7/arvo/SVD.cpp delete mode 100644 src/nvtt/bc7/arvo/SVD.h delete mode 100644 src/nvtt/bc7/arvo/SphTri.cpp delete mode 100644 src/nvtt/bc7/arvo/SphTri.h delete mode 100644 src/nvtt/bc7/arvo/Token.cpp delete mode 100644 src/nvtt/bc7/arvo/Token.h delete mode 100644 src/nvtt/bc7/arvo/Vec2.cpp delete mode 100644 src/nvtt/bc7/arvo/Vec2.h delete mode 100644 src/nvtt/bc7/arvo/Vec3.cpp delete mode 100644 src/nvtt/bc7/arvo/Vec3.h delete mode 100644 src/nvtt/bc7/arvo/Vec4.cpp delete mode 100644 src/nvtt/bc7/arvo/Vec4.h delete mode 100644 src/nvtt/bc7/arvo/Vector.cpp delete mode 100644 src/nvtt/bc7/arvo/Vector.h delete mode 100644 src/nvtt/bc7/arvo/form.h delete mode 100644 src/nvtt/bc7/avpcl.sln delete mode 100644 src/nvtt/bc7/avpcl.vcproj delete mode 100644 src/nvtt/bc7/avpclc.cpp delete mode 100644 src/nvtt/bc7/rgba.h delete mode 100644 src/nvtt/bc7/targa.cpp delete mode 100644 src/nvtt/bc7/targa.h diff --git a/project/vc10/bc7/bc7.vcxproj b/project/vc10/bc7/bc7.vcxproj new file mode 100644 index 0000000..9d052bf --- /dev/null +++ b/project/vc10/bc7/bc7.vcxproj @@ -0,0 +1,152 @@ + + + + + Debug + Win32 + + + Debug + 
x64 + + + Release + Win32 + + + Release + x64 + + + + {F974F34B-AF02-4C88-8E1E-85475094EA78} + bc7 + + + + StaticLibrary + MultiByte + true + + + StaticLibrary + MultiByte + + + StaticLibrary + MultiByte + true + + + StaticLibrary + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + $(Configuration)\$(Platform)\ + $(Configuration)\$(Platform)\ + $(Configuration)\$(Platform)\ + $(Configuration)\$(Platform)\ + $(Configuration)\$(Platform)\ + $(Configuration)\$(Platform)\ + $(Configuration)\$(Platform)\ + $(Configuration)\$(Platform)\ + + + + Disabled + true + EnableFastChecks + MultiThreadedDebugDLL + Level3 + EditAndContinue + $(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories) + + + + + X64 + + + Disabled + true + EnableFastChecks + MultiThreadedDebugDLL + Level3 + ProgramDatabase + $(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories) + + + + + MaxSpeed + true + true + MultiThreadedDLL + false + StreamingSIMDExtensions2 + Level3 + ProgramDatabase + true + $(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories) + + + + + X64 + + + MaxSpeed + true + MultiThreadedDLL + false + Level3 + ProgramDatabase + $(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories) + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/project/vc10/nvassemble/nvassemble.vcxproj b/project/vc10/nvassemble/nvassemble.vcxproj index 6584357..1186e4b 100755 --- a/project/vc10/nvassemble/nvassemble.vcxproj +++ b/project/vc10/nvassemble/nvassemble.vcxproj @@ -181,6 +181,9 @@ {c33787e3-5564-4834-9fe3-a9020455a669} + + {f974f34b-af02-4c88-8e1e-85475094ea78} + {f143d180-d4c4-4037-b3de-be89a21c8d1d} false diff --git a/project/vc10/nvcompress/nvcompress.vcxproj b/project/vc10/nvcompress/nvcompress.vcxproj index 
d94b560..7de782b 100755 --- a/project/vc10/nvcompress/nvcompress.vcxproj +++ b/project/vc10/nvcompress/nvcompress.vcxproj @@ -355,6 +355,9 @@ {c33787e3-5564-4834-9fe3-a9020455a669} + + {f974f34b-af02-4c88-8e1e-85475094ea78} + {f143d180-d4c4-4037-b3de-be89a21c8d1d} false diff --git a/project/vc10/nvddsinfo/nvddsinfo.vcxproj b/project/vc10/nvddsinfo/nvddsinfo.vcxproj index 47a8f8f..e8131bf 100755 --- a/project/vc10/nvddsinfo/nvddsinfo.vcxproj +++ b/project/vc10/nvddsinfo/nvddsinfo.vcxproj @@ -181,6 +181,9 @@ {c33787e3-5564-4834-9fe3-a9020455a669} + + {f974f34b-af02-4c88-8e1e-85475094ea78} + {f143d180-d4c4-4037-b3de-be89a21c8d1d} false diff --git a/project/vc10/nvdecompress/nvdecompress.vcxproj b/project/vc10/nvdecompress/nvdecompress.vcxproj index 656398d..e7bff8c 100755 --- a/project/vc10/nvdecompress/nvdecompress.vcxproj +++ b/project/vc10/nvdecompress/nvdecompress.vcxproj @@ -207,6 +207,9 @@ {c33787e3-5564-4834-9fe3-a9020455a669} + + {f974f34b-af02-4c88-8e1e-85475094ea78} + {f143d180-d4c4-4037-b3de-be89a21c8d1d} false diff --git a/project/vc10/nvimgdiff/nvimgdiff.vcxproj b/project/vc10/nvimgdiff/nvimgdiff.vcxproj index 88401d8..c472aa1 100755 --- a/project/vc10/nvimgdiff/nvimgdiff.vcxproj +++ b/project/vc10/nvimgdiff/nvimgdiff.vcxproj @@ -209,6 +209,9 @@ {c33787e3-5564-4834-9fe3-a9020455a669} + + {f974f34b-af02-4c88-8e1e-85475094ea78} + {f143d180-d4c4-4037-b3de-be89a21c8d1d} false diff --git a/project/vc10/nvtt.sln b/project/vc10/nvtt.sln index 28334ac..61c75bc 100644 --- a/project/vc10/nvtt.sln +++ b/project/vc10/nvtt.sln @@ -45,6 +45,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc6h", "bc6h\bc6h.vcxproj", EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvthread", "nvthread\nvthread.vcxproj", "{4CFD4876-A026-46C2-AFCF-FB11346E815D}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc7", "bc7\bc7.vcxproj", "{F974F34B-AF02-4C88-8E1E-85475094EA78}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = 
preSolution Debug|Win32 = Debug|Win32 @@ -313,6 +315,22 @@ Global {4CFD4876-A026-46C2-AFCF-FB11346E815D}.Release-CUDA|Win32.Build.0 = Release|Win32 {4CFD4876-A026-46C2-AFCF-FB11346E815D}.Release-CUDA|x64.ActiveCfg = Release|x64 {4CFD4876-A026-46C2-AFCF-FB11346E815D}.Release-CUDA|x64.Build.0 = Release|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|Win32.ActiveCfg = Debug|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|Win32.Build.0 = Debug|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|x64.ActiveCfg = Debug|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|x64.Build.0 = Debug|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|Win32.Build.0 = Debug|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|x64.ActiveCfg = Debug|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|x64.Build.0 = Debug|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|Win32.ActiveCfg = Release|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|Win32.Build.0 = Release|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|x64.ActiveCfg = Release|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|x64.Build.0 = Release|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|Win32.ActiveCfg = Release|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|Win32.Build.0 = Release|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|x64.ActiveCfg = Release|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/project/vc10/nvtt/nvtt.vcxproj b/project/vc10/nvtt/nvtt.vcxproj index 4453155..82673cb 100755 --- a/project/vc10/nvtt/nvtt.vcxproj +++ b/project/vc10/nvtt/nvtt.vcxproj @@ -471,6 +471,9 @@ {c33787e3-5564-4834-9fe3-a9020455a669} false + + {f974f34b-af02-4c88-8e1e-85475094ea78} + {f143d180-d4c4-4037-b3de-be89a21c8d1d} false diff --git 
a/project/vc10/nvzoom/nvzoom.vcxproj b/project/vc10/nvzoom/nvzoom.vcxproj index 97416ec..656e73c 100755 --- a/project/vc10/nvzoom/nvzoom.vcxproj +++ b/project/vc10/nvzoom/nvzoom.vcxproj @@ -200,6 +200,9 @@ {c33787e3-5564-4834-9fe3-a9020455a669} + + {f974f34b-af02-4c88-8e1e-85475094ea78} + {f143d180-d4c4-4037-b3de-be89a21c8d1d} false diff --git a/project/vc9/bc7/bc7.vcproj b/project/vc9/bc7/bc7.vcproj new file mode 100644 index 0000000..38e0d1f --- /dev/null +++ b/project/vc9/bc7/bc7.vcproj @@ -0,0 +1,340 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/project/vc9/nvtt.sln b/project/vc9/nvtt.sln index 5a52ecf..8d41bc3 100644 --- a/project/vc9/nvtt.sln +++ b/project/vc9/nvtt.sln @@ -5,6 +5,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", ProjectSection(ProjectDependencies) = postProject {CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38} {3DD3A43D-C6EA-460F-821B-6C339A03C5BB} = {3DD3A43D-C6EA-460F-821B-6C339A03C5BB} + {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} @@ -13,6 +14,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}" ProjectSection(ProjectDependencies) = postProject + {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} 
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} @@ -35,6 +37,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcp EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvddsinfo.vcproj", "{841B73C5-C679-4EEF-A50A-7D6106642B49}" ProjectSection(ProjectDependencies) = postProject + {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} @@ -43,6 +46,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}" ProjectSection(ProjectDependencies) = postProject + {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} @@ -51,6 +55,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompres EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}" ProjectSection(ProjectDependencies) = postProject + {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {C33787E3-5564-4834-9FE3-A9020455A669} = 
{C33787E3-5564-4834-9FE3-A9020455A669} @@ -59,6 +64,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvim EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}" ProjectSection(ProjectDependencies) = postProject + {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} @@ -67,6 +73,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}" ProjectSection(ProjectDependencies) = postProject + {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} @@ -110,6 +117,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hdrtest", "hdrtest\hdrtest. 
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc7", "bc7\bc7.vcproj", "{F974F34B-AF02-4C88-8E1E-85475094EA78}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -410,6 +419,22 @@ Global {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|Win32.Build.0 = Release|Win32 {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|x64.ActiveCfg = Release|x64 {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|x64.Build.0 = Release|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|Win32.ActiveCfg = Debug|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|Win32.Build.0 = Debug|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|x64.ActiveCfg = Debug|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|x64.Build.0 = Debug|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|Win32.Build.0 = Debug|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|x64.ActiveCfg = Debug|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|x64.Build.0 = Debug|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|Win32.ActiveCfg = Release|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|Win32.Build.0 = Release|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|x64.ActiveCfg = Release|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|x64.Build.0 = Release|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|Win32.ActiveCfg = Release|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|Win32.Build.0 = Release|Win32 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|x64.ActiveCfg = Release|x64 + {F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/nvimage/BlockDXT.cpp 
b/src/nvimage/BlockDXT.cpp index 8d2a83c..cc5d081 100644 --- a/src/nvimage/BlockDXT.cpp +++ b/src/nvimage/BlockDXT.cpp @@ -27,9 +27,10 @@ #include "nvcore/Stream.h" #include "nvcore/Utils.h" // swap #include "nvmath/Half.h" +#include "nvmath/Vector.inl" #include "nvtt/bc6h/zoh.h" -#include "nvtt/bc6h/utils.h" +#include "nvtt/bc7/avpcl.h" using namespace nv; @@ -617,7 +618,7 @@ void BlockCTX1::setIndices(int * idx) /// Decode BC6 block. void BlockBC6::decodeBlock(ColorSet * set) const { - Tile tile(4, 4); + ZOH::Tile tile(4, 4); ZOH::decompress((const char *)data, tile); // Convert ZOH's tile struct back to NVTT's, and convert half to float. @@ -626,9 +627,9 @@ void BlockBC6::decodeBlock(ColorSet * set) const { for (uint x = 0; x < 4; ++x) { - uint16 rHalf = Tile::float2half(tile.data[y][x].x); - uint16 gHalf = Tile::float2half(tile.data[y][x].y); - uint16 bHalf = Tile::float2half(tile.data[y][x].z); + uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x); + uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y); + uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z); set->colors[y * 4 + x].x = to_float(rHalf); set->colors[y * 4 + x].y = to_float(gHalf); set->colors[y * 4 + x].z = to_float(bHalf); @@ -641,6 +642,26 @@ void BlockBC6::decodeBlock(ColorSet * set) const } +/// Decode BC7 block. +void BlockBC7::decodeBlock(ColorBlock * block) const +{ + AVPCL::Tile tile(4, 4); + AVPCL::decompress((const char *)data, tile); + + // Convert AVPCL's tile struct back to NVTT's. + for (uint y = 0; y < 4; ++y) + { + for (uint x = 0; x < 4; ++x) + { + Vector4 rgba = tile.data[y][x]; + // Note: decoded rgba values are in [0, 255] range and should be an integer, + // because BC7 never uses more than 8 bits per channel. So no need to round. + block->color(x, y).setRGBA(uint8(rgba.x), uint8(rgba.y), uint8(rgba.z), uint8(rgba.w)); + } + } +} + + /// Flip CTX1 block vertically. 
inline void BlockCTX1::flip4() { @@ -707,3 +728,9 @@ Stream & nv::operator<<(Stream & stream, BlockBC6 & block) stream.serialize(&block, sizeof(block)); return stream; } + +Stream & nv::operator<<(Stream & stream, BlockBC7 & block) +{ + stream.serialize(&block, sizeof(block)); + return stream; +} diff --git a/src/nvimage/BlockDXT.h b/src/nvimage/BlockDXT.h index 34f6474..40ba7fe 100644 --- a/src/nvimage/BlockDXT.h +++ b/src/nvimage/BlockDXT.h @@ -220,7 +220,13 @@ namespace nv void decodeBlock(ColorSet * set) const; }; - /// !!!UNDONE: BC7 block + /// BC7 block. + struct BlockBC7 + { + uint8 data[16]; // Not even going to try to write a union for this thing. + void decodeBlock(ColorBlock * block) const; + }; + // Serialization functions. @@ -233,6 +239,7 @@ namespace nv NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block); NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block); NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC6 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC7 & block); } // nv namespace diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp index e95f2f0..9788b62 100644 --- a/src/nvimage/DirectDrawSurface.cpp +++ b/src/nvimage/DirectDrawSurface.cpp @@ -1410,6 +1410,12 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba) } } } + else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM) + { + BlockBC7 block; + *stream << block; + block.decodeBlock(rgba); + } else { nvDebugCheck(false); diff --git a/src/nvmath/Fitting.cpp b/src/nvmath/Fitting.cpp index 72d3c42..453ad9b 100644 --- a/src/nvmath/Fitting.cpp +++ b/src/nvmath/Fitting.cpp @@ -1,539 +1,915 @@ -// This code is in the public domain -- Ignacio Castaño - -#include "Fitting.h" -#include "Vector.inl" -#include "Plane.inl" - -#include "nvcore/Utils.h" // max, swap - -#include // FLT_MAX -#include - -using namespace nv; - -// @@ Move to EigenSolver.h - -// @@ We should be able to do 
something cheaper... -static Vector3 estimatePrincipalComponent(const float * __restrict matrix) -{ - const Vector3 row0(matrix[0], matrix[1], matrix[2]); - const Vector3 row1(matrix[1], matrix[3], matrix[4]); - const Vector3 row2(matrix[2], matrix[4], matrix[5]); - - float r0 = lengthSquared(row0); - float r1 = lengthSquared(row1); - float r2 = lengthSquared(row2); - - if (r0 > r1 && r0 > r2) return row0; - if (r1 > r2) return row1; - return row2; -} - - -static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix) -{ - if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) - { - return Vector3(0.0f); - } - - Vector3 v = estimatePrincipalComponent(matrix); - - const int NUM = 8; - for (int i = 0; i < NUM; i++) - { - float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2]; - float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4]; - float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5]; - - float norm = max(max(x, y), z); - - v = Vector3(x, y, z) / norm; - } - - return v; -} - - -Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points) -{ - Vector3 centroid(0.0f); - - for (int i = 0; i < n; i++) - { - centroid += points[i]; - } - centroid /= float(n); - - return centroid; -} - -Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) -{ - Vector3 centroid(0.0f); - float total = 0.0f; - - for (int i = 0; i < n; i++) - { - total += weights[i]; - centroid += weights[i]*points[i]; - } - centroid /= total; - - return centroid; -} - - -Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance) -{ - // compute the centroid - Vector3 centroid = computeCentroid(n, points); - - // compute covariance matrix - for (int i = 0; i < 6; i++) - { - covariance[i] = 0.0f; - } - - for (int i = 0; i < n; i++) - { - Vector3 v = points[i] - centroid; - - covariance[0] += v.x * v.x; - covariance[1] += v.x * v.y; - 
covariance[2] += v.x * v.z; - covariance[3] += v.y * v.y; - covariance[4] += v.y * v.z; - covariance[5] += v.z * v.z; - } - - return centroid; -} - -Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance) -{ - // compute the centroid - Vector3 centroid = computeCentroid(n, points, weights, metric); - - // compute covariance matrix - for (int i = 0; i < 6; i++) - { - covariance[i] = 0.0f; - } - - for (int i = 0; i < n; i++) - { - Vector3 a = (points[i] - centroid) * metric; - Vector3 b = weights[i]*a; - - covariance[0] += a.x * b.x; - covariance[1] += a.x * b.y; - covariance[2] += a.x * b.z; - covariance[3] += a.y * b.y; - covariance[4] += a.y * b.z; - covariance[5] += a.z * b.z; - } - - return centroid; -} - -Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points) -{ - float matrix[6]; - computeCovariance(n, points, matrix); - - return firstEigenVector_PowerMethod(matrix); -} - -Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) -{ - float matrix[6]; - computeCovariance(n, points, weights, metric, matrix); - - return firstEigenVector_PowerMethod(matrix); -} - - - -static inline Vector3 firstEigenVector_EigenSolver(const float *__restrict matrix) -{ - if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) - { - return Vector3(0.0f); - } - - float eigenValues[3]; - Vector3 eigenVectors[3]; - if (!nv::Fit::eigenSolveSymmetric(matrix, eigenValues, eigenVectors)) - { - return Vector3(0.0f); - } - - return eigenVectors[0]; -} - -Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points) -{ - float matrix[6]; - computeCovariance(n, points, matrix); - - return firstEigenVector_EigenSolver(matrix); -} - -Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, 
const float *__restrict weights, Vector3::Arg metric) -{ - float matrix[6]; - computeCovariance(n, points, weights, metric, matrix); - - return firstEigenVector_EigenSolver(matrix); -} - -void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R); - -Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points) -{ - // Store the points in an n x n matrix - std::vector Q(n*n, 0.0f); - for (int i = 0; i < n; ++i) - { - Q[i*n+0] = points[i].x; - Q[i*n+1] = points[i].y; - Q[i*n+2] = points[i].z; - } - - // Alloc space for the SVD outputs - std::vector diag(n, 0.0f); - std::vector R(n*n, 0.0f); - - ArvoSVD(n, n, &Q[0], &diag[0], &R[0]); - - // Get the principal component - return Vector3(R[0], R[1], R[2]); -} - - - -Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points) -{ - // compute the centroid and covariance - float matrix[6]; - Vector3 centroid = computeCovariance(n, points, matrix); - - if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) - { - // If no plane defined, then return a horizontal plane. - return Plane(Vector3(0, 0, 1), centroid); - } - - float eigenValues[3]; - Vector3 eigenVectors[3]; - if (!eigenSolveSymmetric(matrix, eigenValues, eigenVectors)) { - // If no plane defined, then return a horizontal plane. - return Plane(Vector3(0, 0, 1), centroid); - } - - return Plane(eigenVectors[2], centroid); -} - -bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON*/) -{ - // compute the centroid and covariance - float matrix[6]; - computeCovariance(n, points, matrix); - - float eigenValues[3]; - Vector3 eigenVectors[3]; - if (!eigenSolveSymmetric(matrix, eigenValues, eigenVectors)) { - return false; - } - - return eigenValues[2] < epsilon; -} - - - -// Tridiagonal solver from Charles Bloom. -// Householder transforms followed by QL decomposition. -// Seems to be based on the code from Numerical Recipes in C. 
- -static void EigenSolver_Tridiagonal(float mat[3][3],float * diag,float * subd); -static bool EigenSolver_QLAlgorithm(float mat[3][3],float * diag,float * subd); - -bool nv::Fit::eigenSolveSymmetric(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]) -{ - nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL); - - float subd[3]; - float diag[3]; - float work[3][3]; - - work[0][0] = matrix[0]; - work[0][1] = work[1][0] = matrix[1]; - work[0][2] = work[2][0] = matrix[2]; - work[1][1] = matrix[3]; - work[1][2] = work[2][1] = matrix[4]; - work[2][2] = matrix[5]; - - EigenSolver_Tridiagonal(work, diag, subd); - if (!EigenSolver_QLAlgorithm(work, diag, subd)) - { - for (int i = 0; i < 3; i++) { - eigenValues[i] = 0; - eigenVectors[i] = Vector3(0); - } - return false; - } - - for (int i = 0; i < 3; i++) { - eigenValues[i] = (float)diag[i]; - } - - // eigenvectors are the columns; make them the rows : - - for (int i=0; i < 3; i++) - { - for (int j = 0; j < 3; j++) - { - eigenVectors[j].component[i] = (float) work[i][j]; - } - } - - // shuffle to sort by singular value : - if (eigenValues[2] > eigenValues[0] && eigenValues[2] > eigenValues[1]) - { - swap(eigenValues[0], eigenValues[2]); - swap(eigenVectors[0], eigenVectors[2]); - } - if (eigenValues[1] > eigenValues[0]) - { - swap(eigenValues[0], eigenValues[1]); - swap(eigenVectors[0], eigenVectors[1]); - } - if (eigenValues[2] > eigenValues[1]) - { - swap(eigenValues[1], eigenValues[2]); - swap(eigenVectors[1], eigenVectors[2]); - } - - nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2]); - nvDebugCheck(eigenValues[1] >= eigenValues[2]); - - return true; -} - -static void EigenSolver_Tridiagonal(float mat[3][3],float * diag,float * subd) -{ - // Householder reduction T = Q^t M Q - // Input: - // mat, symmetric 3x3 matrix M - // Output: - // mat, orthogonal matrix Q - // diag, diagonal entries of T - // subd, subdiagonal entries of T (T is symmetric) - 
const float epsilon = 1e-08f; - - float a = mat[0][0]; - float b = mat[0][1]; - float c = mat[0][2]; - float d = mat[1][1]; - float e = mat[1][2]; - float f = mat[2][2]; - - diag[0] = a; - subd[2] = 0.f; - if ( fabs(c) >= epsilon ) - { - const float ell = sqrtf(b*b+c*c); - b /= ell; - c /= ell; - const float q = 2*b*e+c*(f-d); - diag[1] = d+c*q; - diag[2] = f-c*q; - subd[0] = ell; - subd[1] = e-b*q; - mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0; - mat[1][0] = 0; mat[1][1] = b; mat[1][2] = c; - mat[2][0] = 0; mat[2][1] = c; mat[2][2] = -b; - } - else - { - diag[1] = d; - diag[2] = f; - subd[0] = b; - subd[1] = e; - mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0; - mat[1][0] = 0; mat[1][1] = 1; mat[1][2] = 0; - mat[2][0] = 0; mat[2][1] = 0; mat[2][2] = 1; - } -} - -static bool EigenSolver_QLAlgorithm(float mat[3][3],float * diag,float * subd) -{ - // QL iteration with implicit shifting to reduce matrix from tridiagonal - // to diagonal - const int maxiter = 32; - - for (int ell = 0; ell < 3; ell++) - { - int iter; - for (iter = 0; iter < maxiter; iter++) - { - int m; - for (m = ell; m <= 1; m++) - { - float dd = fabs(diag[m]) + fabs(diag[m+1]); - if ( fabs(subd[m]) + dd == dd ) - break; - } - if ( m == ell ) - break; - - float g = (diag[ell+1]-diag[ell])/(2*subd[ell]); - float r = sqrtf(g*g+1); - if ( g < 0 ) - g = diag[m]-diag[ell]+subd[ell]/(g-r); - else - g = diag[m]-diag[ell]+subd[ell]/(g+r); - float s = 1, c = 1, p = 0; - for (int i = m-1; i >= ell; i--) - { - float f = s*subd[i], b = c*subd[i]; - if ( fabs(f) >= fabs(g) ) - { - c = g/f; - r = sqrtf(c*c+1); - subd[i+1] = f*r; - c *= (s = 1/r); - } - else - { - s = f/g; - r = sqrtf(s*s+1); - subd[i+1] = g*r; - s *= (c = 1/r); - } - g = diag[i+1]-p; - r = (diag[i]-g)*s+2*b*c; - p = s*r; - diag[i+1] = g+p; - g = c*r-b; - - for (int k = 0; k < 3; k++) - { - f = mat[k][i+1]; - mat[k][i+1] = s*mat[k][i]+c*f; - mat[k][i] = c*mat[k][i]-s*f; - } - } - diag[ell] -= p; - subd[ell] = g; - subd[m] = 0; - } - - if ( iter == 
maxiter ) - // should not get here under normal circumstances - return false; - } - - return true; -} - - - - -int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster) -{ - // Compute principal component. - float matrix[6]; - Vector3 centroid = computeCovariance(n, points, weights, metric, matrix); - Vector3 principal = firstEigenVector_PowerMethod(matrix); - - // Pick initial solution. - int mini, maxi; - mini = maxi = 0; - - float mindps, maxdps; - mindps = maxdps = dot(points[0] - centroid, principal); - - for (int i = 1; i < n; ++i) - { - float dps = dot(points[i] - centroid, principal); - - if (dps < mindps) { - mindps = dps; - mini = i; - } - else { - maxdps = dps; - maxi = i; - } - } - - cluster[0] = centroid + mindps * principal; - cluster[1] = centroid + maxdps * principal; - cluster[2] = (2.0f * cluster[0] + cluster[1]) / 3.0f; - cluster[3] = (2.0f * cluster[1] + cluster[0]) / 3.0f; - - // Now we have to iteratively refine the clusters. - while (true) - { - Vector3 newCluster[4] = { Vector3(0.0f), Vector3(0.0f), Vector3(0.0f), Vector3(0.0f) }; - float total[4] = {0, 0, 0, 0}; - - for (int i = 0; i < n; ++i) - { - // Find nearest cluster. 
- int nearest = 0; - float mindist = FLT_MAX; - for (int j = 0; j < 4; j++) - { - float dist = lengthSquared((cluster[j] - points[i]) * metric); - if (dist < mindist) - { - mindist = dist; - nearest = j; - } - } - - newCluster[nearest] += weights[i] * points[i]; - total[nearest] += weights[i]; - } - - for (int j = 0; j < 4; j++) - { - if (total[j] != 0) - newCluster[j] /= total[j]; - } - - if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) && - equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3])) - { - return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0); - } - - cluster[0] = newCluster[0]; - cluster[1] = newCluster[1]; - cluster[2] = newCluster[2]; - cluster[3] = newCluster[3]; - - // Sort clusters by weight. - for (int i = 0; i < 4; i++) - { - for (int j = i; j > 0 && total[j] > total[j - 1]; j--) - { - swap( total[j], total[j - 1] ); - swap( cluster[j], cluster[j - 1] ); - } - } - } -} - - - -// Adaptation of James Arvo's SVD code, as found in ZOH. - -inline float Sqr(float x) { return x*x; } - +// This code is in the public domain -- Ignacio Castaño + +#include "Fitting.h" +#include "Vector.inl" +#include "Plane.inl" + +#include "nvcore/Utils.h" // max, swap + +#include <float.h> // FLT_MAX +#include <vector> + +using namespace nv; + +// @@ Move to EigenSolver.h + +// @@ We should be able to do something cheaper... 
+// Pick a starting vector for the power iteration: the row of the symmetric
+// 3x3 matrix that has the largest squared length.
+// 'matrix' holds the upper triangle as [xx, xy, xz, yy, yz, zz].
+static Vector3 estimatePrincipalComponent(const float * __restrict matrix)
+{
+    const Vector3 row0(matrix[0], matrix[1], matrix[2]);
+    const Vector3 row1(matrix[1], matrix[3], matrix[4]);
+    const Vector3 row2(matrix[2], matrix[4], matrix[5]);
+
+    float r0 = lengthSquared(row0);
+    float r1 = lengthSquared(row1);
+    float r2 = lengthSquared(row2);
+
+    if (r0 > r1 && r0 > r2) return row0;
+    if (r1 > r2) return row1;
+    return row2;
+}
+
+
+// Approximate the dominant eigenvector of a symmetric 3x3 matrix (packed as
+// [xx, xy, xz, yy, yz, zz]) with a fixed number of power iterations.
+// Returns the zero vector when all three diagonal entries are zero.
+// The result is scaled by its largest component; it is not unit length.
+static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix)
+{
+    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
+    {
+        return Vector3(0.0f);
+    }
+
+    Vector3 v = estimatePrincipalComponent(matrix);
+
+    const int NUM = 8;
+    for (int i = 0; i < NUM; i++)
+    {
+        float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
+        float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
+        float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
+
+        // Rescale by the largest (signed) component to keep the iterate bounded.
+        // NOTE(review): this assumes the largest component is positive, which
+        // should hold for covariance matrices -- confirm before feeding other input.
+        float norm = max(max(x, y), z);
+
+        v = Vector3(x, y, z) / norm;
+    }
+
+    return v;
+}
+
+
+// Unweighted mean of n points. Divides by n, so n must be greater than zero.
+Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points)
+{
+    Vector3 centroid(0.0f);
+
+    for (int i = 0; i < n; i++)
+    {
+        centroid += points[i];
+    }
+    centroid /= float(n);
+
+    return centroid;
+}
+
+// Weighted mean of n points. Divides by the sum of the weights, which must
+// therefore be non-zero. 'metric' is accepted for symmetry with
+// computeCovariance but is not used here.
+Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
+{
+    Vector3 centroid(0.0f);
+    float total = 0.0f;
+
+    for (int i = 0; i < n; i++)
+    {
+        total += weights[i];
+        centroid += weights[i]*points[i];
+    }
+    centroid /= total;
+
+    return centroid;
+}
+
+// Unweighted mean of n 4D points. Divides by n, so n must be greater than zero.
+Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points)
+{
+    Vector4 centroid(0.0f);
+
+    for (int i = 0; i < n; i++)
+    {
+        centroid += points[i];
+    }
+    centroid /= float(n);
+
+    return centroid;
+}
+
+// Weighted mean of n 4D points. Divides by the sum of the weights, which must
+// therefore be non-zero. 'metric' is not used here.
+Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
+{
+    Vector4 centroid(0.0f);
+    float total = 0.0f;
+
+    for (int i = 0; i < n; i++)
+    {
+        total += weights[i];
+        centroid += weights[i]*points[i];
+    }
+    centroid /= total;
+
+    return centroid;
+}
+
+
+
+// Accumulate the (unnormalized) 3x3 scatter matrix of the points about their
+// centroid. 'covariance' receives the 6 upper-triangle entries in the order
+// [xx, xy, xz, yy, yz, zz]. Returns the centroid.
+Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance)
+{
+    // compute the centroid
+    Vector3 centroid = computeCentroid(n, points);
+
+    // compute covariance matrix
+    for (int i = 0; i < 6; i++)
+    {
+        covariance[i] = 0.0f;
+    }
+
+    for (int i = 0; i < n; i++)
+    {
+        Vector3 v = points[i] - centroid;
+
+        covariance[0] += v.x * v.x;
+        covariance[1] += v.x * v.y;
+        covariance[2] += v.x * v.z;
+        covariance[3] += v.y * v.y;
+        covariance[4] += v.y * v.z;
+        covariance[5] += v.z * v.z;
+    }
+
+    return centroid;
+}
+
+// Weighted variant: each centered point is scaled per-component by 'metric'
+// and its contribution is multiplied by weights[i]. Same packed output layout
+// as the unweighted overload. Returns the weighted centroid.
+Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance)
+{
+    // compute the centroid
+    Vector3 centroid = computeCentroid(n, points, weights, metric);
+
+    // compute covariance matrix
+    for (int i = 0; i < 6; i++)
+    {
+        covariance[i] = 0.0f;
+    }
+
+    for (int i = 0; i < n; i++)
+    {
+        Vector3 a = (points[i] - centroid) * metric;
+        Vector3 b = weights[i]*a;
+
+        covariance[0] += a.x * b.x;
+        covariance[1] += a.x * b.y;
+        covariance[2] += a.x * b.z;
+        covariance[3] += a.y * b.y;
+        covariance[4] += a.y * b.z;
+        covariance[5] += a.z * b.z;
+    }
+
+    return centroid;
+}
+
+// 4D scatter matrix about the centroid. 'covariance' receives the 10
+// upper-triangle entries in the order
+// [xx, xy, xz, xw, yy, yz, yw, zz, zw, ww]. Returns the centroid.
+Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, float *__restrict covariance)
+{
+    // compute the centroid
+    Vector4 centroid = computeCentroid(n, points);
+
+    // compute covariance matrix
+    for (int i = 0; i < 10; i++)
+    {
+        covariance[i] = 0.0f;
+    }
+
+    for (int i = 0; i < n; i++)
+    {
+        Vector4 v = points[i] - centroid;
+
+        covariance[0] += v.x * v.x;
+        covariance[1] += v.x * v.y;
+        covariance[2] += v.x * v.z;
+        covariance[3] += v.x * v.w;
+
+        covariance[4] += v.y * v.y;
+        covariance[5] += v.y * v.z;
+        covariance[6] += v.y * v.w;
+
+        covariance[7] += v.z * v.z;
+        covariance[8] += v.z * v.w;
+
+        covariance[9] += v.w * v.w;
+    }
+
+    return centroid;
+}
+
+// Weighted 4D variant; see the weighted Vector3 overload for the role of
+// 'weights' and 'metric'. Output layout matches the unweighted 4D overload.
+Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric, float *__restrict covariance)
+{
+    // compute the centroid
+    Vector4 centroid = computeCentroid(n, points, weights, metric);
+
+    // compute covariance matrix
+    for (int i = 0; i < 10; i++)
+    {
+        covariance[i] = 0.0f;
+    }
+
+    for (int i = 0; i < n; i++)
+    {
+        Vector4 a = (points[i] - centroid) * metric;
+        Vector4 b = weights[i]*a;
+
+        covariance[0] += a.x * b.x;
+        covariance[1] += a.x * b.y;
+        covariance[2] += a.x * b.z;
+        covariance[3] += a.x * b.w;
+
+        covariance[4] += a.y * b.y;
+        covariance[5] += a.y * b.z;
+        covariance[6] += a.y * b.w;
+
+        covariance[7] += a.z * b.z;
+        covariance[8] += a.z * b.w;
+
+        covariance[9] += a.w * b.w;
+    }
+
+    return centroid;
+}
+
+
+
+// Principal axis of a point set via power iteration on its scatter matrix.
+Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points)
+{
+    float matrix[6];
+    computeCovariance(n, points, matrix);
+
+    return firstEigenVector_PowerMethod(matrix);
+}
+
+// Weighted / metric-scaled variant of the power-method principal axis.
+Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
+{
+    float matrix[6];
+    computeCovariance(n, points, weights, metric, matrix);
+
+    return firstEigenVector_PowerMethod(matrix);
+}
+
+
+
+// Eigenvector of the largest eigenvalue of a packed symmetric 3x3 matrix,
+// using the full eigensolver. Returns the zero vector when the diagonal is
+// all zero or when the solver reports failure.
+static inline Vector3 firstEigenVector_EigenSolver3(const float *__restrict matrix)
+{
+    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
+    {
+        return Vector3(0.0f);
+    }
+
+    float eigenValues[3];
+    Vector3 eigenVectors[3];
+    if (!nv::Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors))
+    {
+        return Vector3(0.0f);
+    }
+
+    return eigenVectors[0];
+}
+
+// Principal axis of a point set via the 3x3 symmetric eigensolver.
+Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points)
+{
+    float matrix[6];
+    computeCovariance(n, points, matrix);
+
+    return firstEigenVector_EigenSolver3(matrix);
+}
+
+// Weighted / metric-scaled variant of the eigensolver principal axis.
+Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
+{
+    float matrix[6];
+    computeCovariance(n, points, weights, metric, matrix);
+
+    return firstEigenVector_EigenSolver3(matrix);
+}
+
+
+
+// Eigenvector of the largest eigenvalue of a packed symmetric 4x4 matrix
+// (diagonal entries live at indices 0, 4, 7 and 9). Returns the zero vector
+// when the diagonal is all zero or when the solver reports failure.
+static inline Vector4 firstEigenVector_EigenSolver4(const float *__restrict matrix)
+{
+    if (matrix[0] == 0 && matrix[4] == 0 && matrix[7] == 0 && matrix[9] == 0)
+    {
+        return Vector4(0.0f);
+    }
+
+    float eigenValues[4];
+    Vector4 eigenVectors[4];
+    if (!nv::Fit::eigenSolveSymmetric4(matrix, eigenValues, eigenVectors))
+    {
+        return Vector4(0.0f);
+    }
+
+    return eigenVectors[0];
+}
+
+// Principal axis of a 4D point set via the 4x4 symmetric eigensolver.
+Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points)
+{
+    float matrix[10];
+    computeCovariance(n, points, matrix);
+
+    return firstEigenVector_EigenSolver4(matrix);
+}
+
+// Weighted / metric-scaled variant of the 4D eigensolver principal axis.
+Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
+{
+    float matrix[10];
+    computeCovariance(n, points, weights, metric, matrix);
+
+    return firstEigenVector_EigenSolver4(matrix);
+}
+
+
+
+void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R);
+
+// Principal axis via SVD of the raw (uncentered) point matrix.
+// The first three entries of R are returned as the principal component.
+Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
+{
+    // Each point fills the first three columns of its row in a square matrix.
+    // The matrix therefore needs at least three columns: pad with zero rows
+    // and columns when n < 3 so the writes below stay in bounds (and so the
+    // buffers are never empty). For n >= 3 this is the original n x n layout.
+    const int dim = max(n, 3);
+
+    std::vector<float> Q(dim*dim, 0.0f);
+    for (int i = 0; i < n; ++i)
+    {
+        Q[i*dim+0] = points[i].x;
+        Q[i*dim+1] = points[i].y;
+        Q[i*dim+2] = points[i].z;
+    }
+
+    // Alloc space for the SVD outputs
+    std::vector<float> diag(dim, 0.0f);
+    std::vector<float> R(dim*dim, 0.0f);
+
+    ArvoSVD(dim, dim, &Q[0], &diag[0], &R[0]);
+
+    // Get the principal component
+    return Vector3(R[0], R[1], R[2]);
+}
+
+// 4D variant of the SVD principal axis; see the Vector3 overload.
+Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points)
+{
+    // Four columns are written per row, so the square matrix must be at
+    // least 4 x 4; zero-pad when n < 4.
+    const int dim = max(n, 4);
+
+    std::vector<float> Q(dim*dim, 0.0f);
+    for (int i = 0; i < n; ++i)
+    {
+        Q[i*dim+0] = points[i].x;
+        Q[i*dim+1] = points[i].y;
+        Q[i*dim+2] = points[i].z;
+        Q[i*dim+3] = points[i].w;
+    }
+
+    // Alloc space for the SVD outputs
+    std::vector<float> diag(dim, 0.0f);
+    std::vector<float> R(dim*dim, 0.0f);
+
+    ArvoSVD(dim, dim, &Q[0], &diag[0], &R[0]);
+
+    // Get the principal component
+    return Vector4(R[0], R[1], R[2], R[3]);
+}
+
+
+
+// Least-squares plane through the points: passes through the centroid with
+// the eigenvector of the smallest eigenvalue as its normal. Falls back to a
+// plane with normal (0, 0, 1) when the scatter matrix has a zero diagonal or
+// the eigensolver fails.
+Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points)
+{
+    // compute the centroid and covariance
+    float matrix[6];
+    Vector3 centroid = computeCovariance(n, points, matrix);
+
+    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
+    {
+        // If no plane defined, then return a horizontal plane.
+        return Plane(Vector3(0, 0, 1), centroid);
+    }
+
+    float eigenValues[3];
+    Vector3 eigenVectors[3];
+    if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
+        // If no plane defined, then return a horizontal plane.
+        return Plane(Vector3(0, 0, 1), centroid);
+    }
+
+    return Plane(eigenVectors[2], centroid);
+}
+
+// True when the smallest eigenvalue of the scatter matrix is below 'epsilon'.
+// Returns false if the eigensolver fails.
+bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON*/)
+{
+    // compute the centroid and covariance
+    float matrix[6];
+    computeCovariance(n, points, matrix);
+
+    float eigenValues[3];
+    Vector3 eigenVectors[3];
+    if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
+        return false;
+    }
+
+    return eigenValues[2] < epsilon;
+}
+
+
+
+// Tridiagonal solver from Charles Bloom.
+// Householder transforms followed by QL decomposition.
+// Seems to be based on the code from Numerical Recipes in C.
+
+static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd);
+static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd);
+
+// Eigen-decomposition of a symmetric 3x3 matrix.
+//   matrix       - upper triangle packed as [xx, xy, xz, yy, yz, zz]
+//   eigenValues  - receives the eigenvalues, sorted in descending order
+//   eigenVectors - receives the matching eigenvectors, one per entry
+// Returns false (and zeroes both outputs) if the QL iteration does not converge.
+bool nv::Fit::eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
+{
+    nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
+
+    float subd[3];
+    float diag[3];
+    float work[3][3];
+
+    // Expand the packed upper triangle into a full symmetric matrix.
+    work[0][0] = matrix[0];
+    work[0][1] = work[1][0] = matrix[1];
+    work[0][2] = work[2][0] = matrix[2];
+    work[1][1] = matrix[3];
+    work[1][2] = work[2][1] = matrix[4];
+    work[2][2] = matrix[5];
+
+    EigenSolver3_Tridiagonal(work, diag, subd);
+    if (!EigenSolver3_QLAlgorithm(work, diag, subd))
+    {
+        for (int i = 0; i < 3; i++) {
+            eigenValues[i] = 0;
+            eigenVectors[i] = Vector3(0);
+        }
+        return false;
+    }
+
+    for (int i = 0; i < 3; i++) {
+        eigenValues[i] = diag[i];
+    }
+
+    // eigenvectors are the columns; make them the rows :
+
+    for (int i = 0; i < 3; i++)
+    {
+        for (int j = 0; j < 3; j++)
+        {
+            eigenVectors[j].component[i] = work[i][j];
+        }
+    }
+
+    // shuffle to sort by eigenvalue, largest first :
+    if (eigenValues[2] > eigenValues[0] && eigenValues[2] > eigenValues[1])
+    {
+        swap(eigenValues[0], eigenValues[2]);
+        swap(eigenVectors[0], eigenVectors[2]);
+    }
+    if (eigenValues[1] > eigenValues[0])
+    {
+        swap(eigenValues[0], eigenValues[1]);
+        swap(eigenVectors[0], eigenVectors[1]);
+    }
+    if (eigenValues[2] > eigenValues[1])
+    {
+        swap(eigenValues[1], eigenValues[2]);
+        swap(eigenVectors[1], eigenVectors[2]);
+    }
+
+    nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2]);
+    nvDebugCheck(eigenValues[1] >= eigenValues[2]);
+
+    return true;
+}
+
+static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd)
+{
+    // Householder reduction T = Q^t M Q
+    //   Input:
+    //     mat, symmetric 3x3 matrix M
+    //   Output:
+    //     mat, orthogonal matrix Q
+    //     diag, diagonal entries of T
+    //     subd, subdiagonal entries of T (T is symmetric)
+    const float epsilon = 1e-08f;
+
+    float a = mat[0][0];
+    float b = mat[0][1];
+    float c = mat[0][2];
+    float d = mat[1][1];
+    float e = mat[1][2];
+    float f = mat[2][2];
+
+    diag[0] = a;
+    subd[2] = 0.f;
+    if ( fabs(c) >= epsilon )
+    {
+        // The (0,2) entry is non-zero: one reflection eliminates it.
+        const float ell = sqrtf(b*b+c*c);
+        b /= ell;
+        c /= ell;
+        const float q = 2*b*e+c*(f-d);
+        diag[1] = d+c*q;
+        diag[2] = f-c*q;
+        subd[0] = ell;
+        subd[1] = e-b*q;
+        mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
+        mat[1][0] = 0; mat[1][1] = b; mat[1][2] = c;
+        mat[2][0] = 0; mat[2][1] = c; mat[2][2] = -b;
+    }
+    else
+    {
+        // Already tridiagonal: Q is the identity.
+        diag[1] = d;
+        diag[2] = f;
+        subd[0] = b;
+        subd[1] = e;
+        mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
+        mat[1][0] = 0; mat[1][1] = 1; mat[1][2] = 0;
+        mat[2][0] = 0; mat[2][1] = 0; mat[2][2] = 1;
+    }
+}
+
+static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd)
+{
+    // QL iteration with implicit shifting to reduce matrix from tridiagonal
+    // to diagonal. On return diag holds the eigenvalues and the columns of
+    // mat hold the eigenvectors. Returns false if any eigenvalue fails to
+    // converge within maxiter sweeps.
+    const int maxiter = 32;
+
+    for (int ell = 0; ell < 3; ell++)
+    {
+        int iter;
+        for (iter = 0; iter < maxiter; iter++)
+        {
+            // Look for a negligible subdiagonal entry to split the matrix.
+            int m;
+            for (m = ell; m <= 1; m++)
+            {
+                float dd = fabs(diag[m]) + fabs(diag[m+1]);
+                if ( fabs(subd[m]) + dd == dd )
+                    break;
+            }
+            if ( m == ell )
+                break;
+
+            float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
+            float r = sqrtf(g*g+1);
+            if ( g < 0 )
+                g = diag[m]-diag[ell]+subd[ell]/(g-r);
+            else
+                g = diag[m]-diag[ell]+subd[ell]/(g+r);
+            float s = 1, c = 1, p = 0;
+            for (int i = m-1; i >= ell; i--)
+            {
+                float f = s*subd[i], b = c*subd[i];
+                if ( fabs(f) >= fabs(g) )
+                {
+                    c = g/f;
+                    r = sqrtf(c*c+1);
+                    subd[i+1] = f*r;
+                    c *= (s = 1/r);
+                }
+                else
+                {
+                    s = f/g;
+                    r = sqrtf(s*s+1);
+                    subd[i+1] = g*r;
+                    s *= (c = 1/r);
+                }
+                g = diag[i+1]-p;
+                r = (diag[i]-g)*s+2*b*c;
+                p = s*r;
+                diag[i+1] = g+p;
+                g = c*r-b;
+
+                // Accumulate the rotation into columns i and i+1 of mat.
+                for (int k = 0; k < 3; k++)
+                {
+                    f = mat[k][i+1];
+                    mat[k][i+1] = s*mat[k][i]+c*f;
+                    mat[k][i] = c*mat[k][i]-s*f;
+                }
+            }
+            diag[ell] -= p;
+            subd[ell] = g;
+            subd[m] = 0;
+        }
+
+        if ( iter == maxiter )
+            // should not get here under normal circumstances
+            return false;
+    }
+
+    return true;
+}
+
+
+
+// Tridiagonal solver for 4x4 symmetric matrices.
+
+static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd);
+static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd);
+
+// Eigen-decomposition of a symmetric 4x4 matrix.
+//   matrix       - upper triangle packed as [xx, xy, xz, xw, yy, yz, yw, zz, zw, ww]
+//   eigenValues  - receives the eigenvalues, sorted in descending order
+//   eigenVectors - receives the matching eigenvectors, one per entry
+// Returns false (and zeroes both outputs) if the QL iteration does not converge.
+bool nv::Fit::eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4])
+{
+    nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
+
+    float subd[4];
+    float diag[4];
+    float work[4][4];
+
+    // Expand the packed upper triangle into a full symmetric matrix.
+    work[0][0] = matrix[0];
+    work[0][1] = work[1][0] = matrix[1];
+    work[0][2] = work[2][0] = matrix[2];
+    work[0][3] = work[3][0] = matrix[3];
+    work[1][1] = matrix[4];
+    work[1][2] = work[2][1] = matrix[5];
+    work[1][3] = work[3][1] = matrix[6];
+    work[2][2] = matrix[7];
+    work[2][3] = work[3][2] = matrix[8];
+    work[3][3] = matrix[9];
+
+    EigenSolver4_Tridiagonal(work, diag, subd);
+    if (!EigenSolver4_QLAlgorithm(work, diag, subd))
+    {
+        for (int i = 0; i < 4; i++) {
+            eigenValues[i] = 0;
+            eigenVectors[i] = Vector4(0);
+        }
+        return false;
+    }
+
+    for (int i = 0; i < 4; i++) {
+        eigenValues[i] = diag[i];
+    }
+
+    // eigenvectors are the columns; make them the rows
+
+    for (int i = 0; i < 4; i++)
+    {
+        for (int j = 0; j < 4; j++)
+        {
+            eigenVectors[j].component[i] = work[i][j];
+        }
+    }
+
+    // sort by eigenvalue, largest first
+
+    for (int i = 0; i < 3; ++i)
+    {
+        for (int j = i+1; j < 4; ++j)
+        {
+            if (eigenValues[j] > eigenValues[i])
+            {
+                swap(eigenValues[i], eigenValues[j]);
+                swap(eigenVectors[i], eigenVectors[j]);
+            }
+        }
+    }
+
+    nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2] && eigenValues[0] >= eigenValues[3]);
+    nvDebugCheck(eigenValues[1] >= eigenValues[2] && eigenValues[1] >= eigenValues[3]);
+    nvDebugCheck(eigenValues[2] >= eigenValues[3]);
+
+    return true;
+}
+
+#include "nvmath/Matrix.inl"
+
+// Sign of x, treating zero as positive so the result is never 0.
+inline float signNonzero(float x)
+{
+    return (x >= 0.0f) ? 1.0f : -1.0f;
+}
+
+static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd)
+{
+    // Householder reduction T = Q^t M Q
+    //   Input:
+    //     mat, symmetric 4x4 matrix M
+    //   Output:
+    //     mat, orthogonal matrix Q
+    //     diag, diagonal entries of T
+    //     subd, subdiagonal entries of T (T is symmetric)
+
+    static const int n = 4;
+
+    // Set epsilon relative to size of elements in matrix.
+    // The running maximum must start at zero; seeding it with FLT_MAX would
+    // pin epsilon at ~3.4e32 and make every column look "already reduced".
+    static const float relEpsilon = 1e-6f;
+    float maxElement = 0.0f;
+    for (int i = 0; i < n; ++i)
+        for (int j = 0; j < n; ++j)
+            maxElement = max(maxElement, fabs(mat[i][j]));
+    float epsilon = relEpsilon * maxElement;
+
+    // Iterative algorithm, works for any size of matrix but might be slower than
+    // a closed-form solution for symmetric 4x4 matrices.  Based on this article:
+    // http://en.wikipedia.org/wiki/Householder_transformation#Tridiagonalization
+
+    // Copy element by element through operator()(row, col) rather than with
+    // memcpy, so the result does not depend on Matrix's storage order.
+    Matrix A, Q(identity);
+    for (int i = 0; i < n; ++i)
+        for (int j = 0; j < n; ++j)
+            A(i,j) = mat[i][j];
+
+    // We proceed from left to right, making the off-tridiagonal entries zero in
+    // one column of the matrix at a time.
+    for (int k = 0; k < n - 2; ++k)
+    {
+        float sum = 0.0f;
+        for (int j = k+1; j < n; ++j)
+            sum += A(j,k)*A(j,k);
+        float alpha = -signNonzero(A(k+1,k)) * sqrtf(sum);
+        float r = sqrtf(0.5f * (alpha*alpha - A(k+1,k)*alpha));
+
+        // If r is zero, skip this column - already in tridiagonal form.
+        // '<=' so that an all-zero matrix (epsilon == 0) is skipped too
+        // instead of dividing by zero below.
+        if (fabs(r) <= epsilon)
+            continue;
+
+        float v[n] = {};
+        v[k+1] = 0.5f * (A(k+1,k) - alpha) / r;
+        for (int j = k+2; j < n; ++j)
+            v[j] = 0.5f * A(j,k) / r;
+
+        // Householder reflection P = I - 2 v v^t
+        Matrix P(identity);
+        for (int i = 0; i < n; ++i)
+            for (int j = 0; j < n; ++j)
+                P(i,j) -= 2.0f * v[i] * v[j];
+
+        // NOTE(review): assumes mul(a, b) is the ordinary product a*b, so that
+        // Q accumulates P1*P2 -- confirm against nvmath/Matrix.inl.
+        A = mul(mul(P, A), P);
+        Q = mul(Q, P);
+    }
+
+    nvDebugCheck(fabs(A(2,0)) <= epsilon);
+    nvDebugCheck(fabs(A(0,2)) <= epsilon);
+    nvDebugCheck(fabs(A(3,0)) <= epsilon);
+    nvDebugCheck(fabs(A(0,3)) <= epsilon);
+    nvDebugCheck(fabs(A(3,1)) <= epsilon);
+    nvDebugCheck(fabs(A(1,3)) <= epsilon);
+
+    for (int i = 0; i < n; ++i)
+        diag[i] = A(i,i);
+    for (int i = 0; i < n - 1; ++i)
+        subd[i] = A(i+1,i);
+    subd[n-1] = 0.0f;
+
+    // The QL stage rotates columns of mat, so mat[row][col] must hold Q(row, col).
+    for (int i = 0; i < n; ++i)
+        for (int j = 0; j < n; ++j)
+            mat[i][j] = Q(i,j);
+}
+
+static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd)
+{
+    // QL iteration with implicit shifting to reduce matrix from tridiagonal
+    // to diagonal. On return diag holds the eigenvalues and the columns of
+    // mat hold the eigenvectors. Returns false if any eigenvalue fails to
+    // converge within maxiter sweeps.
+    const int maxiter = 32;
+
+    for (int ell = 0; ell < 4; ell++)
+    {
+        int iter;
+        for (iter = 0; iter < maxiter; iter++)
+        {
+            // Look for a negligible subdiagonal entry to split the matrix.
+            int m;
+            for (m = ell; m < 3; m++)
+            {
+                float dd = fabs(diag[m]) + fabs(diag[m+1]);
+                if ( fabs(subd[m]) + dd == dd )
+                    break;
+            }
+            if ( m == ell )
+                break;
+
+            float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
+            float r = sqrtf(g*g+1);
+            if ( g < 0 )
+                g = diag[m]-diag[ell]+subd[ell]/(g-r);
+            else
+                g = diag[m]-diag[ell]+subd[ell]/(g+r);
+            float s = 1, c = 1, p = 0;
+            for (int i = m-1; i >= ell; i--)
+            {
+                float f = s*subd[i], b = c*subd[i];
+                if ( fabs(f) >= fabs(g) )
+                {
+                    c = g/f;
+                    r = sqrtf(c*c+1);
+                    subd[i+1] = f*r;
+                    c *= (s = 1/r);
+                }
+                else
+                {
+                    s = f/g;
+                    r = sqrtf(s*s+1);
+                    subd[i+1] = g*r;
+                    s *= (c = 1/r);
+                }
+                g = diag[i+1]-p;
+                r = (diag[i]-g)*s+2*b*c;
+                p = s*r;
+                diag[i+1] = g+p;
+                g = c*r-b;
+
+                // Accumulate the rotation into columns i and i+1 of mat.
+                for (int k = 0; k < 4; k++)
+                {
+                    f = mat[k][i+1];
+                    mat[k][i+1] = s*mat[k][i]+c*f;
+                    mat[k][i] = c*mat[k][i]-s*f;
+                }
+            }
+            diag[ell] -= p;
+            subd[ell] = g;
+            subd[m] = 0;
+        }
+
+        if ( iter == maxiter )
+            // should not get here under normal circumstances
+            return false;
+    }
+
+    return true;
+}
+
+
+
+// Weighted 4-means clustering of n points.
+//   points, weights - the n input samples and their weights
+//   metric          - per-component scale applied when measuring distances
+//   cluster         - receives 4 cluster centers, sorted by decreasing total weight
+// Returns the number of clusters that ended up non-empty, or 0 when n <= 0.
+int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster)
+{
+    // Nothing to cluster; also avoids reading points[0] below.
+    if (n <= 0)
+    {
+        return 0;
+    }
+
+    // Compute principal component.
+    float matrix[6];
+    Vector3 centroid = computeCovariance(n, points, weights, metric, matrix);
+    Vector3 principal = firstEigenVector_PowerMethod(matrix);
+
+    // Pick initial solution: the extreme projections onto the principal axis.
+    float mindps, maxdps;
+    mindps = maxdps = dot(points[0] - centroid, principal);
+
+    for (int i = 1; i < n; ++i)
+    {
+        float dps = dot(points[i] - centroid, principal);
+
+        if (dps < mindps) {
+            mindps = dps;
+        }
+        else if (dps > maxdps) {
+            // Only a larger projection may replace the maximum; a plain
+            // 'else' would overwrite it with any non-minimal value.
+            maxdps = dps;
+        }
+    }
+
+    cluster[0] = centroid + mindps * principal;
+    cluster[1] = centroid + maxdps * principal;
+    cluster[2] = (2.0f * cluster[0] + cluster[1]) / 3.0f;
+    cluster[3] = (2.0f * cluster[1] + cluster[0]) / 3.0f;
+
+    // Now we have to iteratively refine the clusters.
+    while (true)
+    {
+        Vector3 newCluster[4] = { Vector3(0.0f), Vector3(0.0f), Vector3(0.0f), Vector3(0.0f) };
+        float total[4] = {0, 0, 0, 0};
+
+        for (int i = 0; i < n; ++i)
+        {
+            // Find nearest cluster.
+            int nearest = 0;
+            float mindist = FLT_MAX;
+            for (int j = 0; j < 4; j++)
+            {
+                float dist = lengthSquared((cluster[j] - points[i]) * metric);
+                if (dist < mindist)
+                {
+                    mindist = dist;
+                    nearest = j;
+                }
+            }
+
+            newCluster[nearest] += weights[i] * points[i];
+            total[nearest] += weights[i];
+        }
+
+        // Turn the weighted sums into weighted means; empty clusters stay at zero.
+        for (int j = 0; j < 4; j++)
+        {
+            if (total[j] != 0)
+                newCluster[j] /= total[j];
+        }
+
+        // Converged once no center moved.
+        if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) &&
+            equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3]))
+        {
+            return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0);
+        }
+
+        cluster[0] = newCluster[0];
+        cluster[1] = newCluster[1];
+        cluster[2] = newCluster[2];
+        cluster[3] = newCluster[3];
+
+        // Sort clusters by weight.
+        for (int i = 0; i < 4; i++)
+        {
+            for (int j = i; j > 0 && total[j] > total[j - 1]; j--)
+            {
+                swap( total[j], total[j - 1] );
+                swap( cluster[j], cluster[j - 1] );
+            }
+        }
+    }
+}
+
+
+
+// Adaptation of James Arvo's SVD code, as found in ZOH.
+ +inline float Sqr(float x) { return x*x; } + inline float svd_pythag( float a, float b ) { float at = fabsf(a); @@ -552,9 +928,9 @@ inline float SameSign( float a, float b ) else t = -fabsf( a ); return t; } - -void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R) -{ + +void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R) +{ static const int MaxIterations = 30; int i, j, k, l, p, q, iter; @@ -824,4 +1200,4 @@ void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R) swap(diag[i], diag[bindex]); } } -} +} diff --git a/src/nvmath/Fitting.h b/src/nvmath/Fitting.h index c3f9cf8..7a88cd2 100644 --- a/src/nvmath/Fitting.h +++ b/src/nvmath/Fitting.h @@ -14,22 +14,32 @@ namespace nv Vector3 computeCentroid(int n, const Vector3 * points); Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + Vector4 computeCentroid(int n, const Vector4 * points); + Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric); + Vector3 computeCovariance(int n, const Vector3 * points, float * covariance); Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance); + Vector4 computeCovariance(int n, const Vector4 * points, float * covariance); + Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance); + Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points); Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric); Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points); + Vector4 
computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric); + Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points); + Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points); Plane bestPlane(int n, const Vector3 * points); bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON); - bool eigenSolveSymmetric (const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]); - + bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]); + bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]); // Returns number of clusters [1-4]. int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster); diff --git a/src/nvmath/Vector.h b/src/nvmath/Vector.h index 231d3b9..b10865a 100644 --- a/src/nvmath/Vector.h +++ b/src/nvmath/Vector.h @@ -73,6 +73,7 @@ namespace nv void operator*=(float s); void operator/=(float s); void operator*=(Vector3::Arg v); + void operator/=(Vector3::Arg v); friend bool operator==(Vector3::Arg a, Vector3::Arg b); friend bool operator!=(Vector3::Arg a, Vector3::Arg b); @@ -116,7 +117,9 @@ namespace nv void operator+=(Vector4::Arg v); void operator-=(Vector4::Arg v); void operator*=(float s); + void operator/=(float s); void operator*=(Vector4::Arg v); + void operator/=(Vector4::Arg v); friend bool operator==(Vector4::Arg a, Vector4::Arg b); friend bool operator!=(Vector4::Arg a, Vector4::Arg b); diff --git a/src/nvmath/Vector.inl b/src/nvmath/Vector.inl index 6f26262..769e366 100644 --- a/src/nvmath/Vector.inl +++ b/src/nvmath/Vector.inl @@ -158,6 +158,13 @@ namespace nv z *= v.z; } + inline void Vector3::operator/=(Vector3::Arg v) + { + x /= v.x; + y /= v.y; + z /= v.z; + } + inline bool operator==(Vector3::Arg a, Vector3::Arg b) { return a.x == b.x && a.y == b.y && a.z == b.z; @@ -243,6 +250,14 @@ namespace nv w *= 
s; } + inline void Vector4::operator/=(float s) + { + x /= s; + y /= s; + z /= s; + w /= s; + } + inline void Vector4::operator*=(Vector4::Arg v) { x *= v.x; @@ -251,6 +266,14 @@ namespace nv w *= v.w; } + inline void Vector4::operator/=(Vector4::Arg v) + { + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + } + inline bool operator==(Vector4::Arg a, Vector4::Arg b) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; @@ -677,6 +700,11 @@ namespace nv return scale(v, s); } + inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s) + { + return scale(v, s); + } + inline Vector4 operator/(Vector4::Arg v, float s) { return scale(v, 1.0f/s); diff --git a/src/nvtt/CompressorDX11.cpp b/src/nvtt/CompressorDX11.cpp index ff78fd7..98c736d 100644 --- a/src/nvtt/CompressorDX11.cpp +++ b/src/nvtt/CompressorDX11.cpp @@ -29,12 +29,10 @@ #include "CompressionOptions.h" #include "nvimage/ColorBlock.h" #include "nvmath/Half.h" +#include "nvmath/Vector.inl" #include "bc6h/zoh.h" -#include "bc6h/utils.h" - -//#include "bc7/avpcl.h" -//#include "bc7/utils.h" +#include "bc7/avpcl.h" using namespace nv; using namespace nvtt; @@ -42,21 +40,24 @@ using namespace nvtt; void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) { - NV_UNUSED(alphaMode); // ZOH does not support alpha. + // !!!UNDONE: support channel weights + // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) + + NV_UNUSED(alphaMode); // ZOH does not support alpha. if (compressionOptions.pixelType == PixelType_UnsignedFloat || compressionOptions.pixelType == PixelType_UnsignedNorm || compressionOptions.pixelType == PixelType_UnsignedInt) { - Utils::FORMAT = UNSIGNED_F16; // @@ Do not use globals. 
+ ZOH::Utils::FORMAT = ZOH::UNSIGNED_F16; } else { - Utils::FORMAT = SIGNED_F16; + ZOH::Utils::FORMAT = ZOH::SIGNED_F16; } // Convert NVTT's tile struct to ZOH's, and convert float to half. - Tile zohTile(tile.w, tile.h); + ZOH::Tile zohTile(tile.w, tile.h); memset(zohTile.data, 0, sizeof(zohTile.data)); memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map)); for (uint y = 0; y < tile.h; ++y) @@ -67,9 +68,9 @@ void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const Co uint16 rHalf = to_half(color.x); uint16 gHalf = to_half(color.y); uint16 bHalf = to_half(color.z); - zohTile.data[y][x].x = Tile::half2float(rHalf); - zohTile.data[y][x].y = Tile::half2float(gHalf); - zohTile.data[y][x].z = Tile::half2float(bHalf); + zohTile.data[y][x].x = ZOH::Tile::half2float(rHalf); + zohTile.data[y][x].y = ZOH::Tile::half2float(gHalf); + zohTile.data[y][x].z = ZOH::Tile::half2float(bHalf); zohTile.importance_map[y][x] = 1.0f; } } @@ -77,8 +78,22 @@ void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const Co ZOH::compress(zohTile, (char *)output); } - void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) { - // @@ TODO + // !!!UNDONE: support channel weights + // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) + + AVPCL::mode_rgb = false; + AVPCL::flag_premult = (alphaMode == AlphaMode_Premultiplied); + AVPCL::flag_nonuniform = false; + AVPCL::flag_nonuniform_ati = false; + + // Convert NVTT's tile struct to AVPCL's. 
+ AVPCL::Tile avpclTile(tile.w, tile.h); + memset(avpclTile.data, 0, sizeof(avpclTile.data)); + for (uint y = 0; y < tile.h; ++y) + for (uint x = 0; x < tile.w; ++x) + avpclTile.data[y][x] = tile.color(x, y) * 255.0f; + + AVPCL::compress(avpclTile, (char *)output); } diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp index 10cf76a..c7c9631 100644 --- a/src/nvtt/Context.cpp +++ b/src/nvtt/Context.cpp @@ -775,8 +775,7 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression } else if (compressionOptions.format == Format_BC7) { - // !!!UNDONE - //return new CompressorBC7; + return new CompressorBC7; } return NULL; diff --git a/src/nvtt/Surface.cpp b/src/nvtt/Surface.cpp index 9cc9e54..b3ebc0b 100644 --- a/src/nvtt/Surface.cpp +++ b/src/nvtt/Surface.cpp @@ -707,7 +707,8 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi format != nvtt::Format_BC3 && format != nvtt::Format_BC4 && format != nvtt::Format_BC5 && - format != nvtt::Format_BC6) + format != nvtt::Format_BC6 && + format != nvtt::Format_BC7) { return false; } @@ -822,6 +823,11 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi const BlockATI2 * block = (const BlockATI2 *)ptr; block->decodeBlock(&colors, decoder == Decoder_D3D9); } + else if (format == nvtt::Format_BC7) + { + const BlockBC7 * block = (const BlockBC7 *)ptr; + block->decodeBlock(&colors); + } else { nvDebugCheck(false); diff --git a/src/nvtt/bc6h/bits.h b/src/nvtt/bc6h/bits.h index 3da7d79..9969c9e 100644 --- a/src/nvtt/bc6h/bits.h +++ b/src/nvtt/bc6h/bits.h @@ -10,36 +10,38 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once -#ifndef _BITS_H -#define _BITS_H +#ifndef _ZOH_BITS_H +#define _ZOH_BITS_H // read/write a bitstream -#include +#include "nvcore/Debug.h" + +namespace ZOH { class Bits { public: - Bits(char *data, int maxdatabits) { assert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} - Bits(const char *data, int availdatabits) { assert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} + Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} + Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} void write(int value, int nbits) { - assert (nbits >= 0 && nbits < 32); - assert (sizeof(int)>= 4); + nvAssert (nbits >= 0 && nbits < 32); + nvAssert (sizeof(int)>= 4); for (int i=0; i>i); } int read(int nbits) { - assert (nbits >= 0 && nbits < 32); - assert (sizeof(int)>= 4); + nvAssert (nbits >= 0 && nbits < 32); + nvAssert (sizeof(int)>= 4); int out = 0; for (int i=0; i= 0 && ptr < maxbits); bptr = ptr; } + int setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; } int getsize() { return bend; } private: @@ -51,7 +53,7 @@ private: char readonly; // 1 if this is a read-only stream int readone() { - assert (bptr < bend); + nvAssert (bptr < bend); if (bptr >= bend) return 0; int bit = (readonly ? 
cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7)); ++bptr; @@ -60,7 +62,7 @@ private: void writeone(int bit) { if (readonly) throw "Writing a read-only bit stream"; - assert (bptr < maxbits); + nvAssert (bptr < maxbits); if (bptr >= maxbits) return; if (bit&1) bits[bptr>>3] |= 1 << (bptr & 7); @@ -70,4 +72,6 @@ private: } }; +} + #endif diff --git a/src/nvtt/bc6h/shapes_two.h b/src/nvtt/bc6h/shapes_two.h index 3d19a9f..b259a31 100644 --- a/src/nvtt/bc6h/shapes_two.h +++ b/src/nvtt/bc6h/shapes_two.h @@ -10,8 +10,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#ifndef _SHAPES_TWO_H -#define _SHAPES_TWO_H +#ifndef _ZOH_SHAPES_TWO_H +#define _ZOH_SHAPES_TWO_H // shapes for two regions diff --git a/src/nvtt/bc6h/tile.h b/src/nvtt/bc6h/tile.h index b713bb8..3a9e068 100644 --- a/src/nvtt/bc6h/tile.h +++ b/src/nvtt/bc6h/tile.h @@ -10,15 +10,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once -#ifndef _TILE_H -#define _TILE_H +#ifndef _ZOH_TILE_H +#define _ZOH_TILE_H #include "utils.h" - #include "nvmath/Vector.h" - #include +namespace ZOH { //#define USE_IMPORTANCE_MAP 1 // define this if you want to increase importance of some pixels in tile class Tile @@ -79,4 +78,6 @@ public: int size_x, size_y; // actual size of tile }; -#endif // _TILE_H +} + +#endif // _ZOH_TILE_H diff --git a/src/nvtt/bc6h/utils.cpp b/src/nvtt/bc6h/utils.cpp index 5bca41a..ff888e8 100644 --- a/src/nvtt/bc6h/utils.cpp +++ b/src/nvtt/bc6h/utils.cpp @@ -17,6 +17,7 @@ See the License for the specific language governing permissions and limitations #include using namespace nv; +using namespace ZOH; static const int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64 static const int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64 diff --git a/src/nvtt/bc6h/utils.h b/src/nvtt/bc6h/utils.h index 87df603..4ef0d4b 100644 --- a/src/nvtt/bc6h/utils.h +++ b/src/nvtt/bc6h/utils.h @@ -12,15 +12,14 @@ See the License for the specific language governing permissions and limitations // utility class holding common routines #pragma once -#ifndef _UTILS_H -#define _UTILS_H +#ifndef _ZOH_UTILS_H +#define _ZOH_UTILS_H #include "nvmath/Vector.h" +namespace ZOH { -#define PALETTE_LERP(a, b, i, denom) Utils::lerp(a, b, i, denom) - -#define SIGN_EXTEND(x,nb) ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x))) +inline int SIGN_EXTEND(int x, int nb) { return ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x))); } enum Field { FIELD_M = 1, // mode @@ -31,20 +30,20 @@ enum Field { }; // some constants -#define F16S_MASK 0x8000 // f16 sign mask -#define F16EM_MASK 0x7fff // f16 exp & mantissa mask -#define U16MAX 0xffff -#define S16MIN (-0x8000) -#define S16MAX 0x7fff -#define INT16_MASK 0xffff -#define F16MAX (0x7bff) // MAXFLT bit pattern for halfs +static const int F16S_MASK = 0x8000; // f16 sign mask 
+static const int F16EM_MASK = 0x7fff; // f16 exp & mantissa mask +static const int U16MAX = 0xffff; +static const int S16MIN = -0x8000; +static const int S16MAX = 0x7fff; +static const int INT16_MASK = 0xffff; +static const int F16MAX = 0x7bff; // MAXFLT bit pattern for halfs enum Format { UNSIGNED_F16, SIGNED_F16 }; class Utils { public: - static ::Format FORMAT; // this is a global -- we're either handling unsigned or unsigned half values + static Format FORMAT; // this is a global -- we're either handling signed or unsigned half values // error metrics static float norm(const nv::Vector3 &a, const nv::Vector3 &b); @@ -69,4 +68,6 @@ public: static nv::Vector3 lerp(const nv::Vector3 & a, const nv::Vector3 & b, int i, int denom); }; -#endif // _UTILS_H +} + +#endif // _ZOH_UTILS_H diff --git a/src/nvtt/bc6h/zoh.cpp b/src/nvtt/bc6h/zoh.cpp index edf17cb..80275fb 100644 --- a/src/nvtt/bc6h/zoh.cpp +++ b/src/nvtt/bc6h/zoh.cpp @@ -17,6 +17,8 @@ See the License for the specific language governing permissions and limitations #include // memcpy +using namespace ZOH; + bool ZOH::isone(const char *block) { @@ -130,7 +132,7 @@ static void stats(char block[ZOH::BLOCKSIZE]) { char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++; int prec = mode_to_prec[mode]; - assert (prec != -1); + nvAssert (prec != -1); if (!ZOH::isone(block)) { tworegions++; diff --git a/src/nvtt/bc6h/zoh.h b/src/nvtt/bc6h/zoh.h index f3c2882..e52aa2f 100644 --- a/src/nvtt/bc6h/zoh.h +++ b/src/nvtt/bc6h/zoh.h @@ -15,17 +15,13 @@ See the License for the specific language governing permissions and limitations #include "tile.h" +namespace ZOH { + // UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f -#define EXTERNAL_RELEASE 1 // define this if we're releasing this code externally - -#define NREGIONS_TWO 2 -#define NREGIONS_ONE 1 -#define NCHANNELS 3 - -// Note: this code only reads OpenEXR files, which are only in F16 format.
-// if unsigned is selected, the input is clamped to >= 0. -// if f16 is selected, the range is clamped to 0..0x7bff. +static const int NREGIONS_TWO = 2; +static const int NREGIONS_ONE = 1; +static const int NCHANNELS = 3; struct FltEndpts { @@ -45,28 +41,25 @@ struct ComprEndpts uint B[NCHANNELS]; }; -class ZOH -{ -public: - static const int BLOCKSIZE=16; - static const int BITSIZE=128; - static Format FORMAT; +static const int BLOCKSIZE=16; +static const int BITSIZE=128; - static void compress(const Tile &t, char *block); - static void decompress(const char *block, Tile &t); +void compress(const Tile &t, char *block); +void decompress(const char *block, Tile &t); - static float compressone(const Tile &t, char *block); - static float compresstwo(const Tile &t, char *block); - static void decompressone(const char *block, Tile &t); - static void decompresstwo(const char *block, Tile &t); +float compressone(const Tile &t, char *block); +float compresstwo(const Tile &t, char *block); +void decompressone(const char *block, Tile &t); +void decompresstwo(const char *block, Tile &t); - static float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block); - static float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]); +float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block); +float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]); - static float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block); - static float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]); +float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block); +float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]); - static bool isone(const char *block); -}; +bool isone(const char *block); + +} #endif // _ZOH_H diff --git 
a/src/nvtt/bc6h/zohone.cpp b/src/nvtt/bc6h/zohone.cpp index 39959d5..2b246bb 100644 --- a/src/nvtt/bc6h/zohone.cpp +++ b/src/nvtt/bc6h/zohone.cpp @@ -25,6 +25,7 @@ See the License for the specific language governing permissions and limitations #include // FLT_MAX using namespace nv; +using namespace ZOH; #define NINDICES 16 #define INDEXBITS 4 @@ -324,21 +325,21 @@ static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].x = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); + palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); a = Utils::unquantize(endpts.A[1], prec); b = Utils::unquantize(endpts.B[1], prec); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].y = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); + palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); a = Utils::unquantize(endpts.A[2], prec); b = Utils::unquantize(endpts.B[2], prec); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].z = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); + palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); } // position 0 was compressed @@ -666,7 +667,7 @@ static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], V { for (int region = 0; region < NREGIONS_ONE; ++region) for (int i = 0; i < NINDICES; ++i) - palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, DENOM); + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM); } // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined diff --git a/src/nvtt/bc6h/zohtwo.cpp b/src/nvtt/bc6h/zohtwo.cpp index ce2dcee..4c43fad 100644 --- a/src/nvtt/bc6h/zohtwo.cpp +++ b/src/nvtt/bc6h/zohtwo.cpp @@ -49,6 +49,7 @@ See 
the License for the specific language governing permissions and limitations #include // FLT_MAX using namespace nv; +using namespace ZOH; #define NINDICES 8 #define INDEXBITS 3 @@ -396,21 +397,21 @@ static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].x = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); + palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); a = Utils::unquantize(endpts.A[1], prec); b = Utils::unquantize(endpts.B[1], prec); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].y = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); + palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); a = Utils::unquantize(endpts.A[2], prec); b = Utils::unquantize(endpts.B[2], prec); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].z = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); + palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); } static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W]) @@ -753,7 +754,7 @@ static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], V { for (int region = 0; region < NREGIONS_TWO; ++region) for (int i = 0; i < NINDICES; ++i) - palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, DENOM); + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM); } // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined diff --git a/src/nvtt/bc7/CMakeLists.txt b/src/nvtt/bc7/CMakeLists.txt new file mode 100644 index 0000000..5806535 --- /dev/null +++ b/src/nvtt/bc7/CMakeLists.txt @@ -0,0 +1,30 @@ +PROJECT(bc7) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +SET(BC7_SRCS + avpcl.cpp + avpcl.h + 
avpcl_mode0.cpp + avpcl_mode1.cpp + avpcl_mode2.cpp + avpcl_mode3.cpp + avpcl_mode4.cpp + avpcl_mode5.cpp + avpcl_mode6.cpp + avpcl_mode7.cpp + bits.h + endpts.h + shapes_three.h + shapes_two.h + tile.h + utils.cpp + utils.h) + +ADD_LIBRARY(bc7 STATIC ${BC7_SRCS}) + +IF(NOT WIN32) + IF(CMAKE_COMPILER_IS_GNUCXX) + SET_TARGET_PROPERTIES(bc7 PROPERTIES COMPILE_FLAGS -fPIC) + ENDIF(CMAKE_COMPILER_IS_GNUCXX) +ENDIF(NOT WIN32) diff --git a/src/nvtt/bc7/ImfArray.h b/src/nvtt/bc7/ImfArray.h deleted file mode 100644 index 5160fa4..0000000 --- a/src/nvtt/bc7/ImfArray.h +++ /dev/null @@ -1,261 +0,0 @@ -/////////////////////////////////////////////////////////////////////////// -// -// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas -// Digital Ltd. LLC -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Industrial Light & Magic nor the names of -// its contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -/////////////////////////////////////////////////////////////////////////// - - - -#ifndef INCLUDED_IMF_ARRAY_H -#define INCLUDED_IMF_ARRAY_H - -//------------------------------------------------------------------------- -// -// class Array -// class Array2D -// -// "Arrays of T" whose sizes are not known at compile time. -// When an array goes out of scope, its elements are automatically -// deleted. -// -// Usage example: -// -// struct C -// { -// C () {std::cout << "C::C (" << this << ")\n";}; -// virtual ~C () {std::cout << "C::~C (" << this << ")\n";}; -// }; -// -// int -// main () -// { -// Array a(3); -// -// C &b = a[1]; -// const C &c = a[1]; -// C *d = a + 2; -// const C *e = a; -// -// return 0; -// } -// -//------------------------------------------------------------------------- - -namespace Imf { - - -template -class Array -{ - public: - - //----------------------------- - // Constructors and destructors - //----------------------------- - - Array () {_data = 0;} - Array (long size) {_data = new T[size];} - ~Array () {delete [] _data;} - - - //----------------------------- - // Access to the array elements - //----------------------------- - - operator T * () {return _data;} - operator const T * () const {return _data;} - - - //------------------------------------------------------ - // Resize and clear the array (the contents of the array - // are not preserved across the resize operation). 
- // - // resizeEraseUnsafe() is more memory efficient than - // resizeErase() because it deletes the old memory block - // before allocating a new one, but if allocating the - // new block throws an exception, resizeEraseUnsafe() - // leaves the array in an unusable state. - // - //------------------------------------------------------ - - void resizeErase (long size); - void resizeEraseUnsafe (long size); - - - private: - - Array (const Array &); // Copying and assignment - Array & operator = (const Array &); // are not implemented - - T * _data; -}; - - -template -class Array2D -{ - public: - - //----------------------------- - // Constructors and destructors - //----------------------------- - - Array2D (); // empty array, 0 by 0 elements - Array2D (long sizeX, long sizeY); // sizeX by sizeY elements - ~Array2D (); - - - //----------------------------- - // Access to the array elements - //----------------------------- - - T * operator [] (long x); - const T * operator [] (long x) const; - - - //------------------------------------------------------ - // Resize and clear the array (the contents of the array - // are not preserved across the resize operation). - // - // resizeEraseUnsafe() is more memory efficient than - // resizeErase() because it deletes the old memory block - // before allocating a new one, but if allocating the - // new block throws an exception, resizeEraseUnsafe() - // leaves the array in an unusable state. 
- // - //------------------------------------------------------ - - void resizeErase (long sizeX, long sizeY); - void resizeEraseUnsafe (long sizeX, long sizeY); - - - private: - - Array2D (const Array2D &); // Copying and assignment - Array2D & operator = (const Array2D &); // are not implemented - - long _sizeY; - T * _data; -}; - - -//--------------- -// Implementation -//--------------- - -template -inline void -Array::resizeErase (long size) -{ - T *tmp = new T[size]; - delete [] _data; - _data = tmp; -} - - -template -inline void -Array::resizeEraseUnsafe (long size) -{ - delete [] _data; - _data = 0; - _data = new T[size]; -} - - -template -inline -Array2D::Array2D (): - _sizeY (0), _data (0) -{ - // emtpy -} - - -template -inline -Array2D::Array2D (long sizeX, long sizeY): - _sizeY (sizeY), _data (new T[sizeX * sizeY]) -{ - // emtpy -} - - -template -inline -Array2D::~Array2D () -{ - delete [] _data; -} - - -template -inline T * -Array2D::operator [] (long x) -{ - return _data + x * _sizeY; -} - - -template -inline const T * -Array2D::operator [] (long x) const -{ - return _data + x * _sizeY; -} - - -template -inline void -Array2D::resizeErase (long sizeX, long sizeY) -{ - T *tmp = new T[sizeX * sizeY]; - delete [] _data; - _sizeY = sizeY; - _data = tmp; -} - - -template -inline void -Array2D::resizeEraseUnsafe (long sizeX, long sizeY) -{ - delete [] _data; - _data = 0; - _sizeY = 0; - _data = new T[sizeX * sizeY]; - _sizeY = sizeY; -} - - -} // namespace Imf - -#endif diff --git a/src/nvtt/bc7/arvo/ArvoMath.cpp b/src/nvtt/bc7/arvo/ArvoMath.cpp deleted file mode 100644 index 95d1a7d..0000000 --- a/src/nvtt/bc7/arvo/ArvoMath.cpp +++ /dev/null @@ -1,342 +0,0 @@ -/*************************************************************************** -* Math.C * -* * -* Some basic math functions. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 06/21/93 Initial coding. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include -#include -#include "ArvoMath.h" -#include "form.h" - -namespace ArvoMath { - static const float Epsilon = 1.0E-5; - static const double LogTwo = log( 2.0 ); - -#define BinCoeffMax 500 - - double RelErr( double x, double y ) - { - double z = x - y; - if( x < 0.0 ) x = -x; - if( y < 0.0 ) y = -y; - return z / ( x > y ? x : y ); - } - - /*************************************************************************** - * A R C Q U A D * - * * - * Returns the theta / ( 2*PI ) where the input variables x and y are * - * such that x == COS( theta ) and y == SIN( theta ). * - * * - ***************************************************************************/ - float ArcQuad( float x, float y ) - { - if( Abs( x ) > Epsilon ) - { - float temp = OverTwoPi * atan( Abs( y ) / Abs( x ) ); - if( x < 0.0 ) temp = 0.5 - temp; - if( y < 0.0 ) temp = 1.0 - temp; - return( temp ); - } - else if( y > Epsilon ) return( 0.25 ); - else if( y < -Epsilon ) return( 0.75 ); - else return( 0.0 ); - } - - /*************************************************************************** - * A R C T A N * - * * - * Returns the angle theta such that x = COS( theta ) & y = SIN( theta ). 
* - * * - ***************************************************************************/ - float ArcTan( float x, float y ) - { - if( Abs( x ) > Epsilon ) - { - float temp = atan( Abs( y ) / Abs( x ) ); - if( x < 0.0 ) temp = Pi - temp; - if( y < 0.0 ) temp = TwoPi - temp; - return( temp ); - } - else if( y > Epsilon ) return( PiOverTwo ); - else if( y < -Epsilon ) return( 3 * PiOverTwo ); - else return( 0.0 ); - } - - /*************************************************************************** - * M A C H I N E E P S I L O N * - * * - * Returns the machine epsilon. * - * * - ***************************************************************************/ - float MachineEpsilon() - { - float x = 1.0; - float y; - float z = 1.0 + x; - while( z > 1.0 ) - { - y = x; - x /= 2.0; - z = (float)( 1.0 + (float)x ); // Avoid double precision! - } - return (float)y; - } - - /*************************************************************************** - * L O G G A M M A * - * * - * Computes the natural log of the gamma function using the Lanczos * - * approximation formula. Gamma is defined by * - * * - * ( z - 1 ) -t * - * gamma( z ) = Integral[ t e dt ] * - * * - * * - * where the integral ranges from 0 to infinity. The gamma function * - * satisfies * - * gamma( n + 1 ) = n! * - * * - * This algorithm has been adapted from "Numerical Recipes", p. 157. 
* - * * - ***************************************************************************/ - double LogGamma( double x ) - { - static const double - coeff0 = 7.61800917300E+1, - coeff1 = -8.65053203300E+1, - coeff2 = 2.40140982200E+1, - coeff3 = -1.23173951600E+0, - coeff4 = 1.20858003000E-3, - coeff5 = -5.36382000000E-6, - stp = 2.50662827465E+0, - half = 5.00000000000E-1, - fourpf = 4.50000000000E+0, - one = 1.00000000000E+0, - two = 2.00000000000E+0, - three = 3.00000000000E+0, - four = 4.00000000000E+0, - five = 5.00000000000E+0; - double r = coeff0 / ( x ) + coeff1 / ( x + one ) + - coeff2 / ( x + two ) + coeff3 / ( x + three ) + - coeff4 / ( x + four ) + coeff5 / ( x + five ) ; - double s = x + fourpf; - double t = ( x - half ) * log( s ) - s; - return t + log( stp * ( r + one ) ); - } - - /*************************************************************************** - * L O G F A C T * - * * - * Returns the natural logarithm of n factorial. For efficiency, some * - * of the values are cached, so they need be computed only once. * - * * - ***************************************************************************/ - double LogFact( int n ) - { - static const int Cache_Size = 100; - static double c[ Cache_Size ] = { 0.0 }; // Cache some of the values. - if( n <= 1 ) return 0.0; - if( n < Cache_Size ) - { - if( c[n] == 0.0 ) c[n] = LogGamma((double)(n+1)); - return c[n]; - } - return LogGamma((double)(n+1)); // gamma(n+1) == n! - } - - /*************************************************************************** - * M U L T I N O M I A L C O E F F * - * * - * Returns the multinomial coefficient ( n; X1 X2 ... Xk ) which is * - * defined to be n! / ( X1! X2! ... Xk! ). This is done by computing * - * exp( log(n!) - log(X1!) - log(X2!) - ... - log(Xk!) ). The value of * - * n is obtained by summing the Xi's. 
* - * * - ***************************************************************************/ - double MultinomialCoeff( int k, int X[] ) - { - int i; - // Find n by summing the coefficients. - - int n = X[0]; - for( i = 1; i < k; i++ ) n += X[i]; - - // Compute log(n!) then subtract log(X!) for each X. - - double LogCoeff = LogFact( n ); - for( i = 0; i < k; i++ ) LogCoeff -= LogFact( X[i] ); - - // Round the exponential of the result to the nearest integer. - - return floor( exp( LogCoeff ) + 0.5 ); - } - - - double MultinomialCoeff( int i, int j, int k ) - { - int n = i + j + k; - double x = LogFact( n ) - LogFact( i ) - LogFact( j ) - LogFact( k ); - return floor( exp( x ) + 0.5 ); - } - - /*************************************************************************** - * B I N O M I A L C O E F F S * - * * - * Generate all n+1 binomial coefficents for a given n. This is done by * - * computing the n'th row of Pascal's triange, starting from the top. * - * No additional storage is required. * - * * - ***************************************************************************/ - void BinomialCoeffs( int n, long *coeff ) - { - coeff[0] = 1; - for( int i = 1; i <= n; i++ ) - { - long a = coeff[0]; - long b = coeff[1]; - for( int j = 1; j < i; j++ ) // Make next row of Pascal's triangle. - { - coeff[j] = a + b; // Overwrite the old row. - a = b; - b = coeff[j+1]; - } - coeff[i] = 1; // The last entry in any row is always 1. - } - } - - void BinomialCoeffs( int n, double *coeff ) - { - coeff[0] = 1.0; - for( int i = 1; i <= n; i++ ) - { - double a = coeff[0]; - double b = coeff[1]; - for( int j = 1; j < i; j++ ) // Make next row of Pascal's triangle. - { - coeff[j] = a + b; // Overwrite the old row. - a = b; - b = coeff[j+1]; - } - coeff[i] = 1.0; // The last entry in any row is always 1. 
- } - } - - const double *BinomialCoeffs( int n ) - { - static double *coeff[ BinCoeffMax + 1 ] = { 0 }; - if( n > BinCoeffMax || n < 0 ) - { - std::cerr << form( "%d is outside of (0,%d) in BinomialCoeffs", n, BinCoeffMax ); - return NULL; - } - if( coeff[n] == NULL ) // Fill in this entry. - { - double *c = new double[ n + 1 ]; - if( c == NULL ) - { - std::cerr << form( "Could not allocate for BinomialCoeffs(%d)", n ); - return NULL; - } - BinomialCoeffs( n, c ); - coeff[n] = c; - } - return coeff[n]; - } - - /*************************************************************************** - * B I N O M I A L C O E F F * - * * - * Compute a given binomial coefficient. Several rows of Pascal's * - * triangle are stored for efficiently computing the small coefficients. * - * Higher-order terms are computed using LogFact. * - * * - ***************************************************************************/ - double BinomialCoeff( int n, int k ) - { - double b; - int p = n - k; - if( k <= 1 || p <= 1 ) // Check for errors and special cases. - { - if( k == 0 || p == 0 ) return 1; - if( k == 1 || p == 1 ) return n; - std::cerr << form( "BinomialCoeff(%d,%d) is undefined", n, k ); - return 0; - } - static const int // Store part of Pascal's triange for small coeffs. 
- n0[] = { 1 }, - n1[] = { 1, 1 }, - n2[] = { 1, 2, 1 }, - n3[] = { 1, 3, 3, 1 }, - n4[] = { 1, 4, 6, 4, 1 }, - n5[] = { 1, 5, 10, 10, 5, 1 }, - n6[] = { 1, 6, 15, 20, 15, 6, 1 }, - n7[] = { 1, 7, 21, 35, 35, 21, 7, 1 }, - n8[] = { 1, 8, 28, 56, 70, 56, 28, 8, 1 }, - n9[] = { 1, 9, 36, 84, 126, 126, 84, 36, 9, 1 }; - switch( n ) - { - case 0 : b = n0[k]; break; - case 1 : b = n1[k]; break; - case 2 : b = n2[k]; break; - case 3 : b = n3[k]; break; - case 4 : b = n4[k]; break; - case 5 : b = n5[k]; break; - case 6 : b = n6[k]; break; - case 7 : b = n7[k]; break; - case 8 : b = n8[k]; break; - case 9 : b = n9[k]; break; - default: - { - double x = LogFact( n ) - LogFact( p ) - LogFact( k ); - b = floor( exp( x ) + 0.5 ); - } - } - return b; - } - - - /*************************************************************************** - * L O G D O U B L E F A C T (Log of double factorial) * - * * - * Return log( n!! ) where the double factorial is defined by * - * * - * (2 n + 1)!! = 1 * 3 * 5 * ... * (2n + 1) (Odd integers) * - * * - * (2 n)!! = 2 * 4 * 6 * ... * 2n (Even integers) * - * * - * and is related to the single factorial via * - * * - * (2 n + 1)!! = (2 n + 1)! / ( 2^n n! ) (Odd integers) * - * * - * (2 n)!! = 2^n n! (Even integers) * - * * - ***************************************************************************/ - double LogDoubleFact( int n ) // log( n!! ) - { - int k = n / 2; - double f = LogFact( k ) + k * LogTwo; - if( Odd(n) ) f = LogFact( n ) - f; - return f; - } -}; diff --git a/src/nvtt/bc7/arvo/ArvoMath.h b/src/nvtt/bc7/arvo/ArvoMath.h deleted file mode 100644 index e9edd7e..0000000 --- a/src/nvtt/bc7/arvo/ArvoMath.h +++ /dev/null @@ -1,212 +0,0 @@ -/*************************************************************************** -* Math.h * -* * -* Convenient constants, macros, and inline functions for basic math * -* functions. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 06/17/93 Initial coding. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __MATH_INCLUDED__ -#define __MATH_INCLUDED__ - -#include -#include - -namespace ArvoMath { - -#ifndef MAXFLOAT -#define MAXFLOAT 1.0E+20 -#endif - - static const double - Pi = 3.14159265358979, - PiSquared = Pi * Pi, - TwoPi = 2.0 * Pi, - FourPi = 4.0 * Pi, - PiOverTwo = Pi / 2.0, - PiOverFour = Pi / 4.0, - OverPi = 1.0 / Pi, - OverTwoPi = 1.0 / TwoPi, - OverFourPi = 1.0 / FourPi, - Infinity = MAXFLOAT, - Tiny = 1.0 / MAXFLOAT, - DegreesToRad = Pi / 180.0, - RadToDegrees = 180.0 / Pi; - - inline int Odd ( int k ) { return k & 1; } - inline int Even ( int k ) { return !(k & 1); } - inline float Abs ( int x ) { return x > 0 ? x : -x; } - inline float Abs ( float x ) { return x > 0. ? x : -x; } - inline float Abs ( double x ) { return x > 0. ? x : -x; } - inline float Min ( float x, float y ) { return x < y ? x : y; } - inline float Max ( float x, float y ) { return x > y ? x : y; } - inline double dMin ( double x, double y ) { return x < y ? x : y; } - inline double dMax ( double x, double y ) { return x > y ? x : y; } - inline float Sqr ( int x ) { return x * x; } - inline float Sqr ( float x ) { return x * x; } - inline float Sqr ( double x ) { return x * x; } - inline float Sqrt ( double x ) { return x > 0. ? 
sqrt(x) : 0.; } - inline float Cubed ( float x ) { return x * x * x; } - inline int Sign ( float x ) { return x > 0. ? 1 : (x < 0. ? -1 : 0); } - inline void Swap ( float &a, float &b ) { float c = a; a = b; b = c; } - inline void Swap ( int &a, int &b ) { int c = a; a = b; b = c; } - inline double Sin ( double x, int n ) { return pow( sin(x), n ); } - inline double Cos ( double x, int n ) { return pow( cos(x), n ); } - inline float ToSin ( double x ) { return Sqrt( 1.0 - Sqr(x) ); } - inline float ToCos ( double x ) { return Sqrt( 1.0 - Sqr(x) ); } - inline float MaxAbs( float x, float y ) { return Max( Abs(x), Abs(y) ); } - inline float MinAbs( float x, float y ) { return Min( Abs(x), Abs(y) ); } - inline float Pythag( double x, double y ) { return Sqrt( x*x + y*y ); } - - inline double ArcCos( double x ) - { - double y; - if( -1.0 <= x && x <= 1.0 ) y = acos( x ); - else if( x > 1.0 ) y = 0.0; - else if( x < -1.0 ) y = Pi; - return y; - } - - inline double ArcSin( double x ) - { - if( x < -1.0 ) x = -1.0; - if( x > 1.0 ) x = 1.0; - return asin( x ); - } - - inline float Clamp( float min, float &x, float max ) - { - if( x < min ) x = min; else - if( x > max ) x = max; - return x; - } - - inline double Clamp( float min, double &x, float max ) - { - if( x < min ) x = min; else - if( x > max ) x = max; - return x; - } - - inline float Max( float x, float y, float z ) - { - float t; - if( x >= y && x >= z ) t = x; - else if( y >= z ) t = y; - else t = z; - return t; - } - - inline float Min( float x, float y, float z ) - { - float t; - if( x <= y && x <= z ) t = x; - else if( y <= z ) t = y; - else t = z; - return t; - } - - inline float Max( float x, float y, float z, float w ) - { - float t; - if( x >= y && x >= z && x >= w) t = x; - else if( y >= z && y >= w ) t = y; - else if (z >= w) t = z; - else t = w; - return t; - } - - inline float Min( float x, float y, float z, float w ) - { - float t; - if( x <= y && x <= z && x <= w) t = x; - else if( y <= z && y <= w ) 
t = y; - else if (z <= w) t = z; - else t = w; - return t; - } - - inline double dMax( double x, double y, double z ) - { - double t; - if( x >= y && x >= z ) t = x; - else if( y >= z ) t = y; - else t = z; - return t; - } - - inline double dMin( double x, double y, double z ) - { - double t; - if( x <= y && x <= z ) t = x; - else if( y <= z ) t = y; - else t = z; - return t; - } - - inline float MaxAbs( float x, float y, float z ) - { - return Max( Abs( x ), Abs( y ), Abs( z ) ); - } - - inline float MaxAbs( float x, float y, float z, float w ) - { - return Max( Abs( x ), Abs( y ), Abs( z ), Abs( w ) ); - } - - inline float Pythag( float x, float y, float z ) - { - return sqrt( x * x + y * y + z * z ); - } - - extern float ArcTan ( float x, float y ); - extern float ArcQuad ( float x, float y ); - extern float MachineEpsilon ( ); - extern double LogGamma ( double x ); - extern double LogFact ( int n ); - extern double LogDoubleFact ( int n ); // log( n!! ) - extern double BinomialCoeff ( int n, int k ); - extern void BinomialCoeffs ( int n, long *coeffs ); - extern void BinomialCoeffs ( int n, double *coeffs ); - extern double MultinomialCoeff( int i, int j, int k ); - extern double MultinomialCoeff( int k, int N[] ); - extern double RelErr ( double x, double y ); - -#ifndef ABS -#define ABS( x ) ((x) > 0 ? (x) : -(x)) -#endif - -#ifndef MAX -#define MAX( x, y ) ((x) > (y) ? (x) : (y)) -#endif - -#ifndef MIN -#define MIN( x, y ) ((x) < (y) ? (x) : (y)) -#endif - -}; - -#endif - - - - - - - diff --git a/src/nvtt/bc7/arvo/Char.cpp b/src/nvtt/bc7/arvo/Char.cpp deleted file mode 100644 index cc450a5..0000000 --- a/src/nvtt/bc7/arvo/Char.cpp +++ /dev/null @@ -1,420 +0,0 @@ -/*************************************************************************** -* Char.h * -* * -* Convenient constants, macros, and inline functions for manipulation of * -* characters and strings. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 07/01/93 Initial coding. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include -#include "Char.h" - -namespace ArvoMath { - - typedef char *charPtr; - - // Treat "str" as a file name, and return just the directory - // portion -- i.e. strip off the name of the leaf object (but - // leave the final "/". - const char *getPath( const char *str, char *buff ) - { - int k; - for( k = strlen( str ) - 1; k >= 0; k-- ) - { - if( str[k] == Slash ) break; - } - for( int i = 0; i <= k; i++ ) buff[i] = str[i]; - buff[k+1] = NullChar; - return buff; - } - - // Treat "str" as a file name, and return just the file name - // portion -- i.e. strip off everything up to and including - // the final "/". 
- const char *getFile( const char *str, char *buff ) - { - int k; - int len = strlen( str ); - for( k = len - 1; k >= 0; k-- ) - { - if( str[k] == Slash ) break; - } - for( int i = 0; i < len - k; i++ ) buff[i] = str[ i + k + 1 ]; - return buff; - } - - int getPrefix( const char *str, char *buff ) - { - int len = 0; - while( *str != NullChar && *str != Period ) - { - *buff++ = *str++; - len++; - } - *buff = NullChar; - return len; - } - - int getSuffix( const char *str, char *buff ) - { - int n = strlen( str ); - int k = n - 1; - while( k >= 0 && str[k] != Period ) k--; - for( int i = k + 1; i < n; i++ ) *buff++ = str[i]; - *buff = NullChar; - return n - k - 1; - } - - const char* toString( int number, char *buff ) - { - static char local_buff[32]; - char *str = ( buff == NULL ) ? local_buff : buff; - sprintf( str, "%d", number ); - return str; - } - - const char* toString( float number, char *buff ) - { - static char local_buff[32]; - char *str = ( buff == NULL ) ? local_buff : buff; - sprintf( str, "%g", number ); - return str; - } - - int isInteger( const char *str ) - { - int n = strlen( str ); - for( int i = 0; i < n; i++ ) - { - char c = str[i]; - if( isDigit(c) ) continue; - if( c == Plus || c == Minus ) continue; - if( c == Space ) continue; - return 0; - } - return 1; - } - - // Test to see if a string has a given suffix. - int hasSuffix( const char *string, const char *suffix ) - { - if( suffix == NULL ) return 1; // The null suffix always matches. - if( string == NULL ) return 0; // The null string can only have a null suffix. - int m = strlen( string ); - int k = strlen( suffix ); - if( k <= 0 ) return 1; // Empty suffix always matches. - if( m < k + 1 ) return 0; // String is too short to have this suffix. - - // See if the file has the given suffix. - int s = m - k; // Beginning of suffix (if it matches). - for( int i = 0; i < k; i++ ) - if( string[ s + i ] != suffix[ i ] ) return 0; - return s; // Always > 0. 
- } - - // Test to see if a string has a given prefix. - int hasPrefix( const char *string, const char *prefix ) - { - if( prefix == NULL ) return 1; // The null prefix always matches. - if( string == NULL ) return 0; // The null string can only have a null suffix. - while( *prefix ) - { - if( *prefix++ != *string++ ) return 0; - } - return 1; - } - - // Test to see if the string contains the given character. - int inString( char c, const char *str ) - { - if( str == NULL || str[0] == NullChar ) return 0; - while( *str != '\0' ) - if( *str++ == c ) return 1; - return 0; - } - - int nullString( const char *str ) - { - return str == NULL || str[0] == NullChar; - } - - const char *stripSuffix( const char *string, const char *suffix, char *buff ) - { - static char local_buff[256]; - if( buff == NULL ) buff = local_buff; - buff[0] = NullChar; - if( !hasSuffix( string, suffix ) ) return NULL; - int s = strlen( string ) - strlen( suffix ); - for( int i = 0; i < s; i++ ) - { - buff[i] = string[i]; - } - buff[s] = NullChar; - return buff; - } - - int getIndex( const char *pat, const char *str ) - { - int p_len = strlen( pat ); - int s_len = strlen( str ); - if( p_len == 0 || s_len == 0 ) return -1; - for( int i = 0; i <= s_len - p_len; i++ ) - { - int match = 1; - for( int j = 0; j < p_len; j++ ) - { - if( str[ i + j ] != pat[ j ] ) { match = 0; break; } - } - if( match ) return i; - } - return -1; - } - - int getSubstringAfter( const char *pat, const char *str, char *buff ) - { - int ind = getIndex( pat, str ); - if( ind < 0 ) return -1; - int p_len = strlen( pat ); - int k = 0; - for( int i = ind + p_len; ; i++ ) - { - buff[ k++ ] = str[ i ]; - if( str[ i ] == NullChar ) break; - } - return k; - } - - const char *SubstringAfter( const char *pat, const char *str, char *user_buff ) - { - static char temp[128]; - char *buff = ( user_buff != NULL ) ? 
user_buff : temp; - int k = getSubstringAfter( pat, str, buff ); - if( k > 0 ) return buff; - return str; - } - - const char *metaString( const char *str, char *user_buff ) - { - static char temp[128]; - char *buff = ( user_buff != NULL ) ? user_buff : temp; - sprintf( buff, "\"%s\"", str ); - return buff; - } - - // This is the opposite of metaString. - const char *stripQuotes( const char *str, char *user_buff ) - { - static char temp[128]; - char *buff = ( user_buff != NULL ) ? user_buff : temp; - char *b = buff; - for(;;) - { - if( *str != DoubleQuote ) *b++ = *str; - if( *str == NullChar ) break; - str++; - } - return buff; - } - - int getIntFlag( const char *flags, const char *flag, int &value ) - { - while( *flags ) - { - if( hasPrefix( flags, flag ) ) - { - int k = strlen( flag ); - if( flags[k] == '=' ) - { - value = atoi( flags + k + 1 ); - return 1; - } - } - flags++; - } - return 0; - } - - int getFloatFlag( const char *flags, const char *flag, float &value ) - { - while( *flags ) - { - if( hasPrefix( flags, flag ) ) - { - int k = strlen( flag ); - if( flags[k] == '=' ) - { - value = atof( flags + k + 1 ); - return 1; - } - } - flags++; - } - return 0; - } - - SortedList::SortedList( sort_type type_, int ascend_ ) - { - type = type_; - ascend = ascend_; - num_elements = 0; - max_elements = 0; - sorted = 1; - list = NULL; - } - - SortedList::~SortedList() - { - Clear(); - delete[] list; - } - - void SortedList::Clear() - { - // Delete all the private copies of the strings and re-initialize the - // list. Reuse the same list, expanding it when necessary. - for( int i = 0; i < num_elements; i++ ) - { - delete list[i]; - list[i] = NULL; - } - num_elements = 0; - sorted = 1; - } - - SortedList &SortedList::operator<<( const char *str ) - { - // Add a new string to the end of the list, expanding the list if necessary. - // Mark the list as unsorted, so that the next reference to an element will - // cause the list to be sorted again. 
- if( num_elements == max_elements ) Expand(); - list[ num_elements++ ] = strdup( str ); - sorted = 0; - return *this; - } - - const char *SortedList::operator()( int i ) - { - // Return the i'th element of the list. Sort first if necessary. - static char *null = ""; - if( num_elements == 0 || i < 0 || i >= num_elements ) return null; - if( !sorted ) Sort(); - return list[i]; - } - - void SortedList::Expand() - { - // Create a new list of twice the size and copy the old list into it. - // This doubles "max_elements", but leaves "num_elements" unchanged. - if( max_elements == 0 ) max_elements = 1; - max_elements *= 2; - charPtr *new_list = new charPtr[ max_elements ]; - for( int i = 0; i < max_elements; i++ ) - new_list[i] = ( i < num_elements ) ? list[i] : NULL; - delete[] list; - list = new_list; - } - - void SortedList::Swap( int i, int j ) - { - char *temp = list[i]; - list[i] = list[j]; - list[j] = temp; - } - - int SortedList::inOrder( int p, int q ) const - { - int test; - if( type == sort_alphabetic ) - test = ( strcmp( list[p], list[q] ) <= 0 ); - else - { - int len_p = strlen( list[p] ); - int len_q = strlen( list[q] ); - test = ( len_p < len_q ) || - ( len_p == len_q && strcmp( list[p], list[q] ) <= 0 ); - } - if( ascend ) return test; - return !test; - } - - // This is an insertion sort that operates on subsets of the - // input defined by the step length. - void SortedList::InsertionSort( int start, int size, int step ) - { - for( int i = 0; i + step < size; i += step ) - { - for( int j = i; j >= 0; j -= step ) - { - int p = start + j; - int q = p + step; - if( inOrder( p, q ) ) break; - Swap( p, q ); - } - } - } - - // This is a Shell sort. 
- void SortedList::Sort() - { - for( int step = num_elements / 2; step > 1; step /= 2 ) - for( int start = 0; start < step; start++ ) - InsertionSort( start, num_elements - start, step ); - InsertionSort( 0, num_elements, 1 ); - sorted = 1; - } - - void SortedList::SetOrder( sort_type type_, int ascend_ ) - { - if( type_ != type || ascend_ != ascend ) - { - type = type_; - ascend = ascend_; - sorted = 0; - } - } - - int getstring( std::istream &in, const char *str ) - { - char ch; - if( str == NULL ) return 1; - while( *str != NullChar ) - { - in >> ch; - if( *str != ch ) return 0; - str++; - } - return 1; - } - - std::istream &skipWhite( std::istream &in ) - { - char c; - while( in.get(c) ) - { - if( !isWhite( c ) ) - { - in.putback(c); - break; - } - } - return in; - } -}; diff --git a/src/nvtt/bc7/arvo/Char.h b/src/nvtt/bc7/arvo/Char.h deleted file mode 100644 index 2742c1d..0000000 --- a/src/nvtt/bc7/arvo/Char.h +++ /dev/null @@ -1,245 +0,0 @@ -/*************************************************************************** -* Char.h * -* * -* Convenient constants, macros, and inline functions for manipulation of * -* characters and strings. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 07/01/93 Initial coding. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. 
* -* * -***************************************************************************/ -#ifndef __CHAR_INCLUDED__ -#define __CHAR_INCLUDED__ - -#include -#include - -namespace ArvoMath { - - static const char - Apostrophe = '\'' , - Asterisk = '*' , - Atsign = '@' , - Backslash = '\\' , - Bell = '\7' , - Colon = ':' , - Comma = ',' , - Dash = '-' , - DoubleQuote = '"' , - EqualSign = '=' , - Exclamation = '!' , - GreaterThan = '>' , - Hash = '#' , - Lbrack = '[' , - Lcurley = '{' , - LessThan = '<' , - Lparen = '(' , - Minus = '-' , - NewLine = '\n' , - NullChar = '\0' , - Percent = '%' , - Period = '.' , - Pound = '#' , - Plus = '+' , - Rbrack = ']' , - Rcurley = '}' , - Rparen = ')' , - Semicolon = ';' , - Space = ' ' , - Slash = '/' , - Star = '*' , - Tab = '\t' , - Tilde = '~' , - Underscore = '_' ; - - inline int isWhite( char c ) { return c == Space || c == NewLine || c == Tab; } - inline int isUcase( char c ) { return 'A' <= c && c <= 'Z'; } - inline int isLcase( char c ) { return 'a' <= c && c <= 'z'; } - inline int isAlpha( char c ) { return isUcase( c ) || isLcase( c ); } - inline int isDigit( char c ) { return '0' <= c && c <= '9'; } - inline char ToLower( char c ) { return isUcase( c ) ? c + ( 'a' - 'A' ) : c; } - inline char ToUpper( char c ) { return isLcase( c ) ? c + ( 'A' - 'a' ) : c; } - - extern const char *getPath( - const char *str, - char *buff - ); - - extern const char *getFile( - const char *str, - char *buff - ); - - extern int getPrefix( - const char *str, - char *buff - ); - - extern int getSuffix( - const char *str, - char *buff - ); - - extern int isInteger( - const char *str - ); - - extern int hasSuffix( - const char *string, - const char *suffix - ); - - extern int hasPrefix( - const char *string, - const char *prefix - ); - - extern int inString( - char c, - const char *str - ); - - extern int nullString( - const char *str - ); - - extern const char *stripSuffix( // Return NULL if unsuccessful. 
- const char *string, // The string to truncate. - const char *suffix, // The suffix to remove. - char *buff = NULL // Defaults to internal buffer. - ); - - extern const char* toString( - int n, // An integer to convert to a string. - char *buff = NULL // Defauts to internal buffer. - ); - - extern const char* toString( - float x, // A float to convert to a string. - char *buff = NULL // Defauts to internal buffer. - ); - - extern int getIndex( // The index of the start of a pattern in a string. - const char *pat, // The pattern to look for. - const char *str // The string to search. - ); - - extern int getSubstringAfter( - const char *pat, - const char *str, - char *buff - ); - - extern const char *SubstringAfter( - const char *pat, - const char *str, - char *buff = NULL // Defauts to internal buffer. - ); - - extern const char *metaString( - const char *str, // Make this a string within a string. - char *buff = NULL // Defauts to internal buffer. - ); - - extern const char *stripQuotes( - const char *str, // This is the opposite of metaString. - char *buff = NULL // Defauts to internal buffer. - ); - - extern int getIntFlag( - const char *flags, // List of assignment statements. - const char *flag, // A specific flag to look for. - int &value // The variable to assign the value to. - ); - - extern int getFloatFlag( - const char *flags, // List of assignment statements. - const char *flag, // A specific flag to look for. - float &value // The variable to assign the value to. - ); - - extern int getstring( - std::istream &in, - const char *str - ); - - enum sort_type { - sort_alphabetic, // Standard dictionary ordering. - sort_lexicographic // Sort first by length, then alphabetically. 
- }; - - class SortedList { - - public: - SortedList( sort_type = sort_alphabetic, int ascending = 1 ); - ~SortedList(); - SortedList &operator<<( const char * ); - int Size() const { return num_elements; } - const char *operator()( int i ); - void Clear(); - void SetOrder( sort_type = sort_alphabetic, int ascending = 1 ); - - private: - void Sort(); - void InsertionSort( int start, int size, int step ); - void Swap( int i, int j ); - void Expand(); - int inOrder( int i, int j ) const; - int num_elements; - int max_elements; - int sorted; - int ascend; - sort_type type; - char **list; - }; - - - inline int Match( const char *s, const char *t ) - { - return s != NULL && - (t != NULL && strcmp( s, t ) == 0); - } - - inline int Match( const char *s, const char *t1, const char *t2 ) - { - return s != NULL && ( - (t1 != NULL && strcmp( s, t1 ) == 0) || - (t2 != NULL && strcmp( s, t2 ) == 0) ); - } - - union long_union_float { - long i; - float f; - }; - - inline long float_as_long( float x ) - { - long_union_float u; - u.f = x; - return u.i; - } - - inline float long_as_float( long i ) - { - long_union_float u; - u.i = i; - return u.f; - } - - extern std::istream &skipWhite( std::istream &in ); -}; -#endif diff --git a/src/nvtt/bc7/arvo/Complex.cpp b/src/nvtt/bc7/arvo/Complex.cpp deleted file mode 100644 index 468704f..0000000 --- a/src/nvtt/bc7/arvo/Complex.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/*************************************************************************** -* Complex.C * -* * -* Complex numbers, complex arithmetic, and functions of a complex * -* variable. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 03/02/2000 Initial coding. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. 
See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include "Complex.h" -#include "form.h" - -namespace ArvoMath { - const Complex Complex::i( 0.0, 1.0 ); - - std::ostream &operator<<( std::ostream &out, const Complex &z ) - { - out << form( "(%f,%f) ", z.Real(), z.Imag() ); - return out; - } - - Complex cos( const Complex &z ) - { - return Complex( - ::cos( z.Real() ) * ::cosh( z.Imag() ), - -::sin( z.Real() ) * ::sinh( z.Imag() ) - ); - } - - Complex sin( const Complex &z ) - { - return Complex( - ::sin( z.Real() ) * ::cosh( z.Imag() ), - ::cos( z.Real() ) * ::sinh( z.Imag() ) - ); - } - - Complex cosh( const Complex &z ) - { - return Complex( - ::cosh( z.Real() ) * ::cos( z.Imag() ), - ::sinh( z.Real() ) * ::sin( z.Imag() ) - ); - } - - Complex sinh( const Complex &z ) - { - return Complex( - ::sinh( z.Real() ) * ::cos( z.Imag() ), - ::cosh( z.Real() ) * ::sin( z.Imag() ) - ); - } - - Complex log( const Complex &z ) - { - float r = ::sqrt( z.Real() * z.Real() + z.Imag() * z.Imag() ); - float t = ::acos( z.Real() / r ); - if( z.Imag() < 0.0 ) t = 2.0 * 3.1415926 - t; - return Complex( ::log(r), t ); - } -}; diff --git a/src/nvtt/bc7/arvo/Complex.h b/src/nvtt/bc7/arvo/Complex.h deleted file mode 100644 index 671fd57..0000000 --- a/src/nvtt/bc7/arvo/Complex.h +++ /dev/null @@ -1,187 +0,0 @@ -/*************************************************************************** -* Complex.h * -* * -* Complex numbers, complex arithmetic, and functions of a complex * -* variable. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 03/02/2000 Initial coding. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __COMPLEX_INCLUDED__ -#define __COMPLEX_INCLUDED__ - -#include -#include - -namespace ArvoMath { - - class Complex { - public: - Complex() { x = 0; y = 0; } - Complex( float a ) { x = a; y = 0; } - Complex( float a, float b ) { x = a; y = b; } - Complex( const Complex &z ) { *this = z; } - float &Real() { return x; } - float &Imag() { return y; } - float Real() const { return x; } - float Imag() const { return y; } - inline Complex &operator=( const Complex &z ); - static const Complex i; - private: - float x; - float y; - }; - - inline Complex &Complex::operator=( const Complex &z ) - { - x = z.Real(); - y = z.Imag(); - return *this; - } - - inline float Real( const Complex &z ) - { - return z.Real(); - } - - inline float Imag( const Complex &z ) - { - return z.Imag(); - } - - inline Complex conj( const Complex &z ) - { - return Complex( z.Real(), -z.Imag() ); - } - - inline double modsqr( const Complex &z ) - { - return z.Real() * z.Real() + z.Imag() * z.Imag(); - } - - inline double modulus( const Complex &z ) - { - return sqrt( z.Real() * z.Real() + z.Imag() * z.Imag() ); - } - - inline double arg( const Complex &z ) - { - float t = acos( z.Real() / modulus(z) ); - if( z.Imag() < 0.0 ) t = 2.0 * 3.1415926 - t; - return t; - } - - inline Complex operator*( const Complex &z, float a ) - { - 
return Complex( a * z.Real(), a * z.Imag() ); - } - - inline Complex operator*( float a, const Complex &z ) - { - return Complex( a * z.Real(), a * z.Imag() ); - } - - inline Complex operator*( const Complex &z, const Complex &w ) - { - return Complex( - z.Real() * w.Real() - z.Imag() * w.Imag(), - z.Real() * w.Imag() + z.Imag() * w.Real() - ); - } - - inline Complex operator+( const Complex &z, const Complex &w ) - { - return Complex( z.Real() + w.Real(), z.Imag() + w.Imag() ); - } - - inline Complex operator-( const Complex &z, const Complex &w ) - { - return Complex( z.Real() - w.Real(), z.Imag() - w.Imag() ); - } - - inline Complex operator-( const Complex &z ) - { - return Complex( -z.Real(), -z.Imag() ); - } - - inline Complex operator/( const Complex &z, float w ) - { - return Complex( z.Real() / w, z.Imag() / w ); - } - - inline Complex operator/( const Complex &z, const Complex &w ) - { - return ( z * conj(w) ) / modsqr(w); - } - - inline Complex operator/( float a, const Complex &w ) - { - return conj(w) * ( a / modsqr(w) ); - } - - inline Complex &operator+=( Complex &z, const Complex &w ) - { - z.Real() += w.Real(); - z.Imag() += w.Imag(); - return z; - } - - inline Complex &operator*=( Complex &z, const Complex &w ) - { - return z = ( z * w ); - } - - inline Complex &operator-=( Complex &z, const Complex &w ) - { - z.Real() -= w.Real(); - z.Imag() -= w.Imag(); - return z; - } - - inline Complex exp( const Complex &z ) - { - float r = ::exp( z.Real() ); - return Complex( r * cos( z.Imag() ), r * sin( z.Imag() ) ); - } - - inline Complex pow( const Complex &z, int n ) - { - float r = ::pow( modulus( z ), (double)n ); - float t = arg( z ); - return Complex( r * cos( n * t ), r * sin( n * t ) ); - } - - inline Complex polar( float r, float theta ) - { - return Complex( r * cos( theta ), r * sin( theta ) ); - } - - - extern Complex cos ( const Complex &z ); - extern Complex sin ( const Complex &z ); - extern Complex cosh( const Complex &z ); - extern 
Complex sinh( const Complex &z ); - extern Complex log ( const Complex &z ); - - extern std::ostream &operator<<( - std::ostream &out, - const Complex & - ); -}; -#endif - diff --git a/src/nvtt/bc7/arvo/Matrix.cpp b/src/nvtt/bc7/arvo/Matrix.cpp deleted file mode 100644 index d84b7ef..0000000 --- a/src/nvtt/bc7/arvo/Matrix.cpp +++ /dev/null @@ -1,1201 +0,0 @@ -/*************************************************************************** -* Matrix.C * -* * -* General Vector and Matrix classes, with all the associated methods. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 08/16/2000 Revamped for CIT tools. * -* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * -* arvo 06/30/1993 Added singular value decomposition class. * -* arvo 06/25/1993 Major revisions. * -* arvo 09/08/1991 Initial implementation. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include -#include "ArvoMath.h" -#include "Vector.h" -#include "Matrix.h" -#include "form.h" - -namespace ArvoMath { - const Matrix Matrix::Null(0); - - /*-------------------------------------------------------------------------* - * * - * C O N S T R U C T O R S * - * * - *-------------------------------------------------------------------------*/ - - // Create a new matrix of the given size. 
If n_cols is zero (the default), - // it is assumed that the matrix is to be square; that is, n_rows x n_rows. - // The matrix is filled with "value", which defaults to zero. - Matrix::Matrix( int n_rows, int n_cols, float value ) - { - assert( n_rows >= 0 && n_cols >= 0 ); - rows = 0; - cols = 0; - elem = NULL; - SetSize( n_rows, n_cols ); - float *e = elem; - for( register int i = 0; i < rows * cols; i++ ) *e++ = value; - } - - // Copy constructor. - Matrix::Matrix( const Matrix &M ) - { - rows = 0; - cols = 0; - elem = NULL; - SetSize( M.Rows(), M.Cols() ); - register float *e = elem; - register float *m = M.Array(); - for( register int i = 0; i < rows * cols; i++ ) *e++ = *m++; - } - - Matrix::~Matrix() - { - SetSize( 0, 0 ); - } - - /*-------------------------------------------------------------------------* - * * - * M I S C E L L A N E O U S M E T H O D S * - * * - *-------------------------------------------------------------------------*/ - - // Re-shape the matrix. If the number of elements in the new matrix is - // different from the original matrix, the original data is deleted and - // replaced with a new array. If new_cols is zero (the default), it is - // assumed to be the same as new_rows -- i.e. a square matrix. - void Matrix::SetSize( int new_rows, int new_cols ) - { - if( new_cols == 0 ) new_cols = new_rows; - int n = new_rows * new_cols; - if( rows * cols != n ) - { - if( elem != NULL ) delete[] elem; - elem = ( n == 0 ) ? 
NULL : new float[ n ]; - } - rows = new_rows; - cols = new_cols; - } - - Vector Matrix::GetCol( int j ) const - { - Vector C( rows ); - float *e = elem + j; - float *c = C.Array(); - for( int i = 0; i < rows; i++ ) - { - *c++ = *e; - e += cols; - } - return C; - } - - Vector Matrix::GetRow( int i ) const - { - Vector R( cols ); - float *e = elem + ( i * cols ); - float *r = R.Array(); - for( int j = 0; j < cols; j++ ) *r++ = *e++; - return R; - } - - void Matrix::SetCol( int j, const Vector &C ) - { - assert( rows == C.Size() ); - float *e = elem + j; - float *c = C.Array(); - for( int i = 0; i < rows; i++ ) - { - *e = *c++; - e += cols; - } - } - - void Matrix::SetRow( int i, const Vector &R ) - { - assert( cols == R.Size() ); - float *e = elem + ( i * cols ); - float *r = R.Array(); - for( int j = 0; j < cols; j++ ) *e++ = *r++; - } - - Matrix Matrix::GetBlock( int imin, int imax, int jmin, int jmax ) const - { - if( imax < imin || jmax < jmin ) return Matrix(0,0); - Matrix M( imax - imin + 1, jmax - jmin + 1 ); - for( int i = imin; i <= imax; i++ ) - for( int j = jmin; j <= jmax; j++ ) - { - M( i - imin, j - jmin ) = (*this)( i, j ); - } - return M; - } - - void Matrix::SetBlock( int imin, int imax, int jmin, int jmax, const Matrix &B ) - { - int ni = imax - imin + 1; - int nj = jmax - jmin + 1; - assert( ni == B.Rows() ); - assert( nj == B.Cols() ); - int k = imin * cols + jmin; - for( int i = 0; i < ni; i++ ) - for( int j = 0; j < nj; j++ ) - { - elem[ k + i * cols + j ] = B(i,j); - } - } - - void Matrix::SetBlock( int imin, int imax, int jmin, int jmax, const Vector &V ) - { - int k = imin * cols + jmin; - if( imin == imax ) - { - int nj = jmax - jmin + 1; - assert( nj == V.Size() ); - for( int j = 0; j < nj; j++ ) elem[ k + j ] = V(j); - } - else if( jmin == jmax ) - { - int ni = imax - imin + 1; - assert( ni == V.Size() ); - for( int i = 0; i < ni; i++ ) elem[ k + i * cols ] = V(i); - } - else - { - // This assertion will be false, and will signal an error. 
- assert( imin == imax || jmin == jmax ); - } - } - - Matrix &Matrix::SwapRows( int i1, int i2 ) - { - float temp; - float *r1 = elem + ( i1 * cols ); - float *r2 = elem + ( i2 * cols ); - for( register int j = 0; j < cols; j++ ) - { - temp = *r1; - *r1 = *r2; - *r2 = temp; - r1++; - r2++; - } - return *this; - } - - Matrix &Matrix::SwapCols( int j1, int j2 ) - { - float temp; - float *c1 = elem + j1; - float *c2 = elem + j2; - for( register int i = 0; i < rows; i++ ) - { - temp = *c1; - *c1 = *c2; - *c2 = temp; - c1 += cols; - c2 += cols; - } - return *this; - } - - /*-------------------------------------------------------------------------* - * * - * A S S I G N M E N T O P E R A T O R S * - * * - *-------------------------------------------------------------------------*/ - Matrix& Matrix::operator=( const Matrix &M ) - { - SetSize( M.Rows(), M.Cols() ); - register float *e = elem; - register float *m = M.Array(); - for( register int i = 0; i < rows * cols; i++ ) *e++ = *m++; - return *this; - } - - Matrix& Matrix::operator=( float s ) - { - register float *e = elem; - for( register int i = 0; i < rows * cols; i++ ) *e++ = s; - return *this; - } - - /*-------------------------------------------------------------------------* - * * - * O P E R A T O R S * - * * - *-------------------------------------------------------------------------*/ - Vector operator*( const Matrix &M, const Vector &A ) - { - // Handle the special case with translation built in. 
- if( M.Cols() == 4 && M.Rows() == 4 && A.Size() == 3 ) - { - Vector C(3); - C(0) = M(0,0) * A(0) + M(0,1) * A(1) + M(0,2) * A(2) + M(0,3); - C(1) = M(1,0) * A(0) + M(1,1) * A(1) + M(1,2) * A(2) + M(1,3); - C(2) = M(2,0) * A(0) + M(2,1) * A(1) + M(2,2) * A(2) + M(2,3); - return C; - } - assert( M.Cols() == A.Size() ); - Vector C( M.Rows() ); - float *m = M.Array(); - for( int i = 0; i < M.Rows(); i++ ) - { - register float *a = A.Array(); - register double sum = (*m++) * (*a++); - for( register int j = 1; j < M.Cols(); j++ ) - sum += (*m++) * (*a++); - C(i) = sum; - } - return C; - } - - Vector operator*( const Vector &A, const Matrix &M ) - { - assert( A.Size() == M.Rows() ); - Vector C( M.Cols() ); - for( register int j = 0; j < M.Cols(); j++ ) - { - register double sum = 0.0; - register float *a = A.Array(); - for( register int i = 0; i < M.Rows(); i++ ) - sum += (*a++) * M(i,j); - C(j) = sum; - } - return C; - } - - Vector& operator*=( Vector &A, const Matrix &M ) - { - // Handle the special case with translation built in. 
- if( M.Cols() == 4 && M.Rows() == 4 && A.Size() == 3 ) - { - float x = M(0,0) * A(0) + M(0,1) * A(1) + M(0,2) * A(2) + M(0,3); - float y = M(1,0) * A(0) + M(1,1) * A(1) + M(1,2) * A(2) + M(1,3); - float z = M(2,0) * A(0) + M(2,1) * A(1) + M(2,2) * A(2) + M(2,3); - A(0) = x; - A(1) = y; - A(2) = z; - return A; - } - assert( M.Cols() == A.Size() ); - Vector C( M.Rows() ); - float *m = M.Array(); - for( register int i = 0; i < M.Rows(); i++ ) - { - double sum = 0.0; - for( register int j = 0; j < A.Size(); j++ ) - sum += (*m++) * A(j); - C(i) = sum; - } - return A = C; - } - - Matrix& operator*=( Matrix &M, float s ) - { - register float *m = M.Array(); - for( register int i = 0; i < M.Rows() * M.Cols(); i++ ) *m++ *= s; - return M; - } - - Matrix& operator/=( Matrix &M, float s ) - { - assert( s != 0.0 ); - register float *m = M.Array(); - for( register int i = 0; i < M.Rows() * M.Cols(); i++ ) *m++ /= s; - return M; - } - - Matrix operator+( const Matrix &A, const Matrix &B ) - { - assert( A.Rows() == B.Rows() ); - assert( A.Cols() == B.Cols() ); - Matrix C( A.Rows(), A.Cols() ); - register float *a = A.Array(); - register float *b = B.Array(); - register float *c = C.Array(); - for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) (*c++) = (*a++) + (*b++); - return C; - } - - Matrix operator-( const Matrix &A, const Matrix &B ) - { - assert( A.Rows() == B.Rows() ); - assert( A.Cols() == B.Cols() ); - Matrix C( A.Rows(), A.Cols() ); - register float *a = A.Array(); - register float *b = B.Array(); - register float *c = C.Array(); - for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) (*c++) = (*a++) - (*b++); - return C; - } - - Matrix operator-( const Matrix &A ) - { - Matrix B( A.Cols(), A.Rows() ); - register float *a = A.Array(); - register float *b = B.Array(); - for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) - { - *b++ = -(*a++); - } - return B; - } - - Matrix& operator+=( Matrix &A, const Matrix &B ) - { - assert( A.Rows() == B.Rows() ); - 
assert( A.Cols() == B.Cols() ); - register float *a = A.Array(); - register float *b = B.Array(); - for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) (*a++) += (*b++); - return A; - } - - Matrix operator*( const Matrix &A, const Matrix &B ) - { - assert( A.Cols() == B.Rows() ); - Matrix M( A.Rows(), B.Cols() ); - for( register int i = 0; i < A.Rows(); i++ ) - for( register int j = 0; j < B.Cols(); j++ ) - { - double sum = 0.0; - for( register int k = 0; k < A.Cols(); k++ ) sum += A(i,k) * B(k,j); - M(i,j) = sum; - } - return M; - } - - Matrix operator*( float s, const Matrix &A ) - { - Matrix B( A.Cols(), A.Rows() ); - register float *a = A.Array(); - register float *b = B.Array(); - for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) - { - *b++ = s * (*a++); - } - return B; - } - - Matrix operator*( const Matrix &A, float s ) - { - Matrix B( A.Cols(), A.Rows() ); - register float *a = A.Array(); - register float *b = B.Array(); - for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) - { - *b++ = s * (*a++); - } - return B; - } - - Matrix operator/( const Matrix &A, float s ) - { - assert( s != 0.0 ); - Matrix B( A.Cols(), A.Rows() ); - register float *a = A.Array(); - register float *b = B.Array(); - for( register int i = 0; i < A.Rows() * A.Cols(); i++ ) - { - *b++ = (*a++) / s; - } - return B; - } - - Matrix& operator*=( Matrix &A, const Matrix &B ) - { - assert( A.Cols() == B.Rows() ); - Vector R( B.Cols() ); - for( register int i = 0; i < A.Rows(); i++ ) - { - for( register int j = 0; j < B.Cols(); j++ ) // Compute the ith row of A * B. - { - double sum = A(i,0) * B(0,j); - for( register int k = 1; k < A.Cols(); k++ ) sum += A(i,k) * B(k,j); - R(j) = sum; - } - // Copy the new i'th row back into A. 
- for( register int k = 0; k < A.Cols(); k++ ) A(i,k) = R(k); - } - return A; - } - - /*-------------------------------------------------------------------------* - * * - * M I S C E L L A N E O U S F U N C T I O N S * - * * - *-------------------------------------------------------------------------*/ - Matrix Transp( const Matrix &M ) - { - Matrix T( M.Cols(), M.Rows() ); - register float *m = M.Array(); - for( register int i = 0; i < M.Rows(); i++ ) - for( register int j = 0; j < M.Cols(); j++ ) T(j,i) = *m++; - return T; - } - - // Computes A * Transp(A). - Matrix AATransp( const Matrix &A ) - { - int n = A.Rows(); - Matrix B( n, n ); - for( register int i = 0; i < n; i++ ) - for( register int j = 0; j < n; j++ ) - { - double sum = 0.0; - for( register int k = 0; k < A.Cols(); k++ ) - sum += A(i,k) * A(j,k); - B(i,j) = sum; - } - return B; - } - - // Computes Transp(A) * A. - Matrix ATranspA( const Matrix &A ) - { - int n = A.Cols(); - Matrix B( n, n ); - for( register int i = 0; i < n; i++ ) - for( register int j = 0; j < n; j++ ) - { - double sum = 0.0; - for( register int k = 0; k < A.Rows(); k++ ) - sum += A(k,i) * A(k,j); - B(i,j) = sum; - } - return B; - } - - // Computes the outer product of the vectors A and B. - Matrix Outer( const Vector &A, const Vector &B ) - { - Matrix M( A.Size(), B.Size() ); - for( register int i = 0; i < A.Size(); i++ ) - { - float c = A(i); - for( register int j = 0; j < B.Size(); j++ ) M(i,j) = c * B(j); - } - return M; - } - - // Computes the L1-norm of the matrix A, which is the maximum absolute - // row sum. - double OneNorm( const Matrix &A ) - { - double norm = 0.0; - for( register int i = 0; i < A.Rows(); i++ ) - { - double sum = 0.0; - for( register int j = 0; j < A.Cols(); j++ ) sum += Abs( A(i,j) ); - if( sum > norm ) norm = sum; - } - return norm; - } - - // Computes the L-infinity norm of the matrix A, which is the maximum - // absolute column sum. 
- double SupNorm( const Matrix &A ) - { - double norm = 0.0; - for( register int j = 0; j < A.Cols(); j++ ) - { - double sum = 0.0; - for( register int i = 0; i < A.Rows(); i++ ) sum += Abs( A(i,j) ); - if( sum > norm ) norm = sum; - } - return norm; - } - - // Returns the square matrix with the elements of the vector d along - // its diagonal. - Matrix Diag( const Vector &d ) - { - Matrix D( d.Size() ); - for( register int i = 0; i < d.Size(); i++ ) D(i,i) = d(i); - return D; - } - - // Returns the 3 x 3 diagonal matrix with x, y, and z as its diagonal - // elements. - Matrix Diag( float x, float y, float z ) - { - Matrix D(3,3); - D(0,0) = x; - D(1,1) = y; - D(2,2) = z; - return D; - } - - // Returns the vector consisting of the diagonal elements of the - // matrix M, which need not be square. - Vector Diag( const Matrix &M ) - { - int m = Min( M.Rows(), M.Cols() ); - Vector V(m); - for( register int i = 0; i < m; i++ ) V(i) = M(i,i); - return V; - } - - // Returns the n x n identity matrix. - Matrix Ident( int n ) - { - Matrix I( n ); - for( register int i = 0; i < n; i++ ) I(i,i) = 1.0; - return I; - } - - // Determines whether the matrix M is "Null" -- i.e. has zero rows - // or columns. - int Null( const Matrix &M ) - { - return M.Rows() == 0 || M.Cols() == 0; - } - - int Square( const Matrix &M ) - { - return M.Rows() == M.Cols(); - } - - // Convert a "vector-shaped" matrix to a vector. That is, represent a - // matrix with a single row or a single column as a vector. - Vector ToVector( const Matrix &M ) - { - if( M.Rows() == 1 ) - { - Vector V( M.Cols() ); - for( int j = 0; j < M.Cols(); j++ ) V(j) = M(0,j); - return V; - } - else if( M.Cols() == 1 ) - { - Vector V( M.Rows() ); - for( int i = 0; i < M.Rows(); i++ ) V(i) = M(i,0); - return V; - } - else - { - // Report an error. 
- assert( M.Rows() == 1 || M.Cols() == 1 ); - } - return Vector(); - } - - std::ostream &operator<<( std::ostream &out, const Matrix &M ) - { - if( M.Rows() == 0 || M.Cols() == 0 ) - { - out << "NULL" << std::endl; - } - else for( register int i = 0; i < M.Rows(); i++ ) - { - out << form( "%3d: ", i ); - for( register int j = 0; j < M.Cols(); j++ ) - out << form( " %10.5g", M(i,j) ); - out << std::endl; - } - return out; - } - - /*-------------------------------------------------------------------------* - * R O T A T I O N * - * * - * Builds a 3x3 modeling matrix that performs a rotation about an * - * arbitrary axis. The rotation is right-handed about this axis and * - * "angle" is taken to be in radians. The only error that can occur is * - * when "axis" is the zero-vector. * - * * - *-------------------------------------------------------------------------*/ - Matrix Rotation( const Vector &Axis, float angle ) - { - // Compute a unit quaternion (a,b,c,d) that performs the rotation. - - float t = TwoNormSqr( Axis ); - if( t == 0.0 ) return Matrix(3,3); - t = sin( angle * 0.5 ) / sqrt( t ); - - // Fill in the entries of the quaternion. - - float a = cos( angle * 0.5 ); - float b = t * Axis(0); - float c = t * Axis(1); - float d = t * Axis(2); - - // Compute all the double products of a, b, c, and d, except a * a. - - float bb = b * b; - float cc = c * c; - float dd = d * d; - float ab = a * b; - float ac = a * c; - float ad = a * d; - float bc = b * c; - float bd = b * d; - float cd = c * d; - - // Fill in the entries of the rotation matrix. 
- - Matrix R(3,3); - - R(0,0) = 1.0 - 2.0 * ( cc + dd ); - R(0,1) = 2.0 * ( bc + ad ); - R(0,2) = 2.0 * ( bd - ac ); - - R(1,0) = 2.0 * ( bc - ad ); - R(1,1) = 1.0 - 2.0 * ( bb + dd ); - R(1,2) = 2.0 * ( cd + ab ); - - R(2,0) = 2.0 * ( bd + ac ); - R(2,1) = 2.0 * ( cd - ab ); - R(2,2) = 1.0 - 2.0 * ( bb + cc ); - - return R; - } - - /*-------------------------------------------------------------------------* - * R O T A T I O N * - * * - * Builds a 4x4 modeling matrix that performs a rotation about an * - * arbitrary axis through an arbitrary point. The rotation is * - * right-handed about this axis and "angle" is taken to be in radians. * - * * - *-------------------------------------------------------------------------*/ - Matrix Rotation( const Vector &Axis, const Vector &Origin, float angle ) - { - Matrix R = Rotation( Axis, angle ); // A simple 3x3 rotation. - Matrix M = Ident(4); // A 4x4 including translation. - - // Compute the last row of the matrix (the translation) using the - // 3x3 rotation matrix. We need to compute the last row of the 4x4 - // matrix that performs Translate( -Origin ) * Rotate * Translate( Origin ). - // - // | I p | | R 0 | | I -p | | R p - Rp | - // | | | | | | = | | - // | 0 1 | | 0 1 | | 0 1 | | 0 1 | - // - // So, the desired column is p - R p. - - Vector V( Origin - R * Origin ); - for( int i = 0; i < 3; i++ ) - { - M(i,3) = V(i); - for( int j = 0; j < 3; j++ ) - M(i,j) = R(i,j); - } - return M; - } - - /*-------------------------------------------------------------------------* - * X R O T A T I O N * - * * - * Builds a 3x3 modeling matrix that performs a rotation about the X-axis. 
* - * * - *-------------------------------------------------------------------------*/ - Matrix Xrotation( float angle ) - { - Matrix M = Ident(3); - float c = cos( angle ); - float s = sin( angle ); - M(1,1) = c; M(1,2) = -s; - M(2,1) = s; M(2,2) = c; - return M; - } - - /*-------------------------------------------------------------------------* - * Y R O T A T I O N * - * * - * Builds a 3x3 modeling matrix that performs a rotation about the Y-axis. * - * * - *-------------------------------------------------------------------------*/ - Matrix Yrotation( float angle ) - { - Matrix M = Ident(3); - float c = cos( angle ); - float s = sin( angle ); - M(0,0) = c; M(0,2) = -s; - M(2,0) = s; M(2,2) = c; - return M; - } - - /*-------------------------------------------------------------------------* - * Z R O T A T I O N * - * * - * Builds a 3x3 modeling matrix that performs a rotation about the Z-axis. * - * * - *-------------------------------------------------------------------------*/ - Matrix Zrotation( float angle ) - { - Matrix M = Ident(3); - float c = cos( angle ); - float s = sin( angle ); - M(0,0) = c; M(0,1) = -s; - M(1,0) = s; M(1,1) = c; - return M; - } - - /*-------------------------------------------------------------------------* - * H O U S E H O L D E R * - * * - * Returns the Householder reflection matrix that reflects through the * - * plane orthogonal to V. The vector V is not assumed to be normalized. * - * * - *-------------------------------------------------------------------------*/ - Matrix Householder( const Vector &V ) - { - Matrix I = Ident( V.Size() ); - float c = 2.0 / ( V * V ); - return I - Outer( c * V, V ); - } - - /*=========================================================================* - * R O T A T I O N Author: Jim Arvo, 1991 * - * * - * This routine maps three values (x1, x2, x3) in the range [0,1] into * - * a 3x3 rotation matrix, M. 
Uniformly distributed random variables * - * x1, x2, and x3 create uniformly distributed random rotation matrices. * - * To create small uniformly distributed "perturbations", supply * - * samples in the following ranges * - * * - * x1 in [ 0, d ] * - * x2 in [ 0, 1 ] * - * x3 in [ 0, d ] * - * * - * where 0 < d < 1 controls the size of the perturbation. Any of the * - * random variables may be stratified (or "jittered") for a slightly more * - * even distribution. * - * * - *=========================================================================*/ - Matrix Rotation( float x1, float x2, float x3 ) - { - Matrix M(3,3); - float theta = x1 * TwoPi; // Rotation about the pole (Z). - float phi = x2 * TwoPi; // For direction of pole deflection. - float z = x3 * 2.0; // For magnitude of pole deflection. - - // Compute a vector V used for distributing points over the sphere - // via the reflection I - V Transpose(V). This formulation of V - // will guarantee that if x1 and x2 are uniformly distributed, - // the reflected points will be uniform on the sphere. Note that V - // has length sqrt(2) to eliminate the 2 in the Householder matrix. - - float r = sqrt( z ); - float Vx = sin( phi ) * r; - float Vy = cos( phi ) * r; - float Vz = sqrt( 2.0 - z ); - - // Compute the row vector S = Transpose(V) * R, where R is a simple - // rotation by theta about the z-axis. No need to compute Sz since - // it's just Vz. - - float st = sin( theta ); - float ct = cos( theta ); - float Sx = Vx * ct - Vy * st; - float Sy = Vx * st + Vy * ct; - - // Construct the rotation matrix ( V Transpose(V) - I ) R, which - // is equivalent to V S - R. 
- - M(0,0) = Vx * Sx - ct; - M(0,1) = Vx * Sy - st; - M(0,2) = Vx * Vz; - - M(1,0) = Vy * Sx + st; - M(1,1) = Vy * Sy - ct; - M(1,2) = Vy * Vz; - - M(2,0) = Vz * Sx; - M(2,1) = Vz * Sy; - M(2,2) = 1.0 - z; // This equals Vz * Vz - 1.0 - - return M; - } - - /*-------------------------------------------------------------------------* - * P A R T I A L P I V O T * - * * - * Look for the element with the largest magnitude on or below the * - * diagonal in column "col" of the matrix A. Bring this element to the * - * diagonal by a row interchange. Perform the same row interchange on b. * - * * - *-------------------------------------------------------------------------*/ - static int PartialPivot( int col, Matrix &A, Vector &b ) - { - int n = A.Cols(); - float a_max = Abs( A( col, col ) ); - int i_max = col; - for( int i = col + 1; i < n; i++ ) - { - float temp = Abs( A( i, col ) ); - if( temp > a_max ) - { - a_max = temp; - i_max = i; - } - } - if( a_max == 0.0 ) return 0; - if( i_max != col ) - { - A.SwapRows( col, i_max ); - b.Swap ( col, i_max ); - } - return 1; - } - - /*-------------------------------------------------------------------------* - * G A U S S I A N E L I M I N A T I O N * - * * - * Solves the linear system A x = b using Gaussian elimination, with or * - * without partial pivoting. * - * * - *-------------------------------------------------------------------------*/ - int GaussElimination( const Matrix &A, const Vector &b, Vector &x, pivot_type pivot ) - { - assert( Square( A ) ); - assert( A.Rows() == b.Size() ); - Matrix B( A ); - Vector c( b ); - x.SetSize( A.Cols() ); - int m = B.Rows(); - register int i, j, k; - - // Perform Gaussian elimination on the copies, B and c. 
- - for( i = 0; i < m; i++ ) - { - if( pivot == pivot_partial ) PartialPivot( i, B, c ); - - for( j = i + 1; j < m; j++ ) - { - double scale = -B(j,i) / B(i,i); - for( k = i; k < m; k++ ) - B(j,k) += scale * B(i,k); - B(j,i) = 0.0; - c(j) += scale * c(i); - } - } - - // Now solve by back substitution. - - for( i = m - 1; i >= 0; i-- ) - { - double a = 0.0; - for( j = i + 1; j < m; j++ ) a += B(i,j) * x(j); - x(i) = ( c(i) - a ) / B(i,i); - } - - return 1; - } - - /*-------------------------------------------------------------------------* - * L E A S T S Q U A R E S * - * * - * Solves the normal equations associated with the system A x = b, which * - * are given by Transp(A) A x = Transp(A) b. * - * * - *-------------------------------------------------------------------------*/ - int LeastSquares( const Matrix &A, const Vector &b, Vector &x ) - { - // - // Set up and solve the normal equations Transp(A) A x = Transp(A) b. - // Note that Transp(A) * b is computed here as b * A. - // - GaussElimination( ATranspA(A), b * A, x ); - return 1; - } - - /*-------------------------------------------------------------------------* - * D E T E R M I N A N T * - * * - * Computes the determinant of the n by n matrix M using Householder * - * transformations. * - * * - *-------------------------------------------------------------------------*/ - double Determinant( const Matrix &M ) - { - static const float MachEps = MachineEpsilon(); - assert( Square(M) ); - - double dot; - int k; - Matrix A = M; // Make a copy that we can destroy. - double det = 1.0; // Multiply diagonal elements as they are generated. - int sign = 1; // Keep track of sign (each reflection has det -1). - int n = M.Cols(); - - for( int i = 0; i < n - 1; i++ ) - { - // Compute the 2-norm of the first column of the (n-i)x(n-i) submatrix. 
- - dot = 0.0; - for( k = i; k < n; k++ ) dot += Sqr( A(k,i) ); - - double Xnorm = sqrt( dot ); - if( Xnorm == 0.0 ) return 0.0; - - // This norm is another diagonal element of the upper triangular - // matrix, so we multiply it into the running product for det. - - det *= Xnorm; - - // If X is already of the right form we must not perform the - // processing because V will be zero. - - float x1 = Abs( A(i,i) ); - float diff = Abs( Xnorm - x1 ); - if( diff < MachEps * Max( Xnorm, x1 ) ) continue; // This column is okay as is. - - // Each Householder transformation has a determinant of -1, - // so we must keep track of how many we apply. - - sign *= -1; - - // Compute the V vector, which will define the Householder - // transformation via H = I - V transp(V). Leave it in the - // i'th column of A. V = sqrt(2) * Normalized( X - ( Xnorm, 0, 0,... ) ). - - float scale = 1.0 / sqrt( Xnorm * Abs( A(i,i) - Xnorm ) ); // sqrt(2) / || p || - A(i,i) = ( A(i,i) - Xnorm ) * scale; - for( k = i + 1; k < n; k++ ) A(k,i) *= scale; - - // Now apply the transformation I - V Transp(V) to all the remaining columns, - // except for the first row. - - for( int j = i + 1; j < n; j++ ) - { - // Compute Y dot V. - - dot = 0.0; - for( k = i; k < n; k++ ) dot += A(k,i) * A(k,j); - - // Subtract V ( V dot A(*,j) ) from A(*,j), ignoring the first row. - - for( k = i + 1; k < n; k++ ) A(k,j) -= A(k,i) * dot; - - } // for j - - } // for i - - // Now multiply in the very last element of the matrix and - // the accumulated sign. - - return det * A(n-1,n-1) * sign; - } - - /*-------------------------------------------------------------------------* - * C O F A C T O R * - * * - * Computes the (i,j) cofactor of the n by n matrix M. 
* - * * - *-------------------------------------------------------------------------*/ - double Matrix::Cofactor( int omit_i, int omit_j ) const - { - assert( Square( *this ) ); - assert( omit_i >= 0 && omit_j >= 0 ); - assert( omit_i < Rows() ); - assert( omit_j < Cols() ); - - // Create a new matrix that is smaller by one in both dimensions and - // copy the old matrix into it, omitting the specified row and column. - - Matrix A( Rows() - 1, Cols() - 1 ); - for( int i = 0; i < Rows() - 1; i++ ) - { - int ii = ( i < omit_i ) ? i : i + 1; - for( int j = 0; j < Cols() - 1; j++ ) - { - int jj = ( j < omit_j ) ? j : j + 1; - A( i, j ) = (*this)(ii,jj); - } - } - - // Return the determinant of the smaller matrix. - - return Determinant( A ); - } - - /*-------------------------------------------------------------------------* - * A D J O I N T * - * * - * Computes the adjoint of a matrix. * - * * - *-------------------------------------------------------------------------*/ - Matrix Adjoint( const Matrix &M ) - { - double det; - return Adjoint( M, det ); // Discard the determinant. 
- } - - Matrix Adjoint( const Matrix &M, double &det ) - { - int n = M.Rows(); - det = 0.0; - Matrix A( n, n ); - assert( Square(M) ); - if( n == 3 ) - { - A(0,0) = M(1,1) * M(2,2) - M(1,2) * M(2,1); - A(0,1) = M(1,2) * M(2,0) - M(1,0) * M(2,2); - A(0,2) = M(1,0) * M(2,1) - M(1,1) * M(2,0); - - A(1,0) = M(0,2) * M(2,1) - M(0,1) * M(2,2); - A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0); - A(1,2) = M(0,1) * M(2,0) - M(0,0) * M(2,1); - - A(2,0) = M(0,1) * M(1,2) - M(0,2) * M(1,1); - A(2,1) = M(0,2) * M(1,0) - M(0,0) * M(1,2); - A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0); - - det = A(0,0) * M(0,0) + A(1,0) * M(1,0) + A(2,0) * M(2,0); - } - else - { - for( register int i = 0; i < n; i++ ) - { - for( register int j = 0; j < n; j++ ) - { - if( Odd( i + j ) ) - A(i,j) = -M.Cofactor(i,j); - else A(i,j) = M.Cofactor(i,j); - } - det += M(i,0) * A(i,0); - } - } - return A; - } - - /*-------------------------------------------------------------------------* - * I N V E R S E * - * * - * Computes the inverse of a square matrix. * - * * - *-------------------------------------------------------------------------*/ - Matrix Inverse( const Matrix &M ) - { - assert( Square( M ) ); - int n = M.Cols(); - Matrix Inv( n, n ); - Vector b( n ), x( n ); - - for( int i = 0; i < n; i++ ) - { - if( i > 0 ) b( i - 1 ) = 0.0; - b(i) = 1.0; - GaussElimination( M, b, x ); - Inv.SetCol( i, x ); - } - return Inv; - } - - /*-------------------------------------------------------------------------* - * T R A C E * - * * - * Computes the trace of a square matrix. * - * * - *-------------------------------------------------------------------------*/ - extern double Trace( const Matrix &M ) - { - assert( Square(M) ); - double trace = M(0,0); - for( int i = 1; i < M.Cols(); i++ ) trace += M(i,i); - return trace; - } -}; - - - -/* - -C -C Subroutine GAUSS solves the the system Ax = b by using Gaussian elimination. 
-C - -SUBROUTINE GAUSS( A, B, X, LDA, N, IFLAG ) -REAL A( LDA, N ), B( N ), X( N ) - -DO 300 I = 1 , N - 1 -I2 = I -CALL PIVOT( A, B, LDA, N, I2, IFLAG ) -IF ( IFLAG .LT. 0 ) RETURN -DO 200 J = I + 1 , N -TEMP = A( J , I ) / A( I , I ) -A( J , I ) = 0.0 -B( J ) = B( J ) - TEMP * B( I ) -DO 100 K = I + 1 , N -A( J , K ) = A( J , K ) - TEMP * A( I , K ) -100 CONTINUE -200 CONTINUE -300 CONTINUE - -X( N ) = B( N ) / A( N , N ) -DO 500 I = N - 1 , 1 , -1 -TEMP = 0.0 -DO 400 J = I + 1 , N -TEMP = TEMP + A( I , J ) * X( J ) -400 CONTINUE -X( I ) = ( B( I ) - TEMP ) / A( I , I ) -500 CONTINUE - -RETURN -END - - - -SUBROUTINE PIVOT( A, B, LDA, N, J, IFLAG ) -REAL A( LDA, N ), B( N ), AMAX, TEMP -DATA TOL / 1.0E-6 / - -IFLAG = -1 -IF ( J .GT. N ) RETURN -IF ( J .EQ. N .AND. ABS( A(N,N) ) .LT. TOL ) RETURN -IF ( J .EQ. N ) GO TO 40 - -AMAX = ABS( A( J , J ) ) -INDEX = J -10 DO 20 I = J + 1 , N -IF ( ABS( A( I , J ) ) .LE. AMAX ) GO TO 20 -AMAX = ABS( A( I , J ) ) -INDEX = I -20 CONTINUE - -IF ( AMAX .LT. TOL ) RETURN - -TEMP = B( J ) -B( J ) = B( INDEX ) -B( INDEX ) = TEMP - -DO 30 K = 1 , N -TEMP = A( J , K ) -A( J , K ) = A( INDEX , K ) -A( INDEX , K ) = TEMP -30 CONTINUE - -40 IFLAG = 1 -RETURN -END - - -*/ - - - - - diff --git a/src/nvtt/bc7/arvo/Matrix.h b/src/nvtt/bc7/arvo/Matrix.h deleted file mode 100644 index 1832c8f..0000000 --- a/src/nvtt/bc7/arvo/Matrix.h +++ /dev/null @@ -1,142 +0,0 @@ -/*************************************************************************** -* Matrix.h * -* * -* General Vector and Matrix classes, with all the associated methods. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 08/16/2000 Revamped for CIT tools. * -* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * -* arvo 06/30/1993 Added singular value decomposition class. * -* arvo 06/25/1993 Major revisions. * -* arvo 09/08/1991 Initial implementation. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __MATRIX_INCLUDED__ -#define __MATRIX_INCLUDED__ - -#include -#include "Vector.h" - -namespace ArvoMath { - - class Matrix { - public: - Matrix( const Matrix & ); - Matrix( int num_rows = 0, int num_cols = 0, float value = 0.0 ); - ~Matrix(); - Matrix &operator=( const Matrix &M ); - Matrix &operator=( float s ); - Vector GetCol( int col ) const; - Vector GetRow( int row ) const; - void SetCol( int col, const Vector & ); - void SetRow( int row, const Vector & ); - Matrix GetBlock( int imin, int imax, int jmin, int jmax ) const; - void SetBlock( int imin, int imax, int jmin, int jmax, const Matrix & ); - void SetBlock( int imin, int imax, int jmin, int jmax, const Vector & ); - Matrix &SwapRows( int i1, int i2 ); - Matrix &SwapCols( int j1, int j2 ); - void SetSize( int rows, int cols = 0 ); - double Cofactor( int i, int j ) const; - static const Matrix Null; - - public: // Inlined functions. - inline float operator()( int i, int j ) const { return elem[ i * cols + j ]; } - inline float &operator()( int i, int j ) { return elem[ i * cols + j ]; } - inline int Rows () const { return rows; } - inline int Cols () const { return cols; } - inline float *Array () const { return elem; } - - private: - int rows; // Number of rows in the matrix. - int cols; // Number of columns in the matrix. 
- float *elem; // Pointer to the actual data. - }; - - - extern Vector operator * ( const Matrix &, const Vector & ); - extern Vector operator * ( const Vector &, const Matrix & ); - extern Vector& operator *= ( Vector &, const Matrix & ); - extern Matrix Outer ( const Vector &, const Vector & ); // Outer product. - extern Matrix operator + ( const Matrix &, const Matrix & ); - extern Matrix operator - ( const Matrix & ); - extern Matrix operator - ( const Matrix &, const Matrix & ); - extern Matrix operator * ( const Matrix &, const Matrix & ); - extern Matrix operator * ( const Matrix &, float ); - extern Matrix operator * ( float , const Matrix & ); - extern Matrix operator / ( const Matrix &, float ); - extern Matrix& operator += ( Matrix &, const Matrix & ); - extern Matrix& operator *= ( Matrix &, float ); - extern Matrix& operator *= ( Matrix &, const Matrix & ); - extern Matrix& operator /= ( Matrix &, float ); - extern Matrix Ident ( int n ); - extern Matrix Householder ( const Vector & ); - extern Matrix Rotation ( const Vector &Axis, float angle ); - extern Matrix Rotation ( const Vector &Axis, const Vector &Origin, float angle ); - extern Matrix Rotation ( float, float, float ); // For random 3D rotations. 
- extern Matrix Xrotation ( float ); - extern Matrix Yrotation ( float ); - extern Matrix Zrotation ( float ); - extern Matrix Diag ( const Vector & ); - extern Vector Diag ( const Matrix & ); - extern Matrix Diag ( float, float, float ); - extern Matrix Adjoint ( const Matrix & ); - extern Matrix Adjoint ( const Matrix &, double &det ); - extern Matrix AATransp ( const Matrix & ); - extern Matrix ATranspA ( const Matrix & ); - extern double OneNorm ( const Matrix & ); - extern double SupNorm ( const Matrix & ); - extern double Determinant ( const Matrix & ); - extern double Trace ( const Matrix & ); - extern Matrix Transp ( const Matrix & ); - extern Matrix Inverse ( const Matrix & ); - extern int Null ( const Matrix & ); - extern int Square ( const Matrix & ); - extern Vector ToVector ( const Matrix & ); // Only for vector-shaped matrices. - - enum pivot_type { - pivot_off, - pivot_partial, - pivot_total - }; - - extern int GaussElimination( - const Matrix &A, - const Vector &b, // This is the right-hand side. - Vector &x, // This is the matrix we are solving for. - pivot_type = pivot_off - ); - - extern int LeastSquares( - const Matrix &A, - const Vector &b, - Vector &x - ); - - extern int WeightedLeastSquares( - const Matrix &A, - const Vector &b, - const Vector &w, - Vector &x - ); - - std::ostream &operator<<( - std::ostream &out, - const Matrix & - ); -}; - -#endif diff --git a/src/nvtt/bc7/arvo/Perm.cpp b/src/nvtt/bc7/arvo/Perm.cpp deleted file mode 100644 index 87e98e3..0000000 --- a/src/nvtt/bc7/arvo/Perm.cpp +++ /dev/null @@ -1,503 +0,0 @@ -/*************************************************************************** -* Perm.C * -* * -* This file defines permutation class: that is, a class for creating and * -* manipulating finite sequences of distinct integers. The main feature * -* of the class is the "++" operator that can be used to step through all * -* N! permutations of a sequence of N integers. 
As the set of permutations * -* forms a multiplicative group, a multiplication operator and an * -* exponentiation operator are also defined. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 07/01/93 Added the Partition class. * -* arvo 03/23/93 Initial coding. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include "Perm.h" -#include "ArvoMath.h" -#include "Char.h" - -namespace ArvoMath { - - /*************************************************************************** - * - * L O C A L F U N C T I O N S - * - ***************************************************************************/ - - static void Reverse( int *p, int n ) - { - int k = n >> 1; - int m = n - 1; - for( int i = 0; i < k; i++ ) Swap( p[i], p[m-i] ); - } - - static void Error( char *msg ) - { - fprintf( stderr, "ERROR: Perm, %s.\n", msg ); - } - - /*************************************************************************** - ** - ** M E M B E R F U N C T I O N S - ** - ***************************************************************************/ - - Perm::Perm( int Left, int Right ) - { - a = ( Left < Right ) ? Left : Right; - b = ( Left > Right ) ? 
Left : Right; - p = new int[ Size() ]; - Reset( *this ); - } - - Perm::Perm( const Perm &Q ) - { - a = Q.Min(); - b = Q.Max(); - p = new int[ Q.Size() ]; - for( int i = 0; i < Size(); i++ ) p[i] = Q[i]; - } - - Perm::Perm( const char *str ) - { - (*this) = str; - } - - Perm &Perm::operator=( const char *str ) - { - int k, m = 0, n = 0; - char dig[10]; - char c; - if( p != NULL ) delete[] p; - p = new int[ strlen(str)/2 + 1 ]; - for(;;) - { - c = *str++; - if( isDigit(c) ) dig[m++] = c; - else if( m > 0 ) - { - dig[m] = NullChar; - sscanf( dig, "%d", &k ); - if( n == 0 ) a = k; else if( k < a ) a = k; - if( n == 0 ) b = k; else if( k > b ) b = k; - p[n++] = k; - m = 0; - } - if( c == NullChar ) break; - } - for( int i = 0; i < n; i++ ) - { - int N = i + a; - int okay = 0; - for( int j = 0; j < n; j++ ) - if( p[j] == N ) { okay = 1; break; } - if( !okay ) - { - Error( "string is not a valid permutation" ); - return *this; - } - } - return *this; - } - - void Perm::Get( char *str ) const - { - for( int i = 0; i < Size(); i++ ) - str += sprintf( str, "%d ", p[i] ); - *str = NullChar; - } - - int Perm::Next() - { - int i, m, k = 0; - int N, M = 0; - - // Look for the first element of p that is larger than its successor. - // If no such element exists, we are done. - - M = p[0]; // M is always the "previous" value. - for( i = 1; i < Size(); i++ ) // Now start with second element. - { - if( p[i] > M ) { k = i; break; } - M = p[i]; - } - if( k == 0 ) return 0; // Already in descending order. - m = k - 1; - - // Find the largest entry before k that is less than p[k]. - // One exists because p[k] is bigger than M, i.e. p[k-1]. - - N = p[k]; - for( i = 0; i < k - 1; i++ ) - { - if( p[i] < N && p[i] > M ) { M = p[i]; m = i; } - } - Swap( p[m], p[k] ); // Entries 0..k-1 are still decreasing. - Reverse( p, k ); // Make first k elements increasing. 
- return 1; - } - - int Perm::Prev() - { - int i, m, k = 0; - int N, M = 0; - - // Look for the first element of p that is less than its successor. - // If no such element exists, we are done. - - M = p[0]; // M will always be the "previous" value. - for( i = 1; i < Size(); i++ ) // Start with the second element. - { - if( p[i] < M ) { k = i; break; } - M = p[i]; - } - if( k == 0 ) return 0; // Already in ascending order. - m = k - 1; - - // Find the smallest entry before k that is greater than p[k]. - // One exists because p[k] is less than M, i.e. p[k-1]. - - N = p[k]; - for( i = 0; i < k - 1; i++ ) - { - if( p[i] > N && p[i] < M ) { M = p[i]; m = i; } - } - Swap( p[m], p[k] ); // Entries 0..k-1 are still increasing. - Reverse( p, k ); // Make first k elements decreasing. - return 1; - } - - - /*************************************************************************** - ** - ** O P E R A T O R S - ** - ***************************************************************************/ - - int Perm::operator++() - { - return Next(); - } - - int Perm::operator--() - { - return Prev(); - } - - Perm &Perm::operator+=( int n ) - { - int i; - if( n > 0 ) for( i = 0; i < n; i++ ) if( !Next() ) break; - if( n < 0 ) for( i = n; i < 0; i++ ) if( !Prev() ) break; - return *this; - } - - Perm &Perm::operator-=( int n ) - { - int i; - if( n > 0 ) for( i = 0; i < n; i++ ) if( !Prev() ) break; - if( n < 0 ) for( i = n; i < 0; i++ ) if( !Next() ) break; - return *this; - } - - int Perm::operator[]( int n ) const - { - if( n < 0 || Size() <= n ) - { - Error( "permutation index[] out of range" ); - return 0; - } - return p[ n ]; - } - - int Perm::operator()( int n ) const - { - if( n < Min() || Max() < n ) - { - Error( "permutation index() out of range" ); - return 0; - } - return p[ n - Min() ]; - } - - Perm &Perm::operator=( const Perm &Q ) - { - if( Size() != Q.Size() ) - { - delete[] p; - p = new int[ Q.Size() ]; - } - a = Q.Min(); - b = Q.Max(); - for( int i = 0; i < Size(); i++ ) 
p[i] = Q[i]; - return *this; - } - - Perm Perm::operator*( const Perm &Q ) const - { - if( Min() != Q.Min() ) return Perm(0); - if( Max() != Q.Max() ) return Perm(0); - Perm A( Min(), Max() ); - for( int i = 0; i < Size(); i++ ) A.Elem(i) = p[ Q[i] - Min() ]; - return A; - } - - Perm Perm::operator^( int n ) const - { - Perm A( Min(), Max() ); - int pn = n; - if( n < 0 ) // First compute the inverse. - { - for( int i = 0; i < Size(); i++ ) - A.Elem( p[i] - Min() ) = i + Min(); - pn = -n; - } - for( int i = 0; i < Size(); i++ ) - { - int k = ( n < 0 ) ? A[i] : p[i]; - for( int j = 1; j < pn; j++ ) k = p[ k - Min() ]; - A.Elem(i) = k; - } - return A; - } - - Perm &Perm::operator()( int i, int j ) - { - Swap( p[ i - Min() ], p[ j - Min() ] ); - return *this; - } - - int Perm::operator==( const Perm &Q ) const - { - int i; - if( Min() != Q.Min() ) return 0; - if( Max() != Q.Max() ) return 0; - for( i = 0; i < Size(); i++ ) if( p[i] != Q[i] ) return 0; - return 1; - } - - int Perm::operator<=( const Perm &Q ) const - { - int i; - if( Min() != Q.Min() ) return 0; - if( Max() != Q.Max() ) return 0; - for( i = 0; i < Size(); i++ ) if( p[i] != Q[i] ) return p[i] < Q[i]; - return 1; - } - - void Reset( Perm &P ) - { - for( int i = 0; i < P.Size(); i++ ) P.Elem(i) = P.Min() + i; - } - - int End( const Perm &P ) - { - int c = P[0]; - for( int i = 1; i < P.Size(); i++ ) - { - if( c < P[i] ) return 0; - c = P[i]; - } - return 1; - } - - void Print( const Perm &P ) - { - if( P.Size() > 0 ) - { - printf( "%d", P[0] ); - for( int i = 1; i < P.Size(); i++ ) printf( " %d", P[i] ); - printf( "\n" ); - } - } - - int Even( const Perm &P ) - { - return !Odd( P ); - } - - int Odd( const Perm &P ) - { - int count = 0; - Perm Q( P ); - for( int i = P.Min(); i < P.Max(); i++ ) - { - if( Q(i) == i ) continue; - for( int j = P.Min(); j <= P.Max(); j++ ) - { - if( j == i ) continue; - if( Q(j) == i ) - { - Q(i,j); - count = ( j - i ) + ( count % 2 ); - } - } - } - return count % 2; - } - - - 
/*************************************************************************** - ** - ** P A R T I T I O N S - ** - ***************************************************************************/ - - Partition::Partition( ) - { - Bin = NULL; - bins = 0; - balls = 0; - } - - Partition::Partition( const Partition &Q ) - { - Bin = new int[ Q.Bins() ]; - bins = Q.Bins(); - balls = Q.Balls(); - for( int i = 0; i < bins; i++ ) Bin[i] = Q[i]; - } - - Partition::Partition( int bins_, int balls_ ) - { - bins = bins_; - balls = balls_; - Bin = new int[ bins ]; - Reset( *this ); - } - - void Partition::operator+=( int bin ) // Add a ball to this bin. - { - if( bin < 0 || bin >= bins ) fprintf( stderr, "ERROR -- bin number out of range.\n" ); - balls++; - Bin[ bin ]++; - } - - int Partition::operator==( const Partition &P ) const // Compare two partitions. - { - if( Balls() != P.Balls() ) return 0; - if( Bins () != P.Bins () ) return 0; - for( int i = 0; i < bins; i++ ) - { - if( Bin[i] != P[i] ) return 0; - } - return 1; - } - - void Partition::operator=( int n ) // Set to the n'th configuration. - { - Reset( *this ); - for( int i = 0; i < n; i++ ) ++(*this); - } - - int Partition::operator!=( const Partition &P ) const - { - return !( *this == P ); - } - - void Partition::operator=( const Partition &Q ) - { - if( bins != Q.Bins() ) - { - delete[] Bin; - Bin = new int[ Q.Bins() ]; - } - bins = Q.Bins(); - balls = Q.Balls(); - for( int i = 0; i < bins; i++ ) Bin[i] = Q[i]; - } - - void Partition::Get( char *str ) const - { - for( int i = 0; i < bins; i++ ) - str += sprintf( str, "%d ", Bin[i] ); - *str = NullChar; - } - - int Partition::operator[]( int i ) const - { - if( i < 0 || i >= bins ) return 0; - else return Bin[i]; - } - - long Partition::NumCombinations() const // How many distinct configurations. - { - // Think of the k "bins" as being k - 1 "partitions" mixed in with - // the n "balls". 
If the balls and partitions were each distinguishable - // objects, there would be (n + k - 1)! distinct configurations. - // But since both the balls and the partitions are indistinguishable, - // we simply divide by n! (k - 1)!. This is the binomial coefficient - // ( n + k - 1, n ). - // - if( balls == 0 ) return 0; - if( bins == 1 ) return 1; - return (long)floor( BinomialCoeff( balls + bins - 1, balls ) + 0.5 ); - } - - /*************************************************************************** - * O P E R A T O R + + (Next Partition) * - * * - * Rearranges the n "balls" in k "bins" into the next configuration. * - * The first config is assumed to be all balls in the first bin -- i.e. * - * Bin[0]. All possible groupings are generated, each exactly once. The * - * function returns 1 if successful, 0 if the last config has already been * - * reached. (Algorithm by Harold Zatz) * - * * - ***************************************************************************/ - int Partition::operator++() - { - int i; - if( Bin[0] > 0 ) - { - Bin[1] += 1; - Bin[0] -= 1; - } - else - { - for( i = 1; Bin[i] == 0; i++ ); - if( i == bins - 1 ) return 0; - Bin[i+1] += 1; - Bin[0] = Bin[i] - 1; - Bin[i] = 0; - } - return 1; - } - - void Reset( Partition &P ) - { - P.Bin[0] = P.Balls(); - for( int i = 1; i < P.Bins(); i++ ) P.Bin[i] = 0; - } - - int End( const Partition &P ) - { - return P[ P.Bins() - 1 ] == P.Balls(); - } - - void Print( const Partition &P ) - { - if( P.Bins() > 0 ) - { - printf( "%d", P[0] ); - for( int i = 1; i < P.Bins(); i++ ) printf( " %d", P[i] ); - printf( "\n" ); - } - } -}; diff --git a/src/nvtt/bc7/arvo/Perm.h b/src/nvtt/bc7/arvo/Perm.h deleted file mode 100644 index 2af4776..0000000 --- a/src/nvtt/bc7/arvo/Perm.h +++ /dev/null @@ -1,111 +0,0 @@ -/*************************************************************************** -* Perm.h * -* * -* This file defines permutation class: that is, a class for creating and * -* manipulating finite sequences of 
distinct integers. The main feature * -* of the class is the "++" operator that can be used to step through all * -* N! permutations of a sequence of N integers. As the set of permutations * -* forms a multiplicative group, a multiplication operator and an * -* exponentiation operator are also defined. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 07/01/93 Added the Partition class. * -* arvo 03/23/93 Initial coding. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __PERM_INCLUDED__ -#define __PERM_INCLUDED__ - -namespace ArvoMath { - - class Perm { - public: - Perm( const Perm & ); // Initialize from a permutation. - Perm( int a = 0, int b = 0 ); // Create permutation of ints a...b. - Perm( const char * ); // Create from string of numbers. - ~Perm() { delete p; } // Destructor. - void Get( char * ) const; // Gets a string representation. - int Size() const { return b - a + 1;} // The number of elements. - int Min () const { return a; } // The smallest value. - int Max () const { return b; } // The largest value. - int operator++(); // Make "next" permutation. - int operator--(); // Make "previous" permutation. - Perm &operator+=( int n ); // Advances by n permutations. - Perm &operator-=( int n ); // Decrement by n permutations. - Perm &operator =( const char * ) ; // Resets from string of numbers. 
- Perm &operator =( const Perm & ) ; // Copy from another permutation. - Perm &operator()( int i, int j ) ; // Swap entries i and j. - int operator()( int n ) const; // Index from Min() to Max(). - int operator[]( int n ) const; // Index from 0 to Size() - 1. - Perm operator ^( int n ) const; // Exponentiation: -1 means inverse. - Perm operator *( const Perm & ) const; // Multiplication means composition. - int operator==( const Perm & ) const; // True if all elements match. - int operator<=( const Perm & ) const; // Lexicographic order relation. - private: - int& Elem( int i ) { return p[i]; } - int Next(); - int Prev(); - int a, b; - int *p; - friend void Reset( Perm & ); - }; - - - // A "Partition" is a collection of k indistinguishable "balls" in n "bins". - // The Partition class encapsulates this notion and provides a convenient means - // of generating all possible partitions of k objects among n bins exactly once. - // Starting with all objects in bin zero, the ++ operator creates new and distinct - // distributions among the bins until all objects are in the last bin. - - class Partition { - public: - Partition( ); // Creates a null partition. - Partition( const Partition & ); // Initialize from another partition. - Partition( int bins, int balls ); // Specify # of bins & balls. - ~Partition() { delete Bin; } // Descructor. - void Get( char * ) const; // Gets a string representation. - int Bins () const { return bins; } // The number of bins. - int Balls() const { return balls; } // The number of balls. - void operator+=( int bin ); // Add a ball to this bin. - void operator =( int n ); // Set to the n'th configuration. - void operator =( const Partition& ); // Copy from another partition. - int operator==( const Partition& ) const; // Compare two partitions. - int operator!=( const Partition& ) const; // Compare two partitions. - int operator++(); // Make "next" partition. - int operator[]( int i ) const; // Return # of balls in bin i. 
- long NumCombinations() const; // Number of distinct configurations. - private: - int bins; - int balls; - int* Bin; - friend void Reset( Partition & ); - }; - - - // Predicates for determining when a permutation or partition is the last of - // the sequence, functions for printing, resetting, and miscellaneous operations. - - extern int End ( const Partition & ); // True if all balls in last bin. - extern int End ( const Perm & ); // True if descending. - extern int Even ( const Perm & ); // True if even # of 2-cycles. - extern int Odd ( const Perm & ); // True if odd # of 2-cycles. - extern void Print( const Partition & ); // Write to standard out. - extern void Print( const Perm & ); // Write to standard out. - extern void Reset( Partition & ); // Reset to all balls in bin 0. - extern void Reset( Perm & ); // Reset to ascending order. -}; -#endif diff --git a/src/nvtt/bc7/arvo/Rand.cpp b/src/nvtt/bc7/arvo/Rand.cpp deleted file mode 100644 index 5f3025b..0000000 --- a/src/nvtt/bc7/arvo/Rand.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/*************************************************************************** -* Rand.C (Random Number Generators) * -* * -* Source file for pseudo-random number utilities. Rand is the * -* base class for several different algorithms for generating pseudo-random * -* numbers. Any method can generate individual samples or arrays of * -* samples using "Eval". The random seed can be reset at any time by * -* calling "Seed" with any integer. Random permutations of the integers * -* 0,1,...(n-1) are generated by "Perm(n,P)". * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 08/04/97 Changed to virtual functions. * -* arvo 06/06/93 Optimization, especially for array evaluators. * -* arvo 10/06/91 Converted to C++ * -* arvo 11/20/89 Added "gen_seed" function to handle. * -* arvo 10/30/89 "state" allocation now done in rand_alloc. * -* arvo 07/08/89 Initial coding. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1989, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include "Rand.h" - -namespace ArvoMath { -#ifndef ABS -#define ABS( x ) ((x) > 0 ? (x) : -(x)) -#endif - - /*-------------------------------------------------------------------------* - * M E T H O D 1 * - * * - * From "Numerical Recipes," by William H. Press, Brian P. Flannery, * - * Saul A. Teukolsky, and William T. Vetterling, p. 197. 
* - * * - *-------------------------------------------------------------------------*/ - static const long M1 = 714025; - static const long IA = 1366; - static const long IC = 150889; - static const double RM = 1.400512E-6; - - float RandGen_1::Eval() - { - register long *elem; - register long offset; - register float rand; - offset = 1 + ( 97 * index ) / M1; - if( offset > 97 ) offset = 97; - if( offset < 1 ) offset = 1; - elem = shuffle + offset; - rand = ( index = *elem ) * RM; - *elem = ( seed = ( IA * seed + IC ) % M1 ); - return rand; - } - - void RandGen_1::Eval( int n, float *array ) - { - register long *shfl = shuffle; - register long *elem; - register long offset; - for( int i = 0; i < n; i++ ) - { - offset = 1 + ( 97 * index ) / M1; - if( offset > 97 ) offset = 97; - if( offset < 1 ) offset = 1; - elem = shfl + offset; - *array++ = ( index = *elem ) * RM; - *elem = ( seed = ( IA * seed + IC ) % M1 ); - } - } - - void RandGen_1::Seed( long seed ) - { - long t = ( IC + ABS( seed ) + 1 ) % M1; - for( register int k = 1; k <= 97; k++ ) - { - t = ( IA * t + IC ) % M1; - shuffle[k] = ABS( t ); - } - t = ( IA * t + IC ) % M1; - seed = ABS( t ); - index = ABS( t ); - } - - /*-------------------------------------------------------------------------* - * M E T H O D 2 * - * * - * From "The Multiple Prime Random Number Generator," by Alexander Haas, * - * ACM Transactions on Mathematical Software, Vol. 13, No. 4, December * - * 1987, pp. 368-381. 
* - * * - *-------------------------------------------------------------------------*/ - float RandGen_2::Eval() - { - if( (m += 7 ) >= 9973 ) m -= 9871; - if( (i += 1907 ) >= 99991 ) i -= 89989; - if( (j += 73939) >= 224729 ) j -= 96233; - r = ((r * m + i + j) % 100000) / 10; - return r * 1.00010001E-4; - } - - void RandGen_2::Eval( int n, float *array ) - { - for( register int k = 0; k < n; k++ ) - { - if( (m += 7 ) >= 9973 ) m -= 9871; - if( (i += 1907 ) >= 99991 ) i -= 89989; - if( (j += 73939) >= 224729 ) j -= 96233; - r = ((r * m + i + j) % 100000) / 10; - *array++ = r * 1.00010001E-4; - } - } - - void RandGen_2::Seed( long seed ) - { - r = ABS( seed ); - m = ABS( seed * 7 ); - i = ABS( seed * 11 ); - j = ABS( seed * 13 ); - if( m < 100 ) m += 100; - if( i < 10000 ) i += 10000; - if( j < 128000 ) j += 128000; - } - - /*-------------------------------------------------------------------------* - * M E T H O D 3 * - * * - * From "A More Portable Fortran Random Number Generator," by Linus * - * Schrage, ACM Transactions on Mathematical Software, Vol. 5, No, 2, * - * June 1979, pp. 132-138. 
* - * * - *-------------------------------------------------------------------------*/ - static const long A3 = 16807; - static const long P3 = 2147483647; - - float RandGen_3::Eval() - { - long xhi = ix >> 16; - long xalo = ( ix & 0xFFFF ) * A3; - long leftlo = xalo >> 16; - long fhi = xhi * A3 + leftlo; - long k = fhi >> 15; - ix = ( ((xalo - (leftlo << 16)) - P3) + - ((fhi - (k << 15)) << 16) ) + k; - if( ix < 0 ) ix += P3; - return ix * 4.656612875E-10; - } - - void RandGen_3::Eval( int n, float *array ) - { - register long xhi, xalo, leftlo; - register long fhi, k; - for( register int i = 0; i < n; i++ ) - { - xhi = ix >> 16; - xalo = ( ix & 0xFFFF ) * A3; - leftlo = xalo >> 16; - fhi = xhi * A3 + leftlo; - k = fhi >> 15; - ix = ( ((xalo - (leftlo << 16)) - P3) + - ((fhi - (k << 15)) << 16) ) + k; - if( ix < 0 ) ix += P3; - *array++ = ix * 4.656612875E-10; - } - } - - void RandGen_3::Seed( long seed ) - { - ix = ABS( seed ); - } - - /*-------------------------------------------------------------------------* - * R A N D : : P E R M (Permutation) * - * * - * This routine fills an integer array of length "len" with a random * - * permutation of the integers 0, 1, 2, ... (len-1). * - * * - * For efficiency, the random numbers are generated in batches of up to * - * "Nmax" at a time. The constant Nmax can be set to any value >= 1. * - * * - *-------------------------------------------------------------------------*/ - static const int Nmax = 20; - - void RandGen::Perm( int len, int perm[] ) - { - float R[ Nmax ]; // A buffer for getting random numbers. - int L = len - 1; // Total number of random numbers needed. - int N = 0; // How many to generate when we call Eval. - int n = 0; // The array index into R. - - // First initialize the array "perm" to the identity permutation. - - for( int j = 0; j < len; j++ ) perm[j] = j; - - // Now swap a random element in the front with the i'th element. - // When i gets down to 0, we're done. 
- - for( int i = len - 1; i > 0; i-- ) // Element i is a swap candidate. - { - if( n == N ) // Generate more random numbers. - { - N = ( L < Nmax ) ? L : Nmax; // Can't get more than "Nmax". - Eval( N, R ); // Generate N random numbers. - L -= N; // Decrement total counter. - n = 0; // Start index at beginning of R. - } - float r = ( i + 1 ) * R[ n++ ]; // Pick a float in [0,i+1]. - int k = (int)r; // Truncate r to an integer. - if( k < i ) // Disregard k == i and k == i+1. - { - int tmp = perm[i]; // Swap elements i and k. - perm[i] = perm[k]; - perm[k] = tmp; - } - } - } -}; diff --git a/src/nvtt/bc7/arvo/Rand.h b/src/nvtt/bc7/arvo/Rand.h deleted file mode 100644 index a8ef5d9..0000000 --- a/src/nvtt/bc7/arvo/Rand.h +++ /dev/null @@ -1,114 +0,0 @@ -/*************************************************************************** -* Rand.h (Random Number Generators) * -* * -* Header file for Rand.C, pseudo-random number utilities. Rand is the * -* base class for several different algorithms for generating pseudo-random * -* numbers. Any method can generate individual samples or arrays of * -* samples using "Eval". The random seed can be reset at any time by * -* calling "Seed" with any integer. Random permutations of the integers * -* 0,1,...(n-1) are generated by "Perm(n,P)". * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 08/04/97 Changed to virtual functions. * -* arvo 06/06/93 Optimization, especially for array evaluators. * -* arvo 10/06/91 Converted to C++ * -* arvo 11/20/89 Added "gen_seed" function to handle. * -* arvo 10/30/89 "state" allocation now done in rand_alloc. * -* arvo 07/08/89 Initial coding. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1989, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. 
See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __RAND_INCLUDED__ -#define __RAND_INCLUDED__ - -namespace ArvoMath { - - // Base class for random number generators. This class contains - // several pure virtual functions, so it cannot be instanced directly. - - class RandGen { - public: - RandGen() {} - virtual float Eval( ) = 0; - virtual void Eval( int n, float x[] ) = 0; - virtual void Seed( long seed ) = 0; - public: - void Perm( int n, int P[] ); - float Interval( float a, float b ); - void Eval( float &x ) { x = Eval(); } - }; - - - // Method 1: From "Numerical Recipes," by William H. Press, Brian P. - // Flannery, Saul A. Teukolsky, and William T. Vetterling, p. 197. - - class RandGen_1 : public RandGen { - public: - RandGen_1( ) { Seed( 1 ); } - RandGen_1( long seed ) { Seed( seed ); } - virtual float Eval( ); - virtual void Eval( int n, float x[] ); - virtual void Seed( long seed ); - private: - long index; - long seed; - long shuffle[ 98 ]; - }; - - - // Method 2: From "The Multiple Prime Random Number Generator," by - // Alexander Haas, ACM Transactions on Mathematical Software, - // Vol. 13, No. 4, December 1987, pp. 368-381. * - - class RandGen_2 : public RandGen { - public: - RandGen_2( ) { Seed( 1 ); } - RandGen_2( long seed ) { Seed( seed ); } - virtual float Eval( ); - virtual void Eval( int n, float x[] ); - virtual void Seed( long seed ); - private: - long r; - long m; - long i; - long j; - }; - - - // Method 3: From "A More Portable Fortran Random Number Generator," - // by Linus Schrage, ACM Transactions on Mathematical Software, - // Vol. 5, No, 2, June 1979, pp. 132-138. 
* - - class RandGen_3 : public RandGen { - public: - RandGen_3( ) { Seed( 1 ); } - RandGen_3( long seed ) { Seed( seed ); } - virtual float Eval( ); - virtual void Eval( int n, float x[] ); - virtual void Seed( long seed ); - private: - long ix; - }; - - - inline float RandGen::Interval( float a, float b ) - { - return ( a < b ) ? - a + Eval() * ( b - a ) : - b + Eval() * ( a - b ) ; - } -}; -#endif diff --git a/src/nvtt/bc7/arvo/SI_units.h b/src/nvtt/bc7/arvo/SI_units.h deleted file mode 100644 index 69cc8cc..0000000 --- a/src/nvtt/bc7/arvo/SI_units.h +++ /dev/null @@ -1,232 +0,0 @@ -/***************************************************************************** -** -** MODULE NAME SI_units.h International System of Units (SI) -** -** DESCRIPTION -** The purpose of this header file is to provide a simple and efficient -** mechanism for associating physically meaningful units with floating -** point numbers. No extra space is required, and no runtime overhead -** is introduced; all type-checking occurs at compile time. -** -** -** HISTORY -** Name Date Description -** -** arvo 02/09/92 Replaced conversion macros with inline functions. -** arvo 10/16/91 Initial implementation. 
-** -** -** (c) Copyright 1991, 1992 -** Program of Computer Graphics, Cornell University, Ithaca, NY -** ALL RIGHTS RESERVED -** -*****************************************************************************/ - -#ifndef SI_UNITS_H -#define SI_UNITS_H - -#include - -namespace ArvoMath { - - const float - SI_deci = 1.0E-1, - SI_centi = 1.0E-2, - SI_milli = 1.0E-3, - SI_micro = 1.0E-6, - SI_nano = 1.0E-9, - SI_kilo = 1.0E+3, - SI_mega = 1.0E+6, - SI_giga = 1.0E+9, - SI_tera = 1.0E+12; - - /******************************************************************************* - * * - * I N T E R N A T I O N A L S Y S T E M O F U N I T S * - * * - ******************************************************************************** - * * - * DIMENSION CLASS INITIALIZER SYMBOL BASE UNITS * - * * - * length SI_length meter m m * - * time SI_time second s s * - * mass SI_mass kilogram kg kg * - * angle SI_angle radian rad rad * - * solid angle SI_solid_angle steradian sr sr * - * temperature SI_temperature kelvin K K * - * luminous intensity SI_lum_inten candela cd cd * - * area SI_area meter2 m2 m2 * - * volume SI_volume meter3 m3 m3 * - * frequency SI_frequency hertz Hz 1/s * - * force SI_force newton N m kg/s2 * - * energy SI_energy joule J m2 kg/s2 * - * power SI_power watt W m2 kg/s3 * - * radiance SI_radiance watts_per_m2sr W/m2sr kg/(s3 sr) * - * irradiance SI_irradiance watts_per_m2 W/m2 kg/s3 * - * radiant intensity SI_rad_inten watts_per_sr W/sr m2 kg/(s3 sr) * - * luminance SI_luminance candela_per_m2 cd/m2 cd/m2 * - * illuminance SI_illuminance lux lx cd sr/m2 * - * luminous flux SI_lum_flux lumen lm cd sr * - * luminous energy SI_lum_energy talbot tb cd sr s * - * * - *******************************************************************************/ - - class SI_dimensionless { - public: - float Value() const { return value; } - ostream& Put( ostream &s, char *a ) { return s << value << " " << a; } - protected: - SI_dimensionless() { value = 0; } - SI_dimensionless( float 
x ){ value = x; } - float value; - }; - - /******************************************************************************* - * The following macro is used for creating new quantity classes and their * - * corresponding initializing functions and abbreviations. This macro is * - * not intended to be used outside of this file -- it is a compact means of * - * defining generic operations for each quantity (e.g. scaling & comparing). * - *******************************************************************************/ - -#define SI_Make( C, Initializer, Symbol ) \ - struct C : SI_dimensionless { \ - C ( ) : SI_dimensionless( ) {}; \ - C ( float x ) : SI_dimensionless( x ) {}; \ - C operator * ( float x ) { return C( value * x ); } \ - C operator / ( float x ) { return C( value / x ); } \ - C operator /= ( float x ) { return C( value /= x ); } \ - C operator *= ( float x ) { return C( value *= x ); } \ - C operator + ( C x ) { return C( value + x.Value() ); } \ - C operator - ( ) { return C(-value ); } \ - C operator - ( C x ) { return C( value - x.Value() ); } \ - C operator += ( C x ) { return C( value += x.Value() ); } \ - C operator -= ( C x ) { return C( value -= x.Value() ); } \ - C operator = ( C x ) { return C( value = x.Value() ); } \ - int operator > ( C x ) { return ( value > x.Value() ); } \ - int operator < ( C x ) { return ( value < x.Value() ); } \ - int operator >= ( C x ) { return ( value >= x.Value() ); } \ - int operator <= ( C x ) { return ( value <= x.Value() ); } \ - float operator / ( C x ) { return ( value / x.Value() ); } \ - }; \ - inline ostream& operator<<(ostream &s, C x) {return x.Put(s,Symbol);} \ - inline C Initializer( float x ) { return C( x ); } \ - inline C operator * ( float x, C y ) { return C( x * y.Value() ); } - - /******************************************************************************* - * The following macros define permissible arithmetic operations among * - * variables with different physical meanings. 
This ensures that the * - * result of any such operation is ALWAYS another meaningful quantity. * - *******************************************************************************/ - -#define SI_Square( A, B ) \ - inline B operator*( A x, A y ) { return B( x.Value() * y.Value() ); } \ - inline A operator/( B x, A y ) { return A( x.Value() / y.Value() ); } - -#define SI_Recip( A, B ) \ - inline B operator/( float x, A y ) { return B( x / y.Value() ); } \ - inline A operator/( float x, B y ) { return A( x / y.Value() ); } \ - inline float operator*( A x, B y ) { return x.Value() * y.Value(); } \ - inline float operator*( B x, A y ) { return x.Value() * y.Value(); } - -#define SI_Times( A, B, C ) \ - inline C operator*( A x, B y ) { return C( x.Value() * y.Value() ); } \ - inline C operator*( B x, A y ) { return C( x.Value() * y.Value() ); } \ - inline A operator/( C x, B y ) { return A( x.Value() / y.Value() ); } \ - inline B operator/( C x, A y ) { return B( x.Value() / y.Value() ); } - - /******************************************************************************* - * The following macros create classes for a variety of quantities. These * - * include base qunatities such as "time" and "length" as well as derived * - * quantities such as "power" and "volume". Each quantity is provided with * - * an initialization function in SI units and an abbreviation for printing. 
* - *******************************************************************************/ - - SI_Make( SI_length , meter , "m" ); // Base Units: - SI_Make( SI_mass , kilogram , "kg" ); - SI_Make( SI_time , second , "s" ); - SI_Make( SI_lum_inten , candela , "cd" ); - SI_Make( SI_temperature , kelvin , "K" ); - SI_Make( SI_angle , radian , "rad" ); // Supplementary: - SI_Make( SI_solid_angle , steradian , "sr" ); - SI_Make( SI_area , meter2 , "m2" ); // Derived units: - SI_Make( SI_volume , meter3 , "m3" ); - SI_Make( SI_frequency , hertz , "Hz" ); - SI_Make( SI_force , newton , "N" ); - SI_Make( SI_energy , joule , "J" ); - SI_Make( SI_power , watt , "W" ); - SI_Make( SI_radiance , watts_per_m2sr , "W/m2sr" ); - SI_Make( SI_irradiance , watts_per_m2 , "W/m2" ); - SI_Make( SI_rad_inten , watts_per_sr , "W/sr" ); - SI_Make( SI_luminance , candela_per_m2 , "cd/m2" ); - SI_Make( SI_illuminance , lux , "lx" ); - SI_Make( SI_lum_flux , lumen , "lm" ); - SI_Make( SI_lum_energy , talbot , "tb" ); - SI_Make( SI_time2 , second2 , "s2" ); // Intermediate: - SI_Make( SI_sa_area , meter2_sr , "m2sr" ); - SI_Make( SI_inv_area , inv_meter2 , "1/m2" ); - SI_Make( SI_inv_solid_angle, inv_steradian , "1/sr" ); - SI_Make( SI_length_temp , meters_kelvin , "m K" ); - SI_Make( SI_power_area , watts_m2 , "W m2" ); - SI_Make( SI_power_per_volume, watts_per_m3 , "W/m3" ); - - SI_Square( SI_length , SI_area ); - SI_Square( SI_time , SI_time2 ); - SI_Recip ( SI_time , SI_frequency ); - SI_Recip ( SI_area , SI_inv_area ); - SI_Recip ( SI_solid_angle , SI_inv_solid_angle ); - - SI_Times( SI_area , SI_length , SI_volume ); - SI_Times( SI_force , SI_length , SI_energy ); - SI_Times( SI_power , SI_time , SI_energy ); - SI_Times( SI_lum_flux , SI_time , SI_lum_energy ); - SI_Times( SI_lum_inten , SI_solid_angle , SI_lum_flux ); - SI_Times( SI_radiance , SI_solid_angle , SI_irradiance ); - SI_Times( SI_rad_inten , SI_solid_angle , SI_power ); - SI_Times( SI_irradiance , SI_area , SI_power ); - SI_Times( 
SI_illuminance , SI_area , SI_lum_flux ); - SI_Times( SI_solid_angle , SI_area , SI_sa_area ); - SI_Times( SI_radiance , SI_sa_area , SI_power ); - SI_Times( SI_irradiance , SI_inv_solid_angle, SI_radiance ); - SI_Times( SI_power , SI_inv_solid_angle, SI_rad_inten ); - SI_Times( SI_length , SI_temperature , SI_length_temp ); - SI_Times( SI_power , SI_area , SI_power_area ); - - /******************************************************************************* - * Following are some useful non-SI units. These units can be used in place of * - * the unit-initializers above. Thus, a variable of type SI_length, for example* - * may be initialized in "meters", "inches", or "centimeters". In all cases, * - * however, the value is converted to the underlying SI unit (e.g. meters). * - *******************************************************************************/ - -#define SI_Convert( SI, New, Old ) inline SI New( float x ) { return x * Old; } - - SI_Convert( SI_time , minute , second( 60.0 ) ); - SI_Convert( SI_time , hour , minute( 60.0 ) ); - SI_Convert( SI_force , dyne , newton( 1.0E-5 ) ); - SI_Convert( SI_energy , erg , joule( 1.0E-7 ) ); - SI_Convert( SI_power , kilowatt , watt( SI_kilo ) ); - SI_Convert( SI_mass , gram , kilogram( SI_milli ) ); - SI_Convert( SI_length , inch , meter( 2.54E-2 ) ); - SI_Convert( SI_length , foot , inch( 12.0 ) ); - SI_Convert( SI_length , centimeter , meter( SI_centi ) ); - SI_Convert( SI_length , micron , meter( SI_micro ) ); - SI_Convert( SI_length , angstrom , meter( 1.0E-10 ) ); - SI_Convert( SI_area , barn , meter2( 1.0E-28 ) ); - SI_Convert( SI_angle , degree , radian( 0.017453 ) ); - SI_Convert( SI_illuminance , phot , lux( 1.0E+4 ) ); - SI_Convert( SI_illuminance , footcandle , lux( 9.29E-2 ) ); - SI_Convert( SI_luminance , stilb , candela_per_m2( 1.0E+4 ) ); - - /******************************************************************************* - * Often there are multiple names for a single quantity. 
Below are some * - * synonyms for the quantities defined above. These can be used in place of * - * the original quantities and may be clearer in some contexts. * - *******************************************************************************/ - - typedef SI_power SI_radiant_flux; - typedef SI_irradiance SI_radiant_flux_density; - typedef SI_irradiance SI_radiant_exitance; - typedef SI_radiance SI_intensity; - typedef SI_irradiance SI_radiosity; -}; -#endif \ No newline at end of file diff --git a/src/nvtt/bc7/arvo/SVD.cpp b/src/nvtt/bc7/arvo/SVD.cpp deleted file mode 100644 index 36f0ea6..0000000 --- a/src/nvtt/bc7/arvo/SVD.cpp +++ /dev/null @@ -1,398 +0,0 @@ -/*************************************************************************** -* SVD.C * -* * -* Singular Value Decomposition. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 08/22/2000 Copied to CIT library. * -* arvo 06/28/1993 Rewritten from "Numerical Recipes" C-code. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. 
* -* * -***************************************************************************/ -#include -#include -#include "ArvoMath.h" -#include "Vector.h" -#include "Matrix.h" -#include "SVD.h" - -namespace ArvoMath { - static const int MaxIterations = 30; - - static double svd_pythag( double a, double b ) - { - double at = Abs(a); - double bt = Abs(b); - if( at > bt ) - return at * sqrt( 1.0 + Sqr( bt / at ) ); - else if( bt > 0.0 ) - return bt * sqrt( 1.0 + Sqr( at / bt ) ); - else return 0.0; - } - - static inline double SameSign( double a, double b ) - { - double t; - if( b >= 0.0 ) t = Abs( a ); - else t = -Abs( a ); - return t; - } - - static int ComputeRank( const Matrix &D, double epsilon ) - { - int rank = 0; - for( int i = 0; i < D.Rows(); i++ ) - if( Abs(D(i,i)) > epsilon ) rank++; - return rank; - } - - SVD::SVD( ) : Q_(0), D_(0), R_(0) - { - } - - SVD::SVD( const Matrix &M ) : Q_(0), D_(0), R_(0) - { - (*this) = M; - } - - void SVD::operator=( const Matrix &A ) - { - if( A.Rows() >= A.Cols() ) Q_ = A; - else - { - Q_ = Matrix( A.Cols() ); - for( int i = 0; i < A.Rows(); i++ ) - for( int j = 0; j < A.Cols(); j++ ) Q_(i,j) = A(i,j); - } - R_ = Matrix( A.Cols() ); - Decompose( Q_, D_, R_ ); - } - - const Matrix &SVD::Q( double epsilon ) const - { - int rank = 0; - if( epsilon != 0.0 ) rank = ComputeRank( D_, epsilon ); - return Q_; - } - - const Matrix &SVD::D( double epsilon ) const - { - int rank = 0; - if( epsilon != 0.0 ) rank = ComputeRank( D_, epsilon ); - return D_; - } - - const Matrix &SVD::R( double epsilon ) const - { - int rank = 0; - if( epsilon != 0.0 ) rank = ComputeRank( D_, epsilon ); - return R_; - } - - int SVD::Rank( double epsilon ) const - { - return ComputeRank( D_, epsilon ); - } - - int SVD::Decompose( Matrix &Q, Matrix &D, Matrix &R ) - { - int i, j, k, l, m, n, p, q, iter; - double c, f, h, s, x, y, z; - double norm = 0.0; - double g = 0.0; - double scale = 0.0; - - m = Q.Rows(); - n = Q.Cols(); - - Vector Temp( n ); - Vector diag( n 
); - - for( i = 0; i < n; i++ ) - { - - Temp(i) = scale * g; - scale = 0.0; - g = 0.0; - s = 0.0; - l = i + 1; - - if( i < m ) - { - for( k = i; k < m; k++ ) scale += Abs( Q(k,i) ); - if( scale != 0.0 ) - { - for( k = i; k < m; k++ ) - { - Q(k,i) /= scale; - s += Sqr( Q(k,i) ); - } - f = Q(i,i); - g = -SameSign( sqrt(s), f ); - h = f * g - s; - Q(i,i) = f - g; - if( i != n - 1 ) - { - for( j = l; j < n; j++ ) - { - s = 0.0; - for( k = i; k < m; k++ ) s += Q(k,i) * Q(k,j); - f = s / h; - for( k = i; k < m; k++ ) Q(k,j) += f * Q(k,i); - } - } - for( k = i; k < m; k++ ) Q(k,i) *= scale; - } - } - - diag(i) = scale * g; - g = 0.0; - s = 0.0; - scale = 0.0; - - if( i < m && i != n - 1 ) - { - for( k = l; k < n; k++ ) scale += Abs( Q(i,k) ); - if( scale != 0.0 ) - { - for( k = l; k < n; k++ ) - { - Q(i,k) /= scale; - s += Sqr( Q(i,k) ); - } - f = Q(i,l); - g = -SameSign( sqrt(s), f ); - h = f * g - s; - Q(i,l) = f - g; - for( k = l; k < n; k++ ) Temp(k) = Q(i,k) / h; - if( i != m - 1 ) - { - for( j = l; j < m; j++ ) - { - s = 0.0; - for( k = l; k < n; k++ ) s += Q(j,k) * Q(i,k); - for( k = l; k < n; k++ ) Q(j,k) += s * Temp(k); - } - } - for( k = l; k < n; k++ ) Q(i,k) *= scale; - } - } - norm = Max( norm, Abs( diag(i) ) + Abs( Temp(i) ) ); - } - - - for( i = n - 1; i >= 0; i-- ) - { - if( i < n - 1 ) - { - if( g != 0.0 ) - { - for( j = l; j < n; j++ ) R(i,j) = ( Q(i,j) / Q(i,l) ) / g; - for( j = l; j < n; j++ ) - { - s = 0.0; - for( k = l; k < n; k++ ) s += Q(i,k) * R(j,k); - for( k = l; k < n; k++ ) R(j,k) += s * R(i,k); - } - } - for( j = l; j < n; j++ ) - { - R(i,j) = 0.0; - R(j,i) = 0.0; - } - } - R(i,i) = 1.0; - g = Temp(i); - l = i; - } - - - for( i = n - 1; i >= 0; i-- ) - { - l = i + 1; - g = diag(i); - if( i < n - 1 ) for( j = l; j < n; j++ ) Q(i,j) = 0.0; - if( g != 0.0 ) - { - g = 1.0 / g; - if( i != n - 1 ) - { - for( j = l; j < n; j++ ) - { - s = 0.0; - for( k = l; k < m; k++ ) s += Q(k,i) * Q(k,j); - f = ( s / Q(i,i) ) * g; - for( k = i; k < m; k++ ) 
Q(k,j) += f * Q(k,i); - } - } - for( j = i; j < m; j++ ) Q(j,i) *= g; - } - else - { - for( j = i; j < m; j++ ) Q(j,i) = 0.0; - } - Q(i,i) += 1.0; - } - - - for( k = n - 1; k >= 0; k-- ) - { - for( iter = 1; iter <= MaxIterations; iter++ ) - { - int jump; - - for( l = k; l >= 0; l-- ) - { - q = l - 1; - if( Abs( Temp(l) ) + norm == norm ) { jump = 1; break; } - if( Abs( diag(q) ) + norm == norm ) { jump = 0; break; } - } - - if( !jump ) - { - c = 0.0; - s = 1.0; - for( i = l; i <= k; i++ ) - { - f = s * Temp(i); - Temp(i) *= c; - if( Abs( f ) + norm == norm ) break; - g = diag(i); - h = svd_pythag( f, g ); - diag(i) = h; - h = 1.0 / h; - c = g * h; - s = -f * h; - for( j = 0; j < m; j++ ) - { - y = Q(j,q); - z = Q(j,i); - Q(j,q) = y * c + z * s; - Q(j,i) = z * c - y * s; - } - } - } - - z = diag(k); - if( l == k ) - { - if( z < 0.0 ) - { - diag(k) = -z; - for( j = 0; j < n; j++ ) R(k,j) *= -1.0; - } - break; - } - if( iter >= MaxIterations ) return 0; - x = diag(l); - q = k - 1; - y = diag(q); - g = Temp(q); - h = Temp(k); - f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0 * h * y ); - g = svd_pythag( f, 1.0 ); - f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x; - c = 1.0; - s = 1.0; - for( j = l; j <= q; j++ ) - { - i = j + 1; - g = Temp(i); - y = diag(i); - h = s * g; - g = c * g; - z = svd_pythag( f, h ); - Temp(j) = z; - c = f / z; - s = h / z; - f = x * c + g * s; - g = g * c - x * s; - h = y * s; - y = y * c; - for( p = 0; p < n; p++ ) - { - x = R(j,p); - z = R(i,p); - R(j,p) = x * c + z * s; - R(i,p) = z * c - x * s; - } - z = svd_pythag( f, h ); - diag(j) = z; - if( z != 0.0 ) - { - z = 1.0 / z; - c = f * z; - s = h * z; - } - f = c * g + s * y; - x = c * y - s * g; - for( p = 0; p < m; p++ ) - { - y = Q(p,j); - z = Q(p,i); - Q(p,j) = y * c + z * s; - Q(p,i) = z * c - y * s; - } - } - Temp(l) = 0.0; - Temp(k) = f; - diag(k) = x; - } - } - - // Sort the singular values into descending order. 
- - for( i = 0; i < n - 1; i++ ) - { - double biggest = diag(i); // Biggest singular value so far. - int bindex = i; // The row/col it occurred in. - for( j = i + 1; j < n; j++ ) - { - if( diag(j) > biggest ) - { - biggest = diag(j); - bindex = j; - } - } - if( bindex != i ) // Need to swap rows and columns. - { - Q.SwapCols( i, bindex ); // Swap columns in Q. - R.SwapRows( i, bindex ); // Swap rows in R. - diag.Swap ( i, bindex ); // Swap elements in diag. - } - } - - D = Diag( diag ); - return 1; - } - - - const Matrix &SVD::PseudoInverse( double epsilon ) - { - if( Null(P_) ) - { - Matrix D_Inverse( D_ ); - for( int i = 0; i < D_Inverse.Rows(); i++ ) - { - if( Abs( D_Inverse(i,i) ) > epsilon ) - D_Inverse(i,i) = 1.0 / D_Inverse(i,i); - else D_Inverse(i,i) = 0.0; - } - P_ = Q_ * D_Inverse * R_; - } - return P_; - } -}; diff --git a/src/nvtt/bc7/arvo/SVD.h b/src/nvtt/bc7/arvo/SVD.h deleted file mode 100644 index d6bf850..0000000 --- a/src/nvtt/bc7/arvo/SVD.h +++ /dev/null @@ -1,54 +0,0 @@ -/*************************************************************************** -* SVD.h * -* * -* Singular Value Decomposition. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 08/22/2000 Split off from Matrix.h * -* arvo 06/28/1993 Rewritten from "Numerical Recipes" C-code. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. 
* -* * -***************************************************************************/ -#ifndef __SVD_INCLUDED__ -#define __SVD_INCLUDED__ - -#include "Vector.h" -#include "Matrix.h" - -namespace ArvoMath { - - class SVD { - public: - SVD( ); - SVD( const SVD & ); // Copies the decomposition. - SVD( const Matrix & ); // Performs the decomposition. - ~SVD() {}; - const Matrix &Q( double epsilon = 0.0 ) const; - const Matrix &D( double epsilon = 0.0 ) const; - const Matrix &R( double epsilon = 0.0 ) const; - const Matrix &PseudoInverse( double epsilon = 0.0 ); - int Rank( double epsilon = 0.0 ) const; - void operator=( const Matrix & ); // Performs the decomposition. - private: - int Decompose( Matrix &Q, Matrix &D, Matrix &R ); - Matrix Q_; - Matrix D_; - Matrix R_; - Matrix P_; // Pseudo inverse. - int error; - }; -}; -#endif diff --git a/src/nvtt/bc7/arvo/SphTri.cpp b/src/nvtt/bc7/arvo/SphTri.cpp deleted file mode 100644 index 40de956..0000000 --- a/src/nvtt/bc7/arvo/SphTri.cpp +++ /dev/null @@ -1,292 +0,0 @@ -/*************************************************************************** -* SphTri.C * -* * -* This file defines the SphericalTriangle class definition, which * -* supports member functions for Monte Carlo sampling, point containment, * -* and other basic operations on spherical triangles. * -* * -* Changes: * -* 01/01/2000 arvo Added New_{Alpha,Beta,Gamma} methods. * -* 12/30/1999 arvo Added VecIrrad method for "Vector Irradiance". * -* 04/08/1995 arvo Further optimized sampling algorithm. * -* 10/11/1994 arvo Added analytic sampling algorithm. * -* 06/14/1994 arvo Initial implementation. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1995, 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. 
See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include "SphTri.h" -#include "form.h" -namespace ArvoMath { - /*-------------------------------------------------------------------------* - * Constructor * - * * - * Construct a spherical triangle from three (non-zero) vectors. The * - * vectors needn't be of unit length. * - * * - *-------------------------------------------------------------------------*/ - SphericalTriangle::SphericalTriangle( const Vec3 &A0, const Vec3 &B0, const Vec3 &C0 ) - { - Init( A0, B0, C0 ); - } - - /*-------------------------------------------------------------------------* - * Init * - * * - * Construct the spherical triange from three vertices. Assume that the * - * sphere is centered at the origin. The vectors A, B, and C need not * - * be normalized. * - * * - *-------------------------------------------------------------------------*/ - void SphericalTriangle::Init( const Vec3 &A0, const Vec3 &B0, const Vec3 &C0 ) - { - // Normalize the three vectors -- these are the vertices. - - A_ = Unit( A0 ); - B_ = Unit( B0 ); - C_ = Unit( C0 ); - - // Compute and save the cosines of the edge lengths. - - cos_a = B_ * C_; - cos_b = A_ * C_; - cos_c = A_ * B_; - - // Compute and save the edge lengths. - - a_ = ArcCos( cos_a ); - b_ = ArcCos( cos_b ); - c_ = ArcCos( cos_c ); - - // Compute the cosines of the internal (i.e. dihedral) angles. - - cos_alpha = CosDihedralAngle( C_, A_, B_ ); - cos_beta = CosDihedralAngle( A_, B_, C_ ); - cos_gamma = CosDihedralAngle( A_, C_, B_ ); - - // Compute the (dihedral) angles. 
- - alpha = ArcCos( cos_alpha ); - beta = ArcCos( cos_beta ); - gamma = ArcCos( cos_gamma ); - - // Compute the solid angle of the spherical triangle. - - area = alpha + beta + gamma - Pi; - - // Compute the orientation of the triangle. - - orient = Sign( A_ * ( B_ ^ C_ ) ); - - // Initialize three variables that are used for sampling the triangle. - - U = Unit( C_ / A_ ); // In plane of AC orthogonal to A. - sin_alpha = sin( alpha ); - product = sin_alpha * cos_c; - } - - /*-------------------------------------------------------------------------* - * Init * - * * - * Initialize all fields. Create a null spherical triangle. * - * * - *-------------------------------------------------------------------------*/ - void SphericalTriangle::Init() - { - a_ = 0; A_ = 0; cos_alpha = 0; cos_a = 0; alpha = 0; - b_ = 0; B_ = 0; cos_beta = 0; cos_b = 0; beta = 0; - c_ = 0; C_ = 0; cos_gamma = 0; cos_c = 0; gamma = 0; - area = 0; - orient = 0; - sin_alpha = 0; - product = 0; - U = 0; - } - - /*-------------------------------------------------------------------------* - * "( A, B, C )" operator. * - * * - * Construct the spherical triange from three vertices. Assume that the * - * sphere is centered at the origin. The vectors A, B, and C need not * - * be normalized. * - * * - *-------------------------------------------------------------------------*/ - SphericalTriangle & SphericalTriangle::operator()( - const Vec3 &A0, - const Vec3 &B0, - const Vec3 &C0 ) - { - Init( A0, B0, C0 ); - return *this; - } - - /*-------------------------------------------------------------------------* - * Inside * - * * - * Determine if the vector W is inside the triangle. 
W need not be a * - * unit vector * - * * - *-------------------------------------------------------------------------*/ - int SphericalTriangle::Inside( const Vec3 &W ) const - { - Vec3 Z = Orient() * W; - if( Z * ( A() ^ B() ) < 0.0 ) return 0; - if( Z * ( B() ^ C() ) < 0.0 ) return 0; - if( Z * ( C() ^ A() ) < 0.0 ) return 0; - return 1; - } - - /*-------------------------------------------------------------------------* - * Chart * - * * - * Generate samples from the current spherical triangle. If x1 and x2 are * - * random variables uniformly distributed over [0,1], then the returned * - * points are uniformly distributed over the solid angle. * - * * - *-------------------------------------------------------------------------*/ - Vec3 SphericalTriangle::Chart( float x1, float x2 ) const - { - // Use one random variable to select the area of the sub-triangle. - // Save the sine and cosine of the angle phi. - - register float phi = x1 * area - Alpha(); - register float s = sin( phi ); - register float t = cos( phi ); - - // Compute the pair (u,v) that determines the new angle beta. - - register float u = t - cos_alpha; - register float v = s + product ; // sin_alpha * cos_c - - // Compute the cosine of the new edge b. - - float q = ( cos_alpha * ( v * t - u * s ) - v ) / - ( sin_alpha * ( u * t + v * s ) ); - - // Compute the third vertex of the sub-triangle. - - Vec3 C_new = q * A() + Sqrt( 1.0 - q * q ) * U; - - // Use the other random variable to select the height z. - - float z = 1.0 - x2 * ( 1.0 - C_new * B() ); - - // Construct the corresponding point on the sphere. - - Vec3 D = C_new / B(); // Remove B component of C_new. - return z * B() + Sqrt( ( 1.0 - z * z ) / ( D * D ) ) * D; - } - - /*-------------------------------------------------------------------------* - * Coord * - * * - * Compute the two coordinates (x1,x2) corresponding to a point in the * - * spherical triangle. This is the inverse of "Chart". 
* - * * - *-------------------------------------------------------------------------*/ - Vec2 SphericalTriangle::Coord( const Vec3 &P1 ) const - { - Vec3 P = Unit( P1 ); - - // Compute the new C vertex, which lies on the arc defined by B-P - // and the arc defined by A-C. - - Vec3 C_new = Unit( ( B() ^ P ) ^ ( C() ^ A() ) ); - - // Adjust the sign of C_new. Make sure it's on the arc between A and C. - - if( C_new * ( A() + C() ) < 0.0 ) C_new = -C_new; - - // Compute x1, the area of the sub-triangle over the original area. - - float cos_beta = CosDihedralAngle( A(), B(), C_new ); - float cos_gamma = CosDihedralAngle( A(), C_new , B() ); - float sub_area = Alpha() + acos( cos_beta ) + acos( cos_gamma ) - Pi; - float x1 = sub_area / SolidAngle(); - - // Now compute the second coordinate using the new C vertex. - - float z = P * B(); - float x2 = ( 1.0 - z ) / ( 1.0 - C_new * B() ); - - if( x1 < 0.0 ) x1 = 0.0; if( x1 > 1.0 ) x1 = 1.0; - if( x2 < 0.0 ) x2 = 0.0; if( x2 > 1.0 ) x2 = 1.0; - return Vec2( x1, x2 ); - } - - /*-------------------------------------------------------------------------* - * Dual * - * * - * Construct the dual triangle of the current triangle, which is another * - * spherical triangle. * - * * - *-------------------------------------------------------------------------*/ - SphericalTriangle SphericalTriangle::Dual() const - { - Vec3 dual_A = B() ^ C(); if( dual_A * A() < 0.0 ) dual_A *= -1.0; - Vec3 dual_B = A() ^ C(); if( dual_B * B() < 0.0 ) dual_B *= -1.0; - Vec3 dual_C = A() ^ B(); if( dual_C * C() < 0.0 ) dual_C *= -1.0; - return SphericalTriangle( dual_A, dual_B, dual_C ); - } - - /*-------------------------------------------------------------------------* - * VecIrrad * - * * - * Return the "vector irradiance" due to a light source of unit brightness * - * whose spherical projection is this spherical triangle. The negative of * - * this vector dotted with the surface normal gives the (scalar) * - * irradiance at the origin. 
* - * * - *-------------------------------------------------------------------------*/ - Vec3 SphericalTriangle::VecIrrad() const - { - Vec3 Phi = - a() * Unit( B() ^ C() ) + - b() * Unit( C() ^ A() ) + - c() * Unit( A() ^ B() ) ; - if( Orient() ) Phi *= -1.0; - return Phi; - } - - /*-------------------------------------------------------------------------* - * New_Alpha * - * * - * Returns a new spherical triangle derived from the original one by * - * moving the "C" vertex along the edge "BC" until the new "alpha" angle * - * equals the given argument. * - * * - *-------------------------------------------------------------------------*/ - SphericalTriangle SphericalTriangle::New_Alpha( float alpha ) const - { - Vec3 V1( A() ), V2( B() ), V3( C() ); - Vec3 E1 = Unit( V2 ^ V1 ); - Vec3 E2 = E1 ^ V1; - Vec3 G = ( cos(alpha) * E1 ) + ( sin(alpha) * E2 ); - Vec3 D = Unit( V3 / V2 ); - Vec3 C2 = ((G * D) * V2) - ((G * V2) * D); - if( Triple( V1, V2, C2 ) > 0.0 ) C2 *= -1.0; - return SphericalTriangle( V1, V2, C2 ); - } - - std::ostream &operator<<( std::ostream &out, const SphericalTriangle &T ) - { - out << "SphericalTriangle:\n" - << " " << T.A() << "\n" - << " " << T.B() << "\n" - << " " << T.C() << std::endl; - return out; - } - -}; diff --git a/src/nvtt/bc7/arvo/SphTri.h b/src/nvtt/bc7/arvo/SphTri.h deleted file mode 100644 index 7336dc7..0000000 --- a/src/nvtt/bc7/arvo/SphTri.h +++ /dev/null @@ -1,124 +0,0 @@ -/*************************************************************************** -* SphTri.h * -* * -* This file defines the SphericalTriangle class definition, which * -* supports member functions for Monte Carlo sampling, point containment, * -* and other basic operations on spherical triangles. * -* * -* Changes: * -* 01/01/2000 arvo Added New_{Alpha,Beta,Gamma} methods. * -* 12/30/1999 arvo Added VecIrrad method for "Vector Irradiance". * -* 04/08/1995 arvo Further optimized sampling algorithm. * -* 10/11/1994 arvo Added analytic sampling algorithm. 
* -* 06/14/1994 arvo Initial implementation. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1995, 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __SPHTRI_INCLUDED__ -#define __SPHTRI_INCLUDED__ - -#include "Vec3.h" -#include "Vec2.h" - -namespace ArvoMath { - - /* - * The (Oblique) Spherical Triangle ABC. Edge lengths (segments of great - * circles) are a, b, and c. The (dihedral) angles are Alpha, Beta, and Gamma. - * - * B - * o - * / \ - * / \ - * /Beta \ - * / \ - * c / \ a - * / \ - * / \ - * / \ - * / \ - * / \ - * /Alpha Gamma \ - * o-----------------------o - * A b C - * - */ - - class SphericalTriangle { - - public: // methods - SphericalTriangle() { Init(); } - SphericalTriangle( const SphericalTriangle &T ) { *this = T; } - SphericalTriangle( const Vec3 &, const Vec3 &, const Vec3 & ); - SphericalTriangle & operator()( const Vec3 &, const Vec3 &, const Vec3 & ); - ~SphericalTriangle( ) {} - void operator=( const SphericalTriangle &T ) { *this = T; } - Vec3 Chart ( float x, float y ) const; // Const-Jacobian map from square. - Vec2 Coord ( const Vec3 &P ) const; // Get 2D coords of a point. - int Orient( ) const { return orient; } - int Inside( const Vec3 & ) const; - float SolidAngle() const { return area; } - float SignedSolidAngle() const { return -orient * area; } // CC is pos. 
- const Vec3 &A() const { return A_ ; } - const Vec3 &B() const { return B_ ; } - const Vec3 &C() const { return C_ ; } - float a() const { return a_ ; } - float b() const { return b_ ; } - float c() const { return c_ ; } - float Cos_a() const { return cos_a ; } - float Cos_b() const { return cos_b ; } - float Cos_c() const { return cos_c ; } - float Alpha() const { return alpha ; } - float Beta () const { return beta ; } - float Gamma() const { return gamma ; } - float CosAlpha() const { return cos_alpha; } - float CosBeta () const { return cos_beta ; } - float CosGamma() const { return cos_gamma; } - Vec3 VecIrrad() const; // Returns the vector irradiance. - SphericalTriangle Dual() const; - SphericalTriangle New_Alpha( float alpha ) const; - SphericalTriangle New_Beta ( float beta ) const; - SphericalTriangle New_Gamma( float gamma ) const; - - private: // methods - void Init( ); - void Init( const Vec3 &A, const Vec3 &B, const Vec3 &C ); - - private: // data - Vec3 A_, B_, C_, U; // The vertices (and a temp vector). - float a_, b_, c_; // The edge lengths. - float alpha, beta, gamma; // The angles. - float cos_a, cos_b, cos_c; - float cos_alpha, cos_beta, cos_gamma; - float area; - float sin_alpha, product; // Used in sampling algorithm. - int orient; // Orientation. 
- }; - - inline double CosDihedralAngle( const Vec3 &A, const Vec3 &B, const Vec3 &C ) - { - float x = Unit( A ^ B ) * Unit( C ^ B ); - if( x < -1.0 ) x = -1.0; - if( x > 1.0 ) x = 1.0; - return x; - } - - inline double DihedralAngle( const Vec3 &A, const Vec3 &B, const Vec3 &C ) - { - return acos( CosDihedralAngle( A, B, C ) ); - } - - extern std::ostream &operator<<( std::ostream &out, const SphericalTriangle & ); -}; -#endif diff --git a/src/nvtt/bc7/arvo/Token.cpp b/src/nvtt/bc7/arvo/Token.cpp deleted file mode 100644 index 9575d92..0000000 --- a/src/nvtt/bc7/arvo/Token.cpp +++ /dev/null @@ -1,913 +0,0 @@ -/*************************************************************************** -* Token.h * -* * -* The Token class ecapsulates a lexical analyzer for C++-like syntax. * -* A token instance is associated with one or more text files, and * -* grabs C++ tokens from them sequentially. There are many member * -* functions designed to make parsing easy, such as "==" operators for * -* strings and characters, and automatic conversion of numeric tokens * -* into numeric values. * -* * -* Files can be nested via #include directives, and both styles of C++ * -* comments are supported. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 10/05/99 Fixed bug in TokFrame string allocation. * -* arvo 01/15/95 Added ifdef, ifndef, else, and endif. * -* arvo 02/13/94 Added Debug() member function. * -* arvo 01/22/94 Several sections rewritten. * -* arvo 06/19/93 Converted to C++ * -* arvo 07/15/89 Rewritten for scene description parser. * -* arvo 01/22/89 Initial coding. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. 
See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include "Token.h" -#include "Char.h" - -namespace ArvoMath { - - FILE* Token::debug = NULL; // Static data member of Token class. - int Token::argc = 0; - char** Token::argv = NULL; - - typedef TokMacro *TokMacroPtr; - - static const int True = 1; - static const int False = 0; - static const int HashConst = 217; // Size of hash-table for macros. - - - TokFrame::TokFrame() - { - next = NULL; - source = NULL; - fname = NULL; - line = 0; - column = 0; - } - - TokFrame::~TokFrame() - { - if( fname != NULL ) delete[] fname; - if( source != NULL ) fclose( source ); - } - - void TokFrame::operator=( const TokFrame &frame ) - { - next = frame.next; - source = frame.source; - fname = strdup( frame.fname ); - line = frame.line; - column = frame.column; - } - - static int HashName( const char *str ) - { - static int prime[5] = { 7, 11, 17, 23, 3 }; - int k = 0; - int h = 0; - while( *str != NullChar ) - { - h += (*str++) * prime[k++]; - if( k == 5 ) k = 0; - } - if( h < 0 ) h = 0; // Check for overflow. 
- return h % HashConst; - } - - TokMacro *Token::MacroLookup( const char *str ) const - { - if( table == NULL ) return NULL; - int i = HashName( str ); - for( TokMacro *m = table[i]; m != NULL; m = m->next ) - { - if( strcmp( str, m->macro ) == 0 ) return m; - } - return NULL; - } - - int Token::MacroReplace( char *str, int &length, TokType &type ) const - { - TokMacro *m = MacroLookup( str ); - if( m == NULL ) return 0; - strcpy( str, m->repl ); - length = strlen( str ); - type = m->type; - return 1; - } - - /*-------------------------------------------------------------------------* - * D e b u g P r i n t * - * * - * This routine is used to record the entire token stream in a file to * - * use as a debugging aid. It does not affect the action of the lexer; * - * it merely records a "shadow" copy of all the tokens that are read by * - * ANY Token instance. The data that is written to the file is * - * * - * * - * * - *-------------------------------------------------------------------------*/ - static void DebugPrint( const Token &tok, FILE *fp ) - { - fprintf( fp, "%3d %3d ", tok.Line(), tok.Column() ); - fprintf( fp, "%s " , tok.FileName() ); - fprintf( fp, "%s\n" , tok.Spelling() ); - fflush ( fp ); - } - - /*-------------------------------------------------------------------------* - * T o k e n (Constructors) * - * * - *-------------------------------------------------------------------------*/ - Token::Token( const char *file_name ) - { - Init(); - Open( file_name ); - } - - Token::Token( FILE *fp ) - { - Init(); - Open( fp ); - } - - Token::Token( ) - { - Init(); - } - - /*-------------------------------------------------------------------------* - * T o k e n (Destructor) * - * * - * Close all files and deletes all frames and paths. * - * * - *-------------------------------------------------------------------------*/ - Token::~Token( ) - { - // Don't try to delete "frame" as its a member of this class, not - // something that we've allocated. 
- TokFrame *f = frame.next; - while( f != NULL ) - { - TokFrame *n = f->next; - delete f; - f = n; - } - ClearPaths(); - } - - /*-------------------------------------------------------------------------* - * O p e n * - * * - * Establish a new file to read from, either by name, or by pointer. * - * * - *-------------------------------------------------------------------------*/ - void Token::Open( const char *file_name ) - { - FILE *fp = fopen( file_name, "r" ); - if( fp == NULL ) return; - Open( fp ); - frame.fname = strdup( file_name ); - } - - void Token::Open( FILE *fp ) - { - frame.source = fp; - frame.line = 1; - frame.column = 0; - pushed = NullChar; - } - - /*-------------------------------------------------------------------------* - * O p e r a t o r == * - * * - * A token can be compared with a string, a single character, or a type. * - * * - *-------------------------------------------------------------------------*/ - int Token::operator==( const char *s ) const - { - const char *t = spelling; - if( case_sensitive ) - { - do { if( *s != *t ) return False; } - while( *s++ && *t++ ); - } - else - { - do { if( ToUpper(*s) != ToUpper(*t) ) return False; } - while( *s++ && *t++ ); - } - return True; - } - - int Token::operator==( char c ) const - { - if( length != 1 ) return False; - if( case_sensitive ) return spelling[0] == c; - else return ToUpper(spelling[0]) == ToUpper(c); - } - - int Token::operator==( TokType _type_ ) const - { - int match = 0; - switch( _type_ ) - { - case T_char : match = ( type == T_string && Len() == 1 ); break; - case T_numeric: match = ( type == T_integer || type == T_float ); break; - default : match = ( type == _type_ ); break; - } - return match; - } - - /*-------------------------------------------------------------------------* - * O p e r a t o r != * - * * - * Define negations of the three types of "==" tests. 
* - * * - *-------------------------------------------------------------------------*/ - int Token::operator!=( const char *s ) const { return !( *this == s ); } - int Token::operator!=( char c ) const { return !( *this == c ); } - int Token::operator!=( TokType t ) const { return !( *this == t ); } - - /*-------------------------------------------------------------------------* - * E r r o r * - * * - * Print error message to "stderr" followed by optional "name". * - * * - *-------------------------------------------------------------------------*/ - void Token::Error( TokError error, const char *name ) - { - char *s; - switch( error ) - { - case T_malformed_float : s = "malformed real number "; break; - case T_unterm_string : s = "unterminated string "; break; - case T_unterm_comment : s = "unterminated comment "; break; - case T_file_not_found : s = "include file not found: "; break; - case T_unknown_directive : s = "unknown # directive "; break; - case T_string_expected : s = "string expected "; break; - case T_putback_error : s = "putback overflow "; break; - case T_name_too_long : s = "file name is too long "; break; - case T_no_endif : s = "#endif directive missing"; break; - case T_extra_endif : s = "#endif with no #ifdef "; break; - case T_extra_else : s = "#else with no #ifdef "; break; - default : s = "unknown error type "; break; - } - fprintf( stderr, "LEXICAL ERROR, line %d, column %d: %s", - frame.line, frame.column, s ); - if( name == NULL ) - fprintf( stderr, " \n" ); - else fprintf( stderr, "%s\n", name ); - exit( 1 ); - } - - /*-------------------------------------------------------------------------* - * G e t c * - * * - * This routine fetches one character at a time from the current file * - * being read. It is responsible for keeping track of the column number * - * and for handling single characters that have been "put back". 
* - * * - *-------------------------------------------------------------------------*/ - int Token::Getc( int &c ) - { - if( pushed != NullChar ) // Return the pushed character. - { - c = pushed; - pushed = NullChar; - } - else // Get a new character from the source file. - { - c = getc( frame.source ); - frame.column++; - } - return c; - } - - /*-------------------------------------------------------------------------* - * N o n W h i t e * - * * - * This routine implements a simple finite state machine that skips * - * white space and recognizes the two styles of comments used in C++. * - * It returns the first non-white character not part of a comment. * - * * - *-------------------------------------------------------------------------*/ - int Token::NonWhite( int &c ) - { -start_state: - Getc( c ); - if( c == Space ) goto start_state; - if( c == Tab ) goto start_state; - if( c == NewLine ) goto start_new_line; - if( c == Slash ) goto start_comment; - goto return_char; - -start_comment: - Getc( c ); - if( c == Star ) goto in_comment1; - if( c == Slash ) goto in_comment2; - Unget( c ); - c = Slash; - goto return_char; - -in_comment1: - Getc( c ); - if( c == Star ) goto end_comment1; - if( c == NewLine ) goto newline_in_comment; - if( c == EOF ) goto return_char; - goto in_comment1; - -end_comment1: - Getc( c ); - if( c == Slash ) goto start_state; - if( c == NewLine ) goto newline_in_comment; - if( c == EOF ) goto unterm_comment; - goto in_comment1; - -in_comment2: - Getc( c ); - if( c == NewLine ) goto start_new_line; - if( c == EOF ) goto return_char; - goto in_comment2; - -unterm_comment: - Error( T_unterm_comment ); - c = EOF; - goto return_char; - -start_new_line: - frame.line++; - frame.column = 0; - goto start_state; - -newline_in_comment: - frame.line++; - frame.column = 0; - goto in_comment1; - -return_char: - Tcolumn = frame.column; // This is where the token starts. 
- return c; - } - - /*-------------------------------------------------------------------------* - * N e x t R a w T o k * - * * - *-------------------------------------------------------------------------*/ - int Token::NextRawTok( ) - { - static int Trans0[] = { 0, 1, 3, 3, 3 }; // Found a digit. - static int Trans1[] = { 5, 6, 4, 6, 7 }; // Found a sign. - static int Trans2[] = { 1, 6, 7, 6, 7 }; // Found decimal point. - static int Trans3[] = { 2, 2, 7, 6, 7 }; // Found an exponent. - static int Trans4[] = { 5, 6, 7, 6, 7 }; // Found something else. - char *tok = spelling; - int state; - int c; - - length = 0; - type = T_null; - - // Skip comments and whitespace. - - if( NonWhite( c ) == EOF ) goto endtok; - - // Is this the beginning of an identifier? If so, get the rest. - - if( isAlpha( c ) ) - { - type = T_ident; - do { - *tok++ = c; - length++; - if( Getc( c ) == EOF ) goto endtok; - } - while( isAlpha( c ) || isDigit( c ) || c == Underscore ); - Unget( c ); - goto endtok; - } - - // Is this the beginning of a number? - - else if( isDigit( c ) || c == Minus || c == Period ) - { - char c1 = c; - state = 0; - for(;;) - { - *tok++ = c; - length++; - switch( Getc( c ) ) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': state = Trans0[ state ]; break; - case '+': - case '-': state = Trans1[ state ]; break; - case '.': state = Trans2[ state ]; break; - case 'e': - case 'E': state = Trans3[ state ]; break; - default : state = Trans4[ state ]; break; - } - switch( state ) - { - case 5 : Unget( c ); - type = ( c1 == Period ) ? T_float : T_integer; - goto endtok; - case 6 : Unget( c ); type = T_float ; goto endtok; - case 7 : Error( T_malformed_float ) ; break; - default: continue; - } - } // for - } // if numeric - - // Is this the beginning of an operator? - - if( c == '*' || c == '>' || c == '<' || c == '+' || c == '-' || c == '!' 
) - { - char oldc = c; - type = T_other; - *tok++ = c; - length++; - if( Getc( c ) == EOF ) goto endtok; - if( c == oldc || c == EqualSign ) - { - *tok++ = c; - length++; - } - else Unget( c ); - goto endtok; - } - - // Is this the beginning of a string? - - else if( c == DoubleQuote ) - { - type = T_string; - while( Getc( c ) != EOF && length < MaxTokenLen ) - { - if( c == DoubleQuote ) goto endtok; - *tok++ = c; - length++; - } - Error( T_unterm_string ); - } - - // Is this the beginning of a "#" directive? - - else if( c == Hash ) - { - type = T_directive; - NonWhite( c ); - while( isAlpha( c ) ) - { - *tok++ = c; - length++; - Getc( c ); - } - Unget( c ); - goto endtok; - } - - // This must be a one-character token. - - else - { - *tok++ = c; - length = 1; - type = T_other; - } - -endtok: // Jump to here when token is completed. - - *tok = NullChar; // Terminate the string. - if( debug != NULL ) DebugPrint( *this, debug ); - - return length; - } - - /*-------------------------------------------------------------------------* - * N e x t T o k * - * * - *-------------------------------------------------------------------------*/ - int Token::NextTok( ) - { - NextRawTok(); - - // If the token is an identifier, see if it's a macro. - // If the macro substitution is null, get another token. - - if( type == T_ident ) - { - if( table != NULL ) - { - if( MacroReplace( spelling, length, type ) && debug != NULL ) - DebugPrint( *this, debug ); - } - if( type == T_nullmacro ) NextTok(); - } - return length; - } - - /*-------------------------------------------------------------------------* - * O p e r a t o r - - * - * * - * Puts back the last token found. Only one token can be put back. * - * * - *-------------------------------------------------------------------------*/ - Token & Token::operator--( ) // Put the last token back. - { - if( put_back ) Error( T_putback_error ); // Can only handle one putback. 
- put_back = 1; - return *this; - } - - Token & Token::operator--( int ) // Postfix decrement. - { - fprintf( stderr, "Postfix decrement is not implemented for the Token class.\n" ); - return *this; - } - - /*-------------------------------------------------------------------------* - * H a n d l e D i r e c t i v e * - * * - * Directive beginning with "#" must be handled by the lexer, as they * - * determine the current source file via "#include", etc. * - * * - * Returns 1 if, after handling this directive, we now have the next * - * token. * - * * - *-------------------------------------------------------------------------*/ - int Token::HandleDirective( ) - { - FILE *fp; - char name[128]; - if( *this == "define" ) - { - NextRawTok(); - strcpy( tempbuff, Spelling() ); // This is the macro name. - int line = Line(); - NextRawTok(); - if( Line() == line ) - AddMacro( tempbuff, Spelling(), Type() ); - else - { - // If next token is on a different line; we went too far. - AddMacro( tempbuff, "", T_nullmacro ); - return 1; // Signal that we already have the next token. - } - } - else if( *this == "include" ) - { - NextRawTok(); - if( *this == "<" ) - { - GetName( name, sizeof(name) ); - PushFrame( ResolveName( name ), name ); - } - else if( type == T_string ) - { - fp = fopen( spelling, "r" ); - if( fp == NULL ) Error( T_file_not_found, spelling ); - else PushFrame( fp, spelling ); - } - else Error( T_string_expected ); - } - else if( *this == "ifdef" ) - { - NextRawTok(); - TokMacro *m = MacroLookup( Spelling() ); - if( m == NULL ) // Skip until else or endif. - { - while( *this != T_null ) - { - NextRawTok(); - if( *this != T_directive ) continue; - if( *this == "endif" ) break; - if( *this == "else" ) { if_nesting++; break; } // Like m != NULL. - } - if( *this == T_null ) Error( T_no_endif ); - return 0; // Ready to get the next token. 
- } - else if_nesting++; - } - else if( *this == "ifndef" ) - { - NextRawTok(); - TokMacro *m = MacroLookup( Spelling() ); - if( m != NULL ) // Skip until else or endif. - { - while( *this != T_null ) - { - NextRawTok(); - if( *this != T_directive ) continue; - if( *this == "endif" ) break; - if( *this == "else" ) { if_nesting++; break; } // Like m == NULL. - } - if( *this == T_null ) Error( T_no_endif ); - return 0; // Ready to get the next token. - } - else if_nesting++; - } - else if( *this == "else" ) // Skip until #endif. - { - if( if_nesting == 0 ) Error( T_extra_else ); - while( *this != T_null ) - { - NextRawTok(); - if( *this == T_directive && *this == "endif" ) break; - } - if( *this == T_null ) Error( T_no_endif ); - if_nesting--; - return 0; // Ready to get next token. - } - else if( *this == "endif" ) - { - if( if_nesting == 0 ) Error( T_extra_endif ); - if_nesting--; - return 0; // Ready to get next token. - } - else if( *this == "error" ) - { - int line = Line(); - NextTok(); // Allow macro substitution. - if( Line() == line ) - { - fprintf( stderr, "(preprocessor, line %d) %s\n", line, Spelling() ); - return 0; // Ready to get next token. - } - else - { - // If next token is on a different line; we went too far. - fprintf( stderr, "(null preprocessor message, line %d)\n", line ); - return 1; // Signal that we already have the next token. - } - } - return 0; - } - - - /*-------------------------------------------------------------------------* - * O p e r a t o r + + * - * * - * Grab the next token from the current source file. If at end of file, * - * pick up where we left off in the previous file. If there is no * - * previous file, return "T_null". * - * * - *-------------------------------------------------------------------------*/ - Token & Token::operator++( ) - { - if( put_back ) - { - put_back = 0; - return *this; - } - - // If we've reached the end of an include file, pop the stack. 
- - for(;;) - { - NextTok(); - if( type == T_directive ) - { - if( HandleDirective() ) break; - } - else if( type == T_null ) - { - fclose( frame.source ); - if( !PopFrame() ) break; - } - else break; // We have a real token. - } - - // Now fill in the value fields if the token is a number. - - switch( type ) - { - case T_integer : ivalue = atoi( spelling ); break; - case T_float : fvalue = atof( spelling ); break; - case T_null : if( if_nesting > 0 ) Error( T_no_endif ); break; - default : break; - } - - return *this; - } - - Token & Token::operator++( int ) - { - fprintf( stderr, "Postfix increment is not implemented for the Token class.\n" ); - return *this; - } - - /*-------------------------------------------------------------------------* - * T o k e n Push & Pop Frame * - * * - * These functions are used to create and destroy the context "frames" * - * that are used to handle nested files (via "include"). * - * * - *-------------------------------------------------------------------------*/ - void Token::PushFrame( FILE *fp, char *fname ) - { - // Create a copy of the current (top-level) frame. - - TokFrame *n = new TokFrame; - *n = frame; - - // Now overwrite the top-level frame with the new state. - - frame.next = n; - frame.source = fp; - frame.line = 1; - frame.column = 0; - frame.fname = strdup( fname ); - pushed = NullChar; - } - - int Token::PopFrame() - { - if( frame.next == NULL ) return 0; - TokFrame *old = frame.next; - frame = *old; - delete old; // Delete the frame that we just copied from. 
- return 1; - } - - /*-------------------------------------------------------------------------* - * Miscellaneous Functions * - * * - *-------------------------------------------------------------------------*/ - void Token::Init() - { - case_sensitive = 1; - put_back = 0; - pushed = NullChar; - if_nesting = 0; - frame.source = NULL; - frame.next = NULL; - frame.fname = NULL; - first = NULL; - last = NULL; - table = NULL; - pushed = NullChar; - SearchArgs(); // Search command-line args for macro definitions. - } - - const char* Token::Spelling() const - { - return spelling; - } - - char Token::Char() const - { - return spelling[0]; - } - - const char* Token::FileName() const - { - static char *null_string = ""; - if( frame.fname == NULL ) return null_string; - else return frame.fname; - } - - float Token::Fvalue() const - { - float val = 0.0; - if( type == T_float ) val = fvalue; - if( type == T_integer ) val = ivalue; - return val; - } - - void Token::GetName( char *name, int max ) - { - int c; - for( int i = 1; i < max; i++ ) - { - if( NonWhite(c) == '>' ) - { - *name = NullChar; - return; - } - *name++ = c; - } - Error( T_name_too_long ); - } - - void Token::AddPath( const char *new_path ) - { - char *name = strdup( new_path ); - if( name == NULL ) return; - TokPath *p = new TokPath; - p->next = NULL; - p->path = name; - if( first == NULL ) first = p; - else last->next = p; - last = p; - } - - void Token::ClearPaths() - { - TokPath *p = first; - while( p != NULL ) - { - TokPath *q = p->next; - delete[] p->path; // delete the string. - delete p; // delete the path structure. 
- p = q; - } - first = NULL; - last = NULL; - } - - FILE *Token::ResolveName( const char *name ) - { - char resolved[128]; - for( const TokPath *p = first; p != NULL; p = p->next ) - { - strcpy( resolved, p->path ); - strcat( resolved, "/" ); - strcat( resolved, name ); - FILE *fp = fopen( resolved, "r" ); - if( fp != NULL ) return fp; - } - Error( T_file_not_found, name ); - return NULL; - } - - void Token::CaseSensitive( int on_off = 1 ) - { - case_sensitive = on_off; - } - - void Token::Debug( FILE *fp ) - { - debug = fp; - } - - void Token::AddMacro( const char *macro, const char *repl, TokType t ) - { - if( table == NULL ) // Create and initialize the table. - { - table = new TokMacroPtr[ HashConst ]; - for( int j = 0; j < HashConst; j++ ) table[j] = NULL; - } - int i = HashName( macro ); - TokMacro *m = new TokMacro; - m->next = table[i]; - m->macro = strdup( macro ); - m->repl = strdup( repl ); - m->type = t; - table[i] = m; - } - - void Token::Args( int argc_, char *argv_[] ) - { - argc = argc_; // Set the static variables. - argv = argv_; - } - - void Token::SearchArgs( ) - { - TokType type = T_null; - for( int i = 1; i < argc; i++ ) - { - if( strcmp( argv[i], "-macro" ) == 0 ) - { - if( i+2 >= argc ) - { - fprintf( stderr, "(Token) ERROR macro argument(s) missing\n" ); - return; - } - char *macro = argv[i+1]; - char *repl = argv[i+2]; - if( isAlpha ( repl[0] ) ) type = T_ident ; else - if( isInteger( repl ) ) type = T_integer; else - type = T_float ; - AddMacro( macro, repl, type ); - i += 2; - } - } - } -}; diff --git a/src/nvtt/bc7/arvo/Token.h b/src/nvtt/bc7/arvo/Token.h deleted file mode 100644 index eabdacc..0000000 --- a/src/nvtt/bc7/arvo/Token.h +++ /dev/null @@ -1,203 +0,0 @@ -/*************************************************************************** -* Token.h * -* * -* The Token class ecapsulates a lexical analyzer for C++-like syntax. 
* -* A token instance is associated with one or more text files, and * -* grabs C++ tokens from them sequentially. There are many member * -* functions designed to make parsing easy, such as "==" operators for * -* strings and characters, and automatic conversion of numeric tokens * -* into numeric values. * -* * -* Files can be nested via #include directives, and both styles of C++ * -* comments are supported. * -* * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 10/05/99 Fixed bug in TokFrame string allocation. * -* arvo 01/15/95 Added ifdef, ifndef, else, and endif. * -* arvo 02/13/94 Added Debug() member function. * -* arvo 01/22/94 Several sections rewritten. * -* arvo 06/19/93 Converted to C++ * -* arvo 07/15/89 Rewritten for scene description parser. * -* arvo 01/22/89 Initial coding. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __TOKEN_INCLUDED__ -#define __TOKEN_INCLUDED__ - -#include -#include - -namespace ArvoMath { - - const int MaxTokenLen = 128; - - typedef enum { - T_null, - T_char, // A string of length 1. - T_string, - T_integer, - T_float, - T_ident, - T_other, - T_numeric, // Either T_float or T_int (use with == operator). - T_directive, // Directives like #include are not returned to the user. 
- T_nullmacro - } TokType; - - typedef enum { - T_malformed_float, - T_unterm_string, - T_unterm_comment, - T_file_not_found, - T_unknown_directive, - T_string_expected, - T_putback_error, - T_name_too_long, - T_no_endif, - T_extra_endif, - T_extra_else - } TokError; - - class TokFrame { - public: - TokFrame(); - TokFrame( const TokFrame &frame ) { *this = frame; } - ~TokFrame(); - void operator=( const TokFrame & ); - public: - TokFrame *next; - FILE *source; - char *fname; - int line; - int column; - }; - - struct TokPath { - char *path; - TokPath *next; - }; - - struct TokMacro { - char *macro; - char *repl; - TokType type; - TokMacro *next; - }; - - class Token { - - public: - // Constructors and destructor. - - Token(); - Token( const char *file_name ); - Token( FILE *file_pointer ); - ~Token(); - - // Const data members for querying token information. - - TokType Type() const { return type; } // The type of token found. - int Len() const { return length; } // The length of the token. - int Line() const { return frame.line; } // The line it was found on. - int Column() const { return Tcolumn; } // The column it began in. - long Ivalue() const { return ivalue; } // Token value if an integer. - float Fvalue() const; // Token value if int or float. - char Char() const; // The token (if a Len() == 1). - - // Operators. - - int operator == ( const char* ) const; // 1 if strings match. - int operator != ( const char* ) const; // 0 if strings match. - int operator == ( char ) const; // 1 if token is this char. - int operator != ( char ) const; // 0 if token is this char. - int operator == ( TokType ) const; // 1 if token is of this type. - int operator != ( TokType ) const; // 0 if token is of this type. - Token & operator ++ ( ); // (prefix) Get the next token. - Token & operator -- ( ); // (prefix) Put back one token. - Token & operator ++ ( int ); // (postfix) Undefined. - Token & operator -- ( int ); // (postfix) Undefined. - - // State-setting member functions. 
- - void Open( FILE * ); // Read already opened file. - void Open( const char * ); // Open the named file. - void CaseSensitive( int on_off ); // Applies to == and != operators. - void AddPath( const char * ); // Adds path for <...> includes. - void ClearPaths(); // Remove all search paths. - - // Miscellaneous. - - const char* Spelling() const; // The token itself. - const char* FileName() const; // Current file being lexed. - static void Debug( FILE * ); // Write all token streams to a file. - static void Args ( int argc, char *argv[] ); // Search args for macro settings. - void AddMacro( const char*, const char*, TokType type ); - void SearchArgs(); - - private: - - // Private member functions. - - void Init(); - int Getc ( int & ); - void Unget( int c ) { pushed = c; } - void Error( TokError error, const char *name = NULL ); - int NonWhite( int & ); - int HandleDirective(); - int NextRawTok(); // No macro substitutions. - int NextTok(); - void PushFrame( FILE *fp, char *fname = NULL ); - int PopFrame(); - void GetName( char *name, int max ); - FILE *ResolveName( const char *name ); - TokMacro *MacroLookup( const char *str ) const; - int MacroReplace( char *str, int &length, TokType &type ) const; - - // Private data members. - - TokPath *first; - TokPath *last; - TokMacro **table; - TokFrame frame; - TokType type; - long ivalue; - float fvalue; - int length; - int Tcolumn; - int put_back; - int case_sensitive; - int pushed; - int if_nesting; - char spelling[ MaxTokenLen ]; - char tempbuff[ MaxTokenLen ]; - - // Static data members. - - static int argc; - static char **argv; - static FILE *debug; - }; - - - // Predicate-style functions for testing token types. 
- - inline int Null ( const Token &t ) { return t.Type() == T_null; } - inline int Numeric( const Token &t ) { return t.Type() == T_numeric; } - inline int StringP( const Token &t ) { return t.Type() == T_string; } -}; -#endif diff --git a/src/nvtt/bc7/arvo/Vec2.cpp b/src/nvtt/bc7/arvo/Vec2.cpp deleted file mode 100644 index cca6723..0000000 --- a/src/nvtt/bc7/arvo/Vec2.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/*************************************************************************** -* Vec2.C * -* * -* Basic operations on 2-dimensional vectors. This special case is useful * -* because nearly all operations are performed inline. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 05/22/98 Added TimedVec2, extending Vec2. * -* arvo 06/17/93 Initial coding. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include "ArvoMath.h" -#include "Vec2.h" -#include "form.h" - -namespace ArvoMath { - - const Vec2 Vec2::Zero; - const Vec2 Vec2::Xaxis( 1, 0 ); - const Vec2 Vec2::Yaxis( 0, 1 ); - - // Most routines are now inline. 
- - float Normalize( Vec2 &A ) - { - float d = Len( A ); - if( d != 0.0 ) - { - A.X() /= d; - A.Y() /= d; - } - return d; - } - - Vec2 Min( const Vec2 &A, const Vec2 &B ) - { - return Vec2( Min( A.X(), B.X() ), Min( A.Y(), B.Y() ) ); - } - - Vec2 Max( const Vec2 &A, const Vec2 &B ) - { - return Vec2( Max( A.X(), B.X() ), Max( A.Y(), B.Y() ) ); - } - - std::ostream &operator<<( std::ostream &out, const Vec2 &A ) - { - out << form( " %9.5f %9.5f\n", A.X(), A.Y() ); - return out; - } - - std::ostream &operator<<( std::ostream &out, const Mat2x2 &M ) - { - out << form( " %9.5f %9.5f\n", M(0,0), M(0,1) ) - << form( " %9.5f %9.5f\n", M(1,0), M(1,1) ) - << std::endl; - return out; - } - - Mat2x2::Mat2x2( const Vec2 &c1, const Vec2 &c2 ) - { - m[0][0] = c1.X(); - m[1][0] = c1.Y(); - m[0][1] = c2.X(); - m[1][1] = c2.Y(); - } - - // Return solution x of the system Ax = b. - Vec2 Solve( const Mat2x2 &A, const Vec2 &b ) - { - float MachEps = MachineEpsilon(); - Vec2 x; - double d = det( A ); - double n = Norm1( A ); - if( n <= MachEps || Abs(d) <= MachEps * n ) return Vec2::Zero; - x.X() = A(1,1) * b.X() - A(0,1) * b.Y(); - x.Y() = -A(1,0) * b.X() + A(0,0) * b.Y(); - return x / d; - } -}; diff --git a/src/nvtt/bc7/arvo/Vec2.h b/src/nvtt/bc7/arvo/Vec2.h deleted file mode 100644 index 7aca458..0000000 --- a/src/nvtt/bc7/arvo/Vec2.h +++ /dev/null @@ -1,358 +0,0 @@ -/*************************************************************************** -* Vec2.h * -* * -* Basic operations on 2-dimensional vectors. This special case is useful * -* because nearly all operations are performed inline. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 05/22/98 Added TimedVec2, extending Vec2. * -* arvo 06/17/93 Initial coding. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1999, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __VEC2_INCLUDED__ -#define __VEC2_INCLUDED__ - -#include -#include -#include "ArvoMath.h" - -namespace ArvoMath { - - class Vec2; // 2-D floating-point vector. - class TimedVec2; // 2-D vector with a time stamp. - class Mat2x2; // 2x2 floating-point matrix. - - class Vec2 { - public: - Vec2( ) { x = 0.0; y = 0.0; } - Vec2( float a, float b ) { x = a; y = b; } - Vec2( const Vec2 &A ) { x = A.X(); y = A.Y(); } - ~Vec2() {} - Vec2 &operator=( float s ) { return Set( s, s ); } - Vec2 &operator=( const Vec2 &A ) { return Set( A.X(), A.Y() ); } - float X() const { return x; } - float Y() const { return y; } - float &X() { return x; } - float &Y() { return y; } - float operator[]( int i ) const { return *( &x + i ); } - float &operator[]( int i ) { return *( &x + i ); } - Vec2 &Set( float a, float b ) { x = a; y = b; return *this; } - Vec2 &Set( const Vec2 &A ) { return Set( A.X(), A.Y() ); } - public: - static const Vec2 Zero; - static const Vec2 Xaxis; - static const Vec2 Yaxis; - protected: - float x, y; - }; - - // This class simply adds a time field to the Vec2 class so that time-stamped - // coordinates can be easily inserted into objects such as Polylines. 
- - class TimedVec2 : public Vec2 { - public: - TimedVec2() { time = 0; } - TimedVec2( const Vec2 &p , long u = 0 ) { Set( p ); time = u; } - TimedVec2( float x, float y, long u = 0 ) { Set(x,y); time = u; } - ~TimedVec2() {} - Vec2 &Coord() { return *this; } - Vec2 Coord() const { return *this; } - long Time () const { return time; } - void SetTime( long u ) { time = u; } - protected: - long time; - }; - - class Mat2x2 { - public: - Mat2x2( ) { Set( 0, 0, 0, 0 ); } - Mat2x2( float a, float b, float c, float d ) { Set( a, b, c, d ); } - Mat2x2( const Vec2 &c1, const Vec2 &c2 ); - ~Mat2x2( ) {} - Mat2x2 &operator*=( float scale ); - Mat2x2 operator* ( float scale ) const; - void Set( float a, float b, float c, float d ) - { m[0][0] = a; m[0][1] = b; m[1][0] = c; m[1][1] = d; } - float operator()( int i, int j ) const { return m[i][j]; } - float &operator()( int i, int j ) { return m[i][j]; } - private: - float m[2][2]; - }; - - - //========================================== - //=== Miscellaneous external functions === - //========================================== - - extern float Normalize( Vec2 &A ); - extern Vec2 Min ( const Vec2 &A, const Vec2 &B ); - extern Vec2 Max ( const Vec2 &A, const Vec2 &B ); - - - //========================================== - //=== Norm-related functions === - //========================================== - - inline double LenSqr ( const Vec2 &A ) { return Sqr(A[0]) + Sqr(A[1]); } - inline double Len ( const Vec2 &A ) { return sqrt( LenSqr( A ) ); } - inline double OneNorm( const Vec2 &A ) { return Abs( A.X() ) + Abs( A.Y() ); } - inline double TwoNorm( const Vec2 &A ) { return Len(A); } - inline float SupNorm( const Vec2 &A ) { return MaxAbs( A.X(), A.Y() ); } - - - //========================================== - //=== Addition === - //========================================== - - inline Vec2 operator+( const Vec2 &A, const Vec2 &B ) - { - return Vec2( A.X() + B.X(), A.Y() + B.Y() ); - } - - inline Vec2& operator+=( Vec2 &A, const Vec2 
&B ) - { - A.X() += B.X(); - A.Y() += B.Y(); - return A; - } - - - //========================================== - //=== Subtraction === - //========================================== - - inline Vec2 operator-( const Vec2 &A, const Vec2 &B ) - { - return Vec2( A.X() - B.X(), A.Y() - B.Y() ); - } - - inline Vec2 operator-( const Vec2 &A ) - { - return Vec2( -A.X(), -A.Y() ); - } - - inline Vec2& operator-=( Vec2 &A, const Vec2 &B ) - { - A.X() -= B.X(); - A.Y() -= B.Y(); - return A; - } - - - //========================================== - //=== Multiplication === - //========================================== - - inline Vec2 operator*( float c, const Vec2 &A ) - { - return Vec2( c * A.X(), c * A.Y() ); - } - - inline Vec2 operator*( const Vec2 &A, float c ) - { - return Vec2( c * A.X(), c * A.Y() ); - } - - inline float operator*( const Vec2 &A, const Vec2 &B ) // Inner product - { - return A.X() * B.X() + A.Y() * B.Y(); - } - - inline Vec2& operator*=( Vec2 &A, float c ) - { - A.X() *= c; - A.Y() *= c; - return A; - } - - //========================================== - //=== Division === - //========================================== - - inline Vec2 operator/( const Vec2 &A, float c ) - { - return Vec2( A.X() / c, A.Y() / c ); - } - - inline Vec2 operator/( const Vec2 &A, const Vec2 &B ) - { - return A - B * (( A * B ) / LenSqr( B )); - } - - - //========================================== - //=== Comparison === - //========================================== - - inline int operator==( const Vec2 &A, const Vec2 &B ) - { - return A.X() == B.X() && A.Y() == B.Y(); - } - - inline int operator!=( const Vec2 &A, const Vec2 &B ) - { - return A.X() != B.X() || A.Y() != B.Y(); - } - - inline int operator<=( const Vec2 &A, const Vec2 &B ) - { - return A.X() <= B.X() && A.Y() <= B.Y(); - } - - inline int operator<( const Vec2 &A, const Vec2 &B ) - { - return A.X() < B.X() && A.Y() < B.Y(); - } - - inline int operator>=( const Vec2 &A, const Vec2 &B ) - { - return A.X() >= B.X() 
&& A.Y() >= B.Y(); - } - - inline int operator>( const Vec2 &A, const Vec2 &B ) - { - return A.X() > B.X() && A.Y() > B.Y(); - } - - //========================================== - //=== Miscellaneous === - //========================================== - - inline float operator|( const Vec2 &A, const Vec2 &B ) // Inner product - { - return A * B; - } - - inline Vec2 Unit( const Vec2 &A ) - { - float c = LenSqr( A ); - if( c > 0.0 ) c = 1.0 / sqrt( c ); - return c * A; - } - - inline Vec2 Unit( const Vec2 &A, float &len ) - { - float c = LenSqr( A ); - if( c > 0.0 ) - { - len = sqrt( c ); - return A / len; - } - len = 0.0; - return A; - } - - inline Vec2 Unit( float x, float y ) - { - return Unit( Vec2( x, y ) ); - } - - inline double dist( const Vec2 &A, const Vec2 &B ) - { - return Len( A - B ); - } - - inline float operator^( const Vec2 &A, const Vec2 &B ) - { - return A.X() * B.Y() - A.Y() * B.X(); - } - - inline int Quadrant( const Vec2 &A ) - { - if( A.Y() >= 0.0 ) return A.X() >= 0.0 ? 1 : 2; - return A.X() >= 0.0 ? 4 : 3; - } - - inline Vec2 OrthogonalTo( const Vec2 &A ) // A vector orthogonal to that given. - { - return Vec2( -A.Y(), A.X() ); - } - - inline Vec2 Interpolate( const Vec2 &A, const Vec2 &B, float t ) - { - // Compute a point along the segment joining points A and B - // according to the normalized parameter t in [0,1]. - return ( 1.0 - t ) * A + t * B; - } - - //========================================== - //=== Operations involving Matrices === - //========================================== - - inline Mat2x2 Outer( const Vec2 &A, const Vec2 &B ) // Outer product. 
- { - Mat2x2 C; - C(0,0) = A.X() * B.X(); - C(0,1) = A.X() * B.Y(); - C(1,0) = A.Y() * B.X(); - C(1,1) = A.Y() * B.Y(); - return C; - } - - inline Vec2 operator*( const Mat2x2 &M, const Vec2 &A ) - { - return Vec2( - M(0,0) * A.X() + M(0,1) * A.Y(), - M(1,0) * A.X() + M(1,1) * A.Y() - ); - } - - inline Mat2x2 &Mat2x2::operator*=( float scale ) - { - m[0][0] *= scale; - m[0][1] *= scale; - m[1][0] *= scale; - m[1][1] *= scale; - return *this; - } - - inline Mat2x2 Mat2x2::operator*( float scale ) const - { - return Mat2x2( - scale * m[0][0], scale * m[0][1], - scale * m[1][0], scale * m[1][1] - ); - } - - inline Mat2x2 operator*( float scale, const Mat2x2 &M ) - { - return M * scale; - } - - inline float Norm1( const Mat2x2 &A ) - { - return Max( Abs(A(0,0)) + Abs(A(0,1)), Abs(A(1,0)) + Abs(A(1,1)) ); - } - - inline double det( const Mat2x2 &A ) - { - return A(0,0) * A(1,1) - A(1,0) * A(0,1); - } - - extern Vec2 Solve( // Return solution x of the system Ax = b. - const Mat2x2 &A, - const Vec2 &b - ); - - //========================================== - //=== Output routines === - //========================================== - - extern std::ostream &operator<<( std::ostream &out, const Vec2 & ); - extern std::ostream &operator<<( std::ostream &out, const Mat2x2 & ); -}; -#endif diff --git a/src/nvtt/bc7/arvo/Vec3.cpp b/src/nvtt/bc7/arvo/Vec3.cpp deleted file mode 100644 index 1033f84..0000000 --- a/src/nvtt/bc7/arvo/Vec3.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/*************************************************************************** -* Vec3.C * -* * -* Basic operations on 3-dimensional vectors. This special case is useful * -* because many operations are performed inline. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 10/27/94 Reorganized (removed Col & Row distinction). * -* arvo 06/14/93 Initial coding. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1994, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include "ArvoMath.h" -#include "Vec3.h" -#include "form.h" - -namespace ArvoMath { - - float Normalize( Vec3 &A ) - { - float d = Len( A ); - if( d > 0.0 ) - { - double c = 1.0 / d; - A.X() *= c; - A.Y() *= c; - A.Z() *= c; - } - return( d ); - } - - double Angle( const Vec3 &A, const Vec3 &B ) - { - double t = LenSqr(A) * LenSqr(B); - if( t <= 0.0 ) return 0.0; - return ArcCos( (A * B) / sqrt(t) ); - } - - /*-------------------------------------------------------------------------* - * O R T H O N O R M A L * - * * - * On Input A, B....: Two linearly independent 3-space vectors. * - * * - * On Return A.......: Unit vector pointing in original A direction. * - * B.......: Unit vector orthogonal to A and in subspace spanned * - * by original A and B vectors. * - * C.......: Unit vector orthogonal to both A and B, chosen so * - * that A-B-C forms a right-handed coordinate system. 
* - * * - *-------------------------------------------------------------------------*/ - int Orthonormal( Vec3 &A, Vec3 &B, Vec3 &C ) - { - if( Normalize( A ) == 0.0 ) return 1; - B /= A; - if( Normalize( B ) == 0.0 ) return 1; - C = A ^ B; - return 0; - } - - int Orthonormal( Vec3 &A, Vec3 &B ) - { - if( Normalize( A ) == 0.0 ) return 1; - B /= A; - if( Normalize( B ) == 0.0 ) return 1; - return 0; - } - - /*-------------------------------------------------------------------------* - * O R T H O G O N A L T O * - * * - * Returns a vector that is orthogonal to A (but of arbitrary length). * - * * - *-------------------------------------------------------------------------*/ - Vec3 OrthogonalTo( const Vec3 &A ) - { - float c = 0.5 * SupNorm( A ); - if( c == 0.0 ) return Vec3( 1.0, 0.0, 0.0 ); - if( c <= Abs(A.X()) ) return Vec3( -A.Y(), A.X(), 0.0 ); - if( c <= Abs(A.Y()) ) return Vec3( 0.0, -A.Z(), A.Y() ); - return Vec3( A.Z(), 0.0, -A.X() ); - } - - Vec3 Min( const Vec3 &A, const Vec3 &B ) - { - return Vec3( - Min( A.X(), B.X() ), - Min( A.Y(), B.Y() ), - Min( A.Z(), B.Z() )); - } - - Vec3 Max( const Vec3 &A, const Vec3 &B ) - { - return Vec3( - Max( A.X(), B.X() ), - Max( A.Y(), B.Y() ), - Max( A.Z(), B.Z() )); - } - - std::ostream &operator<<( std::ostream &out, const Vec3 &A ) - { - out << form( " %9.5f %9.5f %9.5f", A.X(), A.Y(), A.Z() ) << std::endl; - return out; - } -}; diff --git a/src/nvtt/bc7/arvo/Vec3.h b/src/nvtt/bc7/arvo/Vec3.h deleted file mode 100644 index b9d539f..0000000 --- a/src/nvtt/bc7/arvo/Vec3.h +++ /dev/null @@ -1,517 +0,0 @@ -/*************************************************************************** -* Vec3.h * -* * -* Basic operations on 3-dimensional vectors. This special case is useful * -* because many operations are performed inline. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 10/27/94 Reorganized (removed Col & Row distinction). * -* arvo 06/14/93 Initial coding. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1994, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __VEC3_INCLUDED__ -#define __VEC3_INCLUDED__ - -#include -#include -#include "Vec2.h" - -namespace ArvoMath { - - class Vec3 { - public: - Vec3( float c = 0.0 ) { x = c; y = c; z = c; } - Vec3( float a, float b, float c ) { x = a; y = b; z = c; } - Vec3( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); } - void operator=( float c ) { x = c; y = c; z = c; } - void operator=( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); } - void operator=( const Vec2 &A ) { x = A.X(); y = A.Y(); z = 0.0; } - ~Vec3() {} - float X() const { return x; } - float Y() const { return y; } - float Z() const { return z; } - float & X() { return x; } - float & Y() { return y; } - float & Z() { return z; } - float operator[]( int i ) const { return *( &x + i ); } - float & operator[]( int i ) { return *( &x + i ); } - private: - float x, y, z; - }; - - //class Mat3x3 { - //public: - // inline Mat3x3( ); - // Mat3x3( const Mat3x3 &M ) { *this = M; } - // Mat3x3( const Vec3 &, const Vec3 &, const Vec3 & ); // Three columns. 
- // ~Mat3x3( ) {} - // float operator()( int i, int j ) const { return m[i][j]; } - // float & operator()( int i, int j ) { return m[i][j]; } - // Mat3x3 & operator=( float ); - // Mat3x3 & operator=( const Mat3x3 & ); - // inline void ScaleRows( float, float, float ); - // inline void ScaleCols( float, float, float ); - // void Col( int n, const Vec3 & ); - // const float *Base() const { return &(m[0][0]); } - //private: - // float m[3][3]; - //}; - - //class Mat4x4 { - //public: - // Mat4x4( ); - // Mat4x4( const Mat4x4 &M ) { *this = M; } - // Mat4x4( const Mat3x3 &M ) ; - // ~Mat4x4( ) {} - // float operator()( int i, int j ) const { return m[i][j]; } - // float & operator()( int i, int j ) { return m[i][j]; } - // Mat4x4 & operator=( float ); - // Mat4x4 & operator=( const Mat4x4 & ); - // void Row( int i, int j, const Vec3 & ); - // void Col( int i, int j, const Vec3 & ); - // void ScaleRows( float, float, float, float ); - // void ScaleCols( float, float, float, float ); - // const float *Base() const { return &(m[0][0]); } - //private: - // float m[4][4]; - //}; - - - //========================================== - //=== External operators === - //========================================== - - //extern Vec3 operator * ( const Mat4x4 &, const Vec3 & ); - //extern Vec3 operator * ( const Vec3 &, const Mat4x4 & ); - //extern Mat3x3 operator * ( float , const Mat3x3 & ); - //extern Mat3x3 operator * ( const Mat3x3 &, float ); - //extern Mat3x3 operator / ( const Mat3x3 &, double ); - //extern Mat3x3 & operator *=( Mat3x3 &, float ); - //extern Mat3x3 & operator *=( Mat3x3 &, const Mat3x3 & ); - //extern Mat3x3 operator * ( const Mat3x3 &, const Mat3x3 & ); - //extern Mat3x3 operator + ( const Mat3x3 &, const Mat3x3 & ); - //extern Mat3x3 & operator +=( Mat3x3 &, const Mat3x3 & ); - //extern Mat3x3 operator - ( const Mat3x3 &, const Mat3x3 & ); - //extern Mat3x3 & operator -=( Mat3x3 &, const Mat3x3 & ); - //extern Mat4x4 operator * ( float , const Mat4x4 & ); - 
//extern Mat4x4 operator * ( const Mat4x4 &, float ); - //extern Mat4x4 operator / ( const Mat4x4 &, float ); - //extern Mat4x4 & operator *=( Mat4x4 &, float ); - //extern Mat4x4 operator * ( const Mat4x4 &, const Mat4x4 & ); - //extern Mat4x4 operator + ( const Mat4x4 &, const Mat4x4 & ); - //extern Mat4x4 & operator +=( Mat4x4 &, const Mat4x4 & ); - //extern Mat4x4 operator - ( const Mat4x4 &, const Mat4x4 & ); - //extern Mat4x4 & operator -=( Mat4x4 &, const Mat4x4 & ); - - - //========================================== - //=== Miscellaneous external functions === - //========================================== - - //extern Vec3 OrthogonalTo( const Vec3 & ); // A vector orthogonal to that given. - //extern Vec3 Min ( const Vec3 &, const Vec3 & ); - //extern Vec3 Max ( const Vec3 &, const Vec3 & ); - //extern double Angle ( const Vec3 &, const Vec3 & ); - //extern int Orthonormal ( Vec3 &, Vec3 & ); - //extern int Orthonormal ( Vec3 &, Vec3 &, Vec3 & ); - //extern float Trace ( const Mat3x3 & ); - //extern float Normalize ( Vec3 & ); - //extern float Norm1 ( const Mat3x3 & ); - //extern float SupNorm ( const Mat3x3 & ); - //extern double Determinant ( const Mat3x3 & ); - //extern Mat3x3 Transp ( const Mat3x3 & ); - //extern Mat3x3 Householder ( const Vec3 &, const Vec3 & ); - //extern Mat3x3 Householder ( const Vec3 & ); - //extern Mat3x3 Rotation3x3 ( float, float, float ); // Values in [0,1]. 
- //extern Mat3x3 Inverse ( const Mat3x3 & ); - //extern Mat3x3 Diag3x3 ( const Vec3 & ); - //extern Mat3x3 Diag3x3 ( float, float, float ); - //extern Mat3x3 Rotation3x3 ( const Vec3 &Axis, float angle ); - //extern Mat4x4 Rotation4x4 ( const Vec3 &Axis, const Vec3 &Origin, float angle ); - - - //========================================== - //=== Norm-related functions === - //========================================== - - inline double LenSqr ( const Vec3 &A ) { return Sqr(A[0]) + Sqr(A[1]) + Sqr(A[2]); } - inline double Len ( const Vec3 &A ) { return Sqrt( LenSqr( A ) ); } - inline double Norm1 ( const Vec3 &A ) { return Abs(A[0]) + Abs(A[1]) + Abs(A[2]); } - inline double Norm2 ( const Vec3 &A ) { return Len( A ); } - inline float SupNorm( const Vec3 &A ) { return MaxAbs( A[0], A[1], A[2] ); } - - - //========================================== - //=== Addition === - //========================================== - - inline Vec3 operator+( const Vec3 &A, const Vec3 &B ) - { - return Vec3( A.X() + B.X(), A.Y() + B.Y(), A.Z() + B.Z() ); - } - - inline Vec3& operator+=( Vec3 &A, const Vec3 &B ) - { - A.X() += B.X(); - A.Y() += B.Y(); - A.Z() += B.Z(); - return A; - } - - - //========================================== - //=== Subtraction === - //========================================== - - inline Vec3 operator-( const Vec3 &A, const Vec3 &B ) - { - return Vec3( A.X() - B.X(), A.Y() - B.Y(), A.Z() - B.Z() ); - } - - inline Vec3 operator-( const Vec3 &A ) - { - return Vec3( -A.X(), -A.Y(), -A.Z() ); - } - - inline Vec3& operator-=( Vec3 &A, const Vec3 &B ) - { - A.X() -= B.X(); - A.Y() -= B.Y(); - A.Z() -= B.Z(); - return A; - } - - - //========================================== - //=== Multiplication === - //========================================== - - inline Vec3 operator*( float a, const Vec3 &x ) - { - return Vec3( a * x.X(), a * x.Y(), a * x.Z() ); - } - - inline Vec3 operator*( const Vec3 &x, float a ) - { - return Vec3( a * x.X(), a * x.Y(), a * x.Z() ); - } 
- - inline float operator*( const Vec3 &A, const Vec3 &B ) // Inner product. - { - return A.X() * B.X() + A.Y() * B.Y() + A.Z() * B.Z(); - } - - inline Vec3& operator*=( Vec3 &A, float a ) - { - A.X() *= a; - A.Y() *= a; - A.Z() *= a; - return A; - } - - //inline Vec3& operator*=( Vec3 &A, const Mat3x3 &M ) // A = M * A - //{ - // float x = M(0,0) * A.X() + M(0,1) * A.Y() + M(0,2) * A.Z(); - // float y = M(1,0) * A.X() + M(1,1) * A.Y() + M(1,2) * A.Z(); - // float z = M(2,0) * A.X() + M(2,1) * A.Y() + M(2,2) * A.Z(); - // A.X() = x; - // A.Y() = y; - // A.Z() = z; - // return A; - //} - - //inline Vec3& operator*=( Vec3 &A, const Mat4x4 &M ) // A = M * A - //{ - // float x = M(0,0) * A.X() + M(0,1) * A.Y() + M(0,2) * A.Z() + M(0,3); - // float y = M(1,0) * A.X() + M(1,1) * A.Y() + M(1,2) * A.Z() + M(1,3); - // float z = M(2,0) * A.X() + M(2,1) * A.Y() + M(2,2) * A.Z() + M(2,3); - // A.X() = x; - // A.Y() = y; - // A.Z() = z; - // return A; - //} - - - //========================================== - //=== Division === - //========================================== - - inline Vec3 operator/( const Vec3 &A, double c ) - { - double t = 1.0 / c; - return Vec3( A.X() * t, A.Y() * t, A.Z() * t ); - } - - inline Vec3& operator/=( Vec3 &A, double a ) - { - A.X() /= a; - A.Y() /= a; - A.Z() /= a; - return A; - } - - inline Vec3 operator/( const Vec3 &A, const Vec3 &B ) // Remove component parallel to B. - { - Vec3 C; // Cumbersome due to compiler falure. - double x = LenSqr( B ); - if( x > 0.0 ) C = A - B * (( A * B ) / x); else C = A; - return C; - } - - inline void operator/=( Vec3 &A, const Vec3 &B ) // Remove component parallel to B. - { - double x = LenSqr( B ); - if( x > 0.0 ) A -= B * (( A * B ) / x); - } - - - //========================================== - //=== Miscellaneous === - //========================================== - - inline float operator|( const Vec3 &A, const Vec3 &B ) // Inner product. 
- { - return A * B; - } - - inline Vec3 Unit( const Vec3 &A ) - { - double d = LenSqr( A ); - return d > 0.0 ? A / sqrt(d) : Vec3(0,0,0); - } - - inline Vec3 Unit( float x, float y, float z ) - { - return Unit( Vec3( x, y, z ) ); - } - - inline Vec3 Ortho( const Vec3 &A, const Vec3 &B ) - { - return Unit( A / B ); - } - - inline int operator==( const Vec3 &A, float x ) - { - return (A[0] == x) && (A[1] == x) && (A[2] == x); - } - - inline Vec3 operator^( const Vec3 &A, const Vec3 &B ) - { - return Vec3( - A.Y() * B.Z() - A.Z() * B.Y(), - A.Z() * B.X() - A.X() * B.Z(), - A.X() * B.Y() - A.Y() * B.X() ); - } - - inline double dist( const Vec3 &A, const Vec3 &B ) - { - return Len( A - B ); - } - - inline double Dihedral( const Vec3 &A, const Vec3 &B, const Vec3 &C ) - { - return ArcCos( Unit( A ^ B ) * Unit( C ^ B ) ); - } - - inline Vec3 operator>>( const Vec3 &A, const Vec3 &B ) // Project A onto B. - { - Vec3 C; - double x = LenSqr( B ); - if( x > 0.0 ) C = B * (( A * B ) / x); - return C; - } - - inline Vec3 operator<<( const Vec3 &A, const Vec3 &B ) // Project B onto A. - { - return B >> A; - } - - inline double Triple( const Vec3 &A, const Vec3 &B, const Vec3 &C ) - { - return ( A ^ B ) * C; - } - - - //========================================== - //=== Operations involving Matrices === - //========================================== - - //inline Mat3x3 Outer( const Vec3 &A, const Vec3 &B ) // Outer product. 
- //{ - // Mat3x3 C; - // C(0,0) = A.X() * B.X(); - // C(0,1) = A.X() * B.Y(); - // C(0,2) = A.X() * B.Z(); - // C(1,0) = A.Y() * B.X(); - // C(1,1) = A.Y() * B.Y(); - // C(1,2) = A.Y() * B.Z(); - // C(2,0) = A.Z() * B.X(); - // C(2,1) = A.Z() * B.Y(); - // C(2,2) = A.Z() * B.Z(); - // return C; - //} - - //inline Vec3 operator*( const Mat3x3 &M, const Vec3 &A ) - //{ - // return Vec3( - // M(0,0) * A[0] + M(0,1) * A[1] + M(0,2) * A[2], - // M(1,0) * A[0] + M(1,1) * A[1] + M(1,2) * A[2], - // M(2,0) * A[0] + M(2,1) * A[1] + M(2,2) * A[2]); - //} - - //inline Vec3 operator*( const Vec3 &A, const Mat3x3 &M ) - //{ - // return Vec3( - // A[0] * M(0,0) + A[1] * M(1,0) + A[2] * M(2,0), - // A[0] * M(0,1) + A[1] * M(1,1) + A[2] * M(2,1), - // A[0] * M(0,2) + A[1] * M(1,2) + A[2] * M(2,2)); - //} - - ////========================================== - ////=== Operations on Matrices === - ////========================================== - - //inline Mat3x3 operator+( const Mat3x3 &A, const Mat3x3 &B ) - //{ - // Mat3x3 C; - // C(0,0) = A(0,0) + B(0,0); C(0,1) = A(0,1) + B(0,1); C(0,2) = A(0,2) + B(0,2); - // C(1,0) = A(1,0) + B(1,0); C(1,1) = A(1,1) + B(1,1); C(1,2) = A(1,2) + B(1,2); - // C(2,0) = A(2,0) + B(2,0); C(2,1) = A(2,1) + B(2,1); C(2,2) = A(2,2) + B(2,2); - // return C; - //} - - //inline Mat3x3 operator-( const Mat3x3 &A, const Mat3x3 &B ) - //{ - // Mat3x3 C; - // C(0,0) = A(0,0) - B(0,0); C(0,1) = A(0,1) - B(0,1); C(0,2) = A(0,2) - B(0,2); - // C(1,0) = A(1,0) - B(1,0); C(1,1) = A(1,1) - B(1,1); C(1,2) = A(1,2) - B(1,2); - // C(2,0) = A(2,0) - B(2,0); C(2,1) = A(2,1) - B(2,1); C(2,2) = A(2,2) - B(2,2); - // return C; - //} - - //inline Mat3x3 operator*( const Mat3x3 &A, const Mat3x3 &B ) - //{ - // Mat3x3 C; - // C(0,0) = A(0,0) * B(0,0) + A(0,1) * B(1,0) + A(0,2) * B(2,0); - // C(0,1) = A(0,0) * B(0,1) + A(0,1) * B(1,1) + A(0,2) * B(2,1); - // C(0,2) = A(0,0) * B(0,2) + A(0,1) * B(1,2) + A(0,2) * B(2,2); - // C(1,0) = A(1,0) * B(0,0) + A(1,1) * B(1,0) + A(1,2) * 
B(2,0); - // C(1,1) = A(1,0) * B(0,1) + A(1,1) * B(1,1) + A(1,2) * B(2,1); - // C(1,2) = A(1,0) * B(0,2) + A(1,1) * B(1,2) + A(1,2) * B(2,2); - // C(2,0) = A(2,0) * B(0,0) + A(2,1) * B(1,0) + A(2,2) * B(2,0); - // C(2,1) = A(2,0) * B(0,1) + A(2,1) * B(1,1) + A(2,2) * B(2,1); - // C(2,2) = A(2,0) * B(0,2) + A(2,1) * B(1,2) + A(2,2) * B(2,2); - // return C; - //} - - //inline void Mat3x3::ScaleRows( float a, float b, float c ) - //{ - // m[0][0] *= a; m[0][1] *= a; m[0][2] *= a; - // m[1][0] *= b; m[1][1] *= b; m[1][2] *= b; - // m[2][0] *= c; m[2][1] *= c; m[2][2] *= c; - //} - - //inline void Mat3x3::ScaleCols( float a, float b, float c ) - //{ - // m[0][0] *= a; m[0][1] *= b; m[0][2] *= c; - // m[1][0] *= a; m[1][1] *= b; m[1][2] *= c; - // m[2][0] *= a; m[2][1] *= b; m[2][2] *= c; - //} - - - //========================================== - //=== Special Matrices === - //========================================== - - //inline Mat3x3::Mat3x3() - //{ - // m[0][0] = 0; m[0][1] = 0; m[0][2] = 0; - // m[1][0] = 0; m[1][1] = 0; m[1][2] = 0; - // m[2][0] = 0; m[2][1] = 0; m[2][2] = 0; - //} - - //inline Mat3x3 Ident3x3() - //{ - // Mat3x3 I; - // I(0,0) = 1.0; - // I(1,1) = 1.0; - // I(2,2) = 1.0; - // return I; - //} - - //inline Mat4x4 Ident4x4() - //{ - // Mat4x4 I; - // I(0,0) = 1.0; - // I(1,1) = 1.0; - // I(2,2) = 1.0; - // I(3,3) = 1.0; - // return I; - //} - - //inline void Adjoint( const Mat3x3 &M, Mat3x3 &A ) - //{ - // A(0,0) = M(1,1) * M(2,2) - M(1,2) * M(2,1); - // A(0,1) = M(1,2) * M(2,0) - M(1,0) * M(2,2); - // A(0,2) = M(1,0) * M(2,1) - M(1,1) * M(2,0); - - // A(1,0) = M(0,2) * M(2,1) - M(0,1) * M(2,2); - // A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0); - // A(1,2) = M(0,1) * M(2,0) - M(0,0) * M(2,1); - - // A(2,0) = M(0,1) * M(1,2) - M(0,2) * M(1,1); - // A(2,1) = M(0,2) * M(1,0) - M(0,0) * M(1,2); - // A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0); - //} - - //inline void TranspAdjoint( const Mat3x3 &M, Mat3x3 &A ) - //{ - // A(0,0) = M(1,1) * M(2,2) - M(1,2) 
* M(2,1); - // A(1,0) = M(1,2) * M(2,0) - M(1,0) * M(2,2); - // A(2,0) = M(1,0) * M(2,1) - M(1,1) * M(2,0); - - // A(0,1) = M(0,2) * M(2,1) - M(0,1) * M(2,2); - // A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0); - // A(2,1) = M(0,1) * M(2,0) - M(0,0) * M(2,1); - - // A(0,2) = M(0,1) * M(1,2) - M(0,2) * M(1,1); - // A(1,2) = M(0,2) * M(1,0) - M(0,0) * M(1,2); - // A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0); - //} - - //inline void Adjoint( const Mat3x3 &M, Mat3x3 &A, double &det ) - //{ - // Adjoint( M, A ); - // det = A(0,0) * M(0,0) + A(1,0) * M(1,0) + A(2,0) * M(2,0); - //} - - //inline void TranspAdjoint( const Mat3x3 &M, Mat3x3 &A, double &det ) - //{ - // TranspAdjoint( M, A ); - // det = A(0,0) * M(0,0) + A(0,1) * M(1,0) + A(0,2) * M(2,0); - //} - - - //========================================== - //=== Output routines === - //========================================== - - extern std::ostream &operator<<( std::ostream &out, const Vec3 & ); - //extern std::ostream &operator<<( std::ostream &out, const Mat3x3 & ); - //extern std::ostream &operator<<( std::ostream &out, const Mat4x4 & ); -}; -#endif diff --git a/src/nvtt/bc7/arvo/Vec4.cpp b/src/nvtt/bc7/arvo/Vec4.cpp deleted file mode 100644 index 286a203..0000000 --- a/src/nvtt/bc7/arvo/Vec4.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/*************************************************************************** -* Vec4.C * -* * -* Basic operations on 3-dimensional vectors. This special case is useful * -* because many operations are performed inline. * -* * -* HISTORY * -* Name Date Description * -* * -* walt 6/26/07 Edited Vec4 to make this new class * -* arvo 10/27/94 Reorganized (removed Col & Row distinction). * -* arvo 06/14/93 Initial coding. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1994, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include "ArvoMath.h" -#include "Vec4.h" -#include "form.h" - -namespace ArvoMath { - - float Normalize( Vec4 &A ) - { - float d = Len( A ); - if( d > 0.0 ) - { - double c = 1.0 / d; - A.X() *= c; - A.Y() *= c; - A.Z() *= c; - A.W() *= c; - } - return( d ); - } - - double Angle( const Vec4 &A, const Vec4 &B ) - { - double t = LenSqr(A) * LenSqr(B); - if( t <= 0.0 ) return 0.0; - return ArcCos( (A * B) / sqrt(t) ); - } - - Vec4 Min( const Vec4 &A, const Vec4 &B ) - { - return Vec4( - Min( A.X(), B.X() ), - Min( A.Y(), B.Y() ), - Min( A.Z(), B.Z() ), - Min( A.W(), B.W() ) ); - } - - Vec4 Max( const Vec4 &A, const Vec4 &B ) - { - return Vec4( - Max( A.X(), B.X() ), - Max( A.Y(), B.Y() ), - Max( A.Z(), B.Z() ), - Max( A.W(), B.W() ) ); - } - - std::ostream &operator<<( std::ostream &out, const Vec4 &A ) - { - out << form( " %9.5f %9.5f %9.5f %9.5f", A.X(), A.Y(), A.Z(), A.W() ) << std::endl; - return out; - } -}; diff --git a/src/nvtt/bc7/arvo/Vec4.h b/src/nvtt/bc7/arvo/Vec4.h deleted file mode 100644 index efe1f3f..0000000 --- a/src/nvtt/bc7/arvo/Vec4.h +++ /dev/null @@ -1,238 +0,0 @@ -/*************************************************************************** -* Vec4.h * -* * -* Basic operations on 4-dimensional vectors. 
This special case is useful * -* because many operations are performed inline. * -* * -* HISTORY * -* Name Date Description * -* * -* walt 6/26/07 Edited Vec3 to make this new class * -* arvo 10/27/94 Reorganized (removed Col & Row distinction). * -* arvo 06/14/93 Initial coding. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 1994, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#ifndef __Vec4_INCLUDED__ -#define __Vec4_INCLUDED__ - -#include -#include -#include "Vec2.h" -#include "Vec3.h" - -namespace ArvoMath { - - class Vec4 { - public: - Vec4( float c = 0.0 ) { x = c; y = c; z = c; w = c; } - Vec4( float a, float b, float c, float d ) { x = a; y = b; z = c; w = d; } - Vec4( const Vec4 &A ) { x = A.X(); y = A.Y(); z = A.Z(); w = A.W(); } - Vec4( const Vec3 &A, float d ) { x = A.X(); y = A.Y(); z = A.Z(); w = d; } - void operator=( float c ) { x = c; y = c; z = c; w = c; } - void operator=( const Vec4 &A ) { x = A.X(); y = A.Y(); z = A.Z(); w = A.W(); } - void operator=( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); w = 0.0; } - void operator=( const Vec2 &A ) { x = A.X(); y = A.Y(); z = 0.0; w = 0.0; } - ~Vec4() {} - float X() const { return x; } - float Y() const { return y; } - float Z() const { return z; } - float W() const { return w; } - float & X() { return x; } - float & Y() { return y; } - float & Z() { return z; } - float & W() { return w; } - float operator[]( int i ) const { return *( 
&x + i ); } - float & operator[]( int i ) { return *( &x + i ); } - private: - float x, y, z, w; - }; - - //========================================== - //=== Norm-related functions === - //========================================== - - inline double LenSqr ( const Vec4 &A ) { return Sqr(A[0]) + Sqr(A[1]) + Sqr(A[2]) + Sqr(A[3]); } - inline double Len ( const Vec4 &A ) { return Sqrt( LenSqr( A ) ); } - inline double Norm1 ( const Vec4 &A ) { return Abs(A[0]) + Abs(A[1]) + Abs(A[2]) + Abs(A[3]); } - inline double Norm2 ( const Vec4 &A ) { return Len( A ); } - inline float SupNorm( const Vec4 &A ) { return MaxAbs( A[0], A[1], A[2], A[3] ); } - - - //========================================== - //=== Addition === - //========================================== - - inline Vec4 operator+( const Vec4 &A, const Vec4 &B ) - { - return Vec4( A.X() + B.X(), A.Y() + B.Y(), A.Z() + B.Z(), A.W() + B.W() ); - } - - inline Vec4& operator+=( Vec4 &A, const Vec4 &B ) - { - A.X() += B.X(); - A.Y() += B.Y(); - A.Z() += B.Z(); - A.W() += B.W(); - return A; - } - - - //========================================== - //=== Subtraction === - //========================================== - - inline Vec4 operator-( const Vec4 &A, const Vec4 &B ) - { - return Vec4( A.X() - B.X(), A.Y() - B.Y(), A.Z() - B.Z(), A.W() - B.W()); - } - - inline Vec4 operator-( const Vec4 &A ) - { - return Vec4( -A.X(), -A.Y(), -A.Z(), -A.W() ); - } - - inline Vec4& operator-=( Vec4 &A, const Vec4 &B ) - { - A.X() -= B.X(); - A.Y() -= B.Y(); - A.Z() -= B.Z(); - A.W() -= B.W(); - return A; - } - - - //========================================== - //=== Multiplication === - //========================================== - - inline Vec4 operator*( float a, const Vec4 &x ) - { - return Vec4( a * x.X(), a * x.Y(), a * x.Z(), a * x.W() ); - } - - inline Vec4 operator*( const Vec4 &x, float a ) - { - return Vec4( a * x.X(), a * x.Y(), a * x.Z(), a * x.W() ); - } - - inline float operator*( const Vec4 &A, const Vec4 &B ) // 
Inner product. - { - return A.X() * B.X() + A.Y() * B.Y() + A.Z() * B.Z() + A.W() * B.W(); - } - - inline Vec4& operator*=( Vec4 &A, float a ) - { - A.X() *= a; - A.Y() *= a; - A.Z() *= a; - A.W() *= a; - return A; - } - - //========================================== - //=== Division === - //========================================== - - inline Vec4 operator/( const Vec4 &A, double c ) - { - double t = 1.0 / c; - return Vec4( A.X() * t, A.Y() * t, A.Z() * t, A.W() * t); - } - - inline Vec4& operator/=( Vec4 &A, double a ) - { - A.X() /= a; - A.Y() /= a; - A.Z() /= a; - A.W() /= a; - return A; - } - - inline Vec4 operator/( const Vec4 &A, const Vec4 &B ) // Remove component parallel to B. - { - Vec4 C; // Cumbersome due to compiler falure. - double x = LenSqr( B ); - if( x > 0.0 ) C = A - B * (( A * B ) / x); else C = A; - return C; - } - - inline void operator/=( Vec4 &A, const Vec4 &B ) // Remove component parallel to B. - { - double x = LenSqr( B ); - if( x > 0.0 ) A -= B * (( A * B ) / x); - } - - - //========================================== - //=== Miscellaneous === - //========================================== - - inline float operator|( const Vec4 &A, const Vec4 &B ) // Inner product. - { - return A * B; - } - - inline Vec4 Unit( const Vec4 &A ) - { - double d = LenSqr( A ); - return d > 0.0 ? 
A / sqrt(d) : Vec4(0,0,0,0); - } - - inline Vec4 Unit( float x, float y, float z, float w ) - { - return Unit( Vec4( x, y, z, w ) ); - } - - inline Vec4 Ortho( const Vec4 &A, const Vec4 &B ) - { - return Unit( A / B ); - } - - inline int operator==( const Vec4 &A, float x ) - { - return (A[0] == x) && (A[1] == x) && (A[2] == x) && (A[3] == x); - } - -// inline Vec4 operator^( const Vec4 &A, const Vec4 &B ) there is no 4ED "cross product" of 2 4D vectors -- we need six dimensions - - inline double dist( const Vec4 &A, const Vec4 &B ) - { - return Len( A - B ); - } - -// inline double Dihedral( const Vec4 &A, const Vec4 &B, const Vec4 &C ) - - inline Vec4 operator>>( const Vec4 &A, const Vec4 &B ) // Project A onto B. - { - Vec4 C; - double x = LenSqr( B ); - if( x > 0.0 ) C = B * (( A * B ) / x); - return C; - } - - inline Vec4 operator<<( const Vec4 &A, const Vec4 &B ) // Project B onto A. - { - return B >> A; - } - -// inline double Triple( const Vec4 &A, const Vec4 &B, const Vec4 &C ) - - //========================================== - //=== Output routines === - //========================================== - - extern std::ostream &operator<<( std::ostream &out, const Vec4 & ); -}; -#endif diff --git a/src/nvtt/bc7/arvo/Vector.cpp b/src/nvtt/bc7/arvo/Vector.cpp deleted file mode 100644 index af3bc11..0000000 --- a/src/nvtt/bc7/arvo/Vector.cpp +++ /dev/null @@ -1,366 +0,0 @@ -/*************************************************************************** -* Vector.C * -* * -* General Vector and Matrix classes, with all the associated methods. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 08/16/2000 Revamped for CIT tools. * -* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * -* arvo 06/30/1993 Added singular value decomposition class. * -* arvo 06/25/1993 Major revisions. * -* arvo 09/08/1991 Initial implementation. 
* -* * -*--------------------------------------------------------------------------* -* Copyright (C) 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. * -* * -***************************************************************************/ -#include -#include -#include "ArvoMath.h" -#include "Vector.h" -#include "form.h" - -namespace ArvoMath { - - const Vector Vector::Null(0); - - /*-------------------------------------------------------------------------* - * * - * C O N S T R U C T O R S * - * * - *-------------------------------------------------------------------------*/ - Vector::Vector( const float *x, int n ) - { - Create( n ); - for( register int i = 0; i < size; i++ ) elem[i] = x[i]; - } - - Vector::Vector( const Vector &A ) - { - Create( A.Size() ); - for( register int i = 0; i < A.Size(); i++ ) elem[i] = A(i); - } - - Vector::Vector( int n ) - { - Create( n ); - for( register int i = 0; i < n; i++ ) elem[i] = 0.0; - } - - Vector::Vector( float x, float y ) - { - Create( 2 ); - elem[0] = x; - elem[1] = y; - } - - Vector::Vector( float x, float y, float z ) - { - Create( 3 ); - elem[0] = x; - elem[1] = y; - elem[2] = z; - } - - void Vector::SetSize( int new_size ) - { - if( size != new_size ) - { - delete[] elem; - Create( new_size ); - for( register int i = 0; i < new_size; i++ ) elem[i] = 0.0; - } - } - - Vector &Vector::Swap( int i, int j ) - { - float temp = elem[i]; - elem[i] = elem[j]; - elem[j] = temp; - return *this; - } - - Vector Vector::GetBlock( int i, int j ) const - { - assert( 0 <= i && i <= j && j < size ); - int n 
= j - i + 1; - Vector V( n ); - register float *v = V.Array(); - register float *e = elem + i; - for( register int k = 0; k < n; k++ ) *v++ = *e++; - return V; - } - - void Vector::SetBlock( int i, int j, const Vector &V ) - { - assert( 0 <= i && i <= j && j < size ); - int n = j - i + 1; - assert( n == V.Size() ); - register float *v = V.Array(); - register float *e = elem + i; - for( register int k = 0; k < n; k++ ) *e++ = *v++; - } - - /*-------------------------------------------------------------------------* - * * - * O P E R A T O R S * - * * - *-------------------------------------------------------------------------*/ - double operator*( const Vector &A, const Vector &B ) - { - assert( A.Size() == B.Size() ); - double sum = A(0) * B(0); - for( register int i = 1; i < A.Size(); i++ ) sum += A(i) * B(i); - return sum; - } - - void Vector::operator=( float c ) - { - for( register int i = 0; i < size; i++ ) elem[i] = c; - } - - Vector operator*( const Vector &A, float s ) - { - Vector C( A.Size() ); - for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) * s; - return C; - } - - Vector operator*( float s, const Vector &A ) - { - Vector C( A.Size() ); - for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) * s; - return C; - } - - Vector operator/( const Vector &A, float s ) - { - assert( s != 0.0 ); - Vector C( A.Size() ); - for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) / s; - return C; - } - - Vector& operator+=( Vector &A, const Vector &B ) - { - assert( A.Size() == B.Size() ); - for( register int i = 0; i < A.Size(); i++ ) A(i) += B(i); - return A; - } - - Vector& operator*=( Vector &A, float scale ) - { - for( register int i = 0; i < A.Size(); i++ ) A(i) *= scale; - return A; - } - - Vector& operator/=( Vector &A, float scale ) - { - for( register int i = 0; i < A.Size(); i++ ) A(i) /= scale; - return A; - } - - Vector& Vector::operator=( const Vector &A ) - { - SetSize( A.Size() ); - for( register int i = 0; i < size; i++ ) elem[i] = 
A(i); - return *this; - } - - Vector operator+( const Vector &A, const Vector &B ) - { - assert( A.Size() == B.Size() ); - Vector C( A.Size() ); - for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) + B(i); - return C; - } - - Vector operator-( const Vector &A, const Vector &B ) - { - assert( A.Size() == B.Size() ); - Vector C( A.Size() ); - for( register int i = 0; i < A.Size(); i++ ) C(i) = A(i) - B(i); - return C; - } - - Vector operator-( const Vector &A ) // Unary minus. - { - Vector B( A.Size() ); - for( register int i = 0; i < A.Size(); i++ ) B(i) = -A(i); - return B; - } - - Vector operator^( const Vector &A, const Vector &B ) - { - Vector C(3); - assert( A.Size() == B.Size() ); - if( A.Size() == 2 ) // Assume z components of A and B are zero. - { - C(0) = 0.0; - C(1) = 0.0; - C(2) = A(0) * B(1) - A(1) * B(0); - } - else - { - assert( A.Size() == 3 ); - C(0) = A(1) * B(2) - A(2) * B(1); - C(1) = A(2) * B(0) - A(0) * B(2); - C(2) = A(0) * B(1) - A(1) * B(0); - } - return C; - } - - /*-------------------------------------------------------------------------* - * * - * M I S C E L L A N E O U S F U N C T I O N S * - * * - *-------------------------------------------------------------------------*/ - Vector Min( const Vector &A, const Vector &B ) - { - assert( A.Size() == B.Size() ); - Vector C( A.Size() ); - for( register int i = 0; i < A.Size(); i++ ) C(i) = Min( A(i), B(i) ); - return C; - } - - Vector Max( const Vector &A, const Vector &B ) - { - assert( A.Size() == B.Size() ); - Vector C( A.Size() ); - for( register int i = 0; i < A.Size(); i++ ) C(i) = Max( A(i), B(i) ); - return C; - } - - Vector Unit( const Vector &A ) - { - double norm = TwoNorm( A ); - assert( norm > 0.0 ); - return A * ( 1.0 / norm ); - } - - double Normalize( Vector &A ) - { - double norm = TwoNorm( A ); - assert( norm > 0.0 ); - for( register int i = 0; i < A.Size(); i++ ) A(i) /= norm; - return norm; - } - - int Null( const Vector &A ) - { - return A.Size() == 0; - } - - 
double TwoNormSqr( const Vector &A ) - { - double sum = A(0) * A(0); - for( register int i = 1; i < A.Size(); i++ ) sum += A(i) * A(i); - return sum; - } - - double TwoNorm( const Vector &A ) - { - return sqrt( TwoNormSqr( A ) ); - } - - double dist( const Vector &A, const Vector &B ) - { - return TwoNorm( A - B ); - } - - double OneNorm( const Vector &A ) - { - double norm = Abs( A(0) ); - for( register int i = 1; i < A.Size(); i++ ) norm += Abs( A(i) ); - return norm; - } - - double SupNorm( const Vector &A ) - { - double norm = Abs( A(0) ); - for( register int i = 1; i < A.Size(); i++ ) - { - double a = Abs( A(i) ); - if( a > norm ) norm = a; - } - return norm; - } - - Vec2 ToVec2( const Vector &V ) - { - assert( V.Size() == 2 ); - return Vec2( V(0), V(1) ); - } - - Vec3 ToVec3( const Vector &V ) - { - assert( V.Size() == 3 ); - return Vec3( V(0), V(1), V(2) ); - } - - Vector ToVector( const Vec2 &V ) - { - return Vector( V.X(), V.Y() ); - } - - Vector ToVector( const Vec3 &V ) - { - return Vector( V.X(), V.Y(), V.Z() ); - } - - // - // Returns a vector that is orthogonal to A (but of arbitrary length). - // - Vector OrthogonalTo( const Vector &A ) - { - Vector B( A.Size() ); - double c = 0.5 * SupNorm( A ); - - if( A.Size() < 2 ) - { - // Just return the zero-vector. - } - else if( c == 0.0 ) - { - B(0) = 1.0; - } - else for( register int i = 0; i < A.Size(); i++ ) - { - if( Abs( A(i)) > c ) - { - int k = ( i > 0 ) ? 
i - 1 : i + 1; - B(k) = -A(i); - B(i) = A(k); - break; - } - } - return B; - } - - std::ostream &operator<<( std::ostream &out, const Vector &A ) - { - if( A.Size() == 0 ) - { - out << "NULL"; - } - else for( register int i = 0; i < A.Size(); i++ ) - { - out << form( "%3d: %10.5g\n", i, A(i) ); - } - out << std::endl; - return out; - } - - -}; diff --git a/src/nvtt/bc7/arvo/Vector.h b/src/nvtt/bc7/arvo/Vector.h deleted file mode 100644 index 01e66df..0000000 --- a/src/nvtt/bc7/arvo/Vector.h +++ /dev/null @@ -1,103 +0,0 @@ -/*************************************************************************** -* Vector.h * -* * -* General Vector and Matrix classes, with all the associated methods. * -* * -* HISTORY * -* Name Date Description * -* * -* arvo 08/16/2000 Revamped for CIT tools. * -* arvo 10/31/1994 Combined RowVec & ColVec into Vector. * -* arvo 06/30/1993 Added singular value decomposition class. * -* arvo 06/25/1993 Major revisions. * -* arvo 09/08/1991 Initial implementation. * -* * -*--------------------------------------------------------------------------* -* Copyright (C) 2000, James Arvo * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU General Public License as published by the * -* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html * -* * -* This program is distributed in the hope that it will be useful, but * -* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for * -* any particular purpose. See the GNU General Public License for more * -* details. 
* -* * -***************************************************************************/ -#ifndef __VECTOR_INCLUDED__ -#define __VECTOR_INCLUDED__ - -#include -#include "Vec2.h" -#include "Vec3.h" - -namespace ArvoMath { - class Vector { - public: - Vector( int size = 0 ); - Vector( const Vector & ); - Vector( float, float ); - Vector( float, float, float ); - Vector( const float *x, int n ); - Vector &operator=( const Vector & ); - void operator=( float ); - void SetSize( int ); - Vector &Swap( int i, int j ); - Vector GetBlock( int i, int j ) const; - void SetBlock( int i, int j, const Vector & ); - static const Vector Null; - - public: // Inlined functions. - inline float operator()( int i ) const { return elem[i]; } - inline float& operator()( int i ) { return elem[i]; } - inline float* Array() const { return elem; } - inline int Size () const { return size; } - inline ~Vector() { delete[] elem; } - - private: - void Create( int n = 0 ) { size = n; elem = new float[n]; } - int size; - float* elem; - }; - - extern Vector operator + ( const Vector &, const Vector & ); - extern Vector operator - ( const Vector &, const Vector & ); // Binary minus. - extern Vector operator - ( const Vector & ); // Unary minus. - extern Vector operator * ( const Vector &, float ); - extern Vector operator * ( float , const Vector & ); - extern Vector operator / ( const Vector &, float ); - extern Vector operator / ( const Vector &, const Vector & ); - extern Vector operator ^ ( const Vector &, const Vector & ); - extern Vector& operator += ( Vector &, const Vector & ); - extern Vector& operator *= ( Vector &, float ); - extern Vector& operator /= ( Vector &, float ); - extern Vector Min ( const Vector &, const Vector & ); - extern Vector Max ( const Vector &, const Vector & ); - extern double operator * ( const Vector &, const Vector & ); // Inner product. 
- extern double dist ( const Vector &, const Vector & ); - extern Vector OrthogonalTo( const Vector & ); // Returns some orthogonal vector. - extern Vector Unit ( const Vector & ); - extern double Normalize ( Vector & ); - extern double OneNorm ( const Vector & ); - extern double TwoNorm ( const Vector & ); - extern double TwoNormSqr ( const Vector & ); - extern double SupNorm ( const Vector & ); - extern int Null ( const Vector & ); - extern Vec2 ToVec2 ( const Vector & ); - extern Vec3 ToVec3 ( const Vector & ); - extern Vector ToVector ( const Vec2 & ); - extern Vector ToVector ( const Vec3 & ); - - std::ostream &operator<<( - std::ostream &out, - const Vector & - ); -}; -#endif - - - - - - diff --git a/src/nvtt/bc7/arvo/form.h b/src/nvtt/bc7/arvo/form.h deleted file mode 100644 index 48aef94..0000000 --- a/src/nvtt/bc7/arvo/form.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef __FORM_INCLUDED__ -#define __FORM_INCLUDED__ - -#include -#include -#include -#include - -namespace ArvoMath { - - inline const char *form(char *fmt, ...) 
- { - static char printbfr[65536]; - va_list arglist; - - va_start(arglist,fmt); - int length = vsprintf(printbfr,fmt,arglist); - va_end(arglist); - - assert(length > 65536); - - return printbfr; - } -}; - -#endif diff --git a/src/nvtt/bc7/avpcl.cpp b/src/nvtt/bc7/avpcl.cpp index 6cbb972..af017b4 100644 --- a/src/nvtt/bc7/avpcl.cpp +++ b/src/nvtt/bc7/avpcl.cpp @@ -12,41 +12,40 @@ See the License for the specific language governing permissions and limitations // the avpcl compressor and decompressor -#include -#include -#include -#include -#include - -#include "ImfArray.h" -#include "RGBA.h" #include "tile.h" #include "avpcl.h" -#include "targa.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include -#ifndef MIN -#define MIN(x,y) ((x)<(y)?(x):(y)) -#endif +using namespace nv; +using namespace AVPCL; -using namespace std; +// global flags +bool AVPCL::flag_premult = false; +bool AVPCL::flag_nonuniform = false; +bool AVPCL::flag_nonuniform_ati = false; -void AVPCL::compress(const Tile &t, char *block, FILE *errfile) +// global mode +bool AVPCL::mode_rgb = false; // true if image had constant alpha = 255 + +void AVPCL::compress(const Tile &t, char *block) { char tempblock[AVPCL::BLOCKSIZE]; - double msebest = DBL_MAX; + float msebest = FLT_MAX; - double mse_mode0 = AVPCL::compress_mode0(t, tempblock); if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } - double mse_mode1 = AVPCL::compress_mode1(t, tempblock); if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } - double mse_mode2 = AVPCL::compress_mode2(t, tempblock); if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } - double mse_mode3 = AVPCL::compress_mode3(t, tempblock); if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } - double mse_mode4 = AVPCL::compress_mode4(t, tempblock); if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, 
tempblock, AVPCL::BLOCKSIZE); } - double mse_mode5 = AVPCL::compress_mode5(t, tempblock); if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } - double mse_mode6 = AVPCL::compress_mode6(t, tempblock); if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } - double mse_mode7 = AVPCL::compress_mode7(t, tempblock); if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode0 = AVPCL::compress_mode0(t, tempblock); if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode1 = AVPCL::compress_mode1(t, tempblock); if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode2 = AVPCL::compress_mode2(t, tempblock); if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode3 = AVPCL::compress_mode3(t, tempblock); if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode4 = AVPCL::compress_mode4(t, tempblock); if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode5 = AVPCL::compress_mode5(t, tempblock); if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode6 = AVPCL::compress_mode6(t, tempblock); if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode7 = AVPCL::compress_mode7(t, tempblock); if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } - if (errfile) + /*if (errfile) { - double errs[21]; + float errs[21]; int nerrs = 8; errs[0] = mse_mode0; errs[1] = mse_mode1; @@ -56,11 +55,12 @@ void AVPCL::compress(const Tile &t, char *block, FILE *errfile) errs[5] = mse_mode5; errs[6] = mse_mode6; errs[7] = mse_mode7; - if (fwrite(errs, sizeof(double), 
nerrs, errfile) != nerrs) + if (fwrite(errs, sizeof(float), nerrs, errfile) != nerrs) throw "Write error on error file"; - } + }*/ } +/* static int getbit(char *b, int start) { if (start < 0 || start >= 128) return 0; // out of range @@ -94,14 +94,12 @@ static void setbits(char *b, int start, int len, int bits) for (int i=0; i> i); } +*/ void AVPCL::decompress(const char *cblock, Tile &t) { - Vec4 zero(0); - - char block[16]; - - for (int i=0; i<16; ++i) block[i] = cblock[i]; + char block[AVPCL::BLOCKSIZE]; + memcpy(block, cblock, AVPCL::BLOCKSIZE); switch(getmode(block)) { @@ -116,12 +114,13 @@ void AVPCL::decompress(const char *cblock, Tile &t) case 8: // return a black tile if you get a reserved mode for (int y=0; y pixels; @@ -158,12 +157,12 @@ void AVPCL::compress(string inf, string avpclf, string errf) // convert to tiles and compress each tile for (int y=0; y -#include - #include "tile.h" #include "bits.h" -using namespace std; - -#define EXTERNAL_RELEASE 1 // define this if we're releasing this code externally #define DISABLE_EXHAUSTIVE 1 // define this if you don't want to spend a lot of time on exhaustive compression #define USE_ZOH_INTERP 1 // use zoh interpolator, otherwise use exact avpcl interpolators #define USE_ZOH_INTERP_ROUNDED 1 // use the rounded versions! 
-#define NREGIONS_TWO 2 -#define NREGIONS_THREE 3 -#define DBL_MAX (1.0e37) // doesn't have to be really dblmax, just bigger than any possible squared error +namespace AVPCL { -class AVPCL +static const int NREGIONS_TWO = 2; +static const int NREGIONS_THREE = 3; + +static const int BLOCKSIZE=16; +static const int BITSIZE=128; + +// global flags +extern bool flag_premult; +extern bool flag_nonuniform; +extern bool flag_nonuniform_ati; + +// global mode +extern bool mode_rgb; // true if image had constant alpha = 255 + +void compress(const Tile &t, char *block); +void decompress(const char *block, Tile &t); + +float compress_mode0(const Tile &t, char *block); +void decompress_mode0(const char *block, Tile &t); + +float compress_mode1(const Tile &t, char *block); +void decompress_mode1(const char *block, Tile &t); + +float compress_mode2(const Tile &t, char *block); +void decompress_mode2(const char *block, Tile &t); + +float compress_mode3(const Tile &t, char *block); +void decompress_mode3(const char *block, Tile &t); + +float compress_mode4(const Tile &t, char *block); +void decompress_mode4(const char *block, Tile &t); + +float compress_mode5(const Tile &t, char *block); +void decompress_mode5(const char *block, Tile &t); + +float compress_mode6(const Tile &t, char *block); +void decompress_mode6(const char *block, Tile &t); + +float compress_mode7(const Tile &t, char *block); +void decompress_mode7(const char *block, Tile &t); + +inline int getmode(Bits &in) { -public: - static const int BLOCKSIZE=16; - static const int BITSIZE=128; + int mode = 0; - // global flags - static bool flag_premult; - static bool flag_nonuniform; - static bool flag_nonuniform_ati; + if (in.read(1)) mode = 0; + else if (in.read(1)) mode = 1; + else if (in.read(1)) mode = 2; + else if (in.read(1)) mode = 3; + else if (in.read(1)) mode = 4; + else if (in.read(1)) mode = 5; + else if (in.read(1)) mode = 6; + else if (in.read(1)) mode = 7; + else mode = 8; // reserved + return mode; +} 
+inline int getmode(const char *block) +{ + int bits = block[0], mode = 0; - // global mode - static bool mode_rgb; // true if image had constant alpha = 255 + if (bits & 1) mode = 0; + else if ((bits&3) == 2) mode = 1; + else if ((bits&7) == 4) mode = 2; + else if ((bits & 0xF) == 8) mode = 3; + else if ((bits & 0x1F) == 16) mode = 4; + else if ((bits & 0x3F) == 32) mode = 5; + else if ((bits & 0x7F) == 64) mode = 6; + else if ((bits & 0xFF) == 128) mode = 7; + else mode = 8; // reserved + return mode; +} - static void compress(string inf, string zohf, string errf); - static void decompress(string zohf, string outf); - static void compress(const Tile &t, char *block, FILE *errfile); - static void decompress(const char *block, Tile &t); +} - static double compress_mode0(const Tile &t, char *block); - static void decompress_mode0(const char *block, Tile &t); - - static double compress_mode1(const Tile &t, char *block); - static void decompress_mode1(const char *block, Tile &t); - - static double compress_mode2(const Tile &t, char *block); - static void decompress_mode2(const char *block, Tile &t); - - static double compress_mode3(const Tile &t, char *block); - static void decompress_mode3(const char *block, Tile &t); - - static double compress_mode4(const Tile &t, char *block); - static void decompress_mode4(const char *block, Tile &t); - - static double compress_mode5(const Tile &t, char *block); - static void decompress_mode5(const char *block, Tile &t); - - static double compress_mode6(const Tile &t, char *block); - static void decompress_mode6(const char *block, Tile &t); - - static double compress_mode7(const Tile &t, char *block); - static void decompress_mode7(const char *block, Tile &t); - - static int getmode(Bits &in) - { - int mode = 0; - - if (in.read(1)) mode = 0; - else if (in.read(1)) mode = 1; - else if (in.read(1)) mode = 2; - else if (in.read(1)) mode = 3; - else if (in.read(1)) mode = 4; - else if (in.read(1)) mode = 5; - else if (in.read(1)) mode 
= 6; - else if (in.read(1)) mode = 7; - else mode = 8; // reserved - return mode; - } - static int getmode(const char *block) - { - int bits = block[0], mode = 0; - - if (bits & 1) mode = 0; - else if ((bits&3) == 2) mode = 1; - else if ((bits&7) == 4) mode = 2; - else if ((bits & 0xF) == 8) mode = 3; - else if ((bits & 0x1F) == 16) mode = 4; - else if ((bits & 0x3F) == 32) mode = 5; - else if ((bits & 0x7F) == 64) mode = 6; - else if ((bits & 0xFF) == 128) mode = 7; - else mode = 8; // reserved - return mode; - } -}; -#endif \ No newline at end of file +#endif diff --git a/src/nvtt/bc7/avpcl.sln b/src/nvtt/bc7/avpcl.sln deleted file mode 100644 index 395b1ce..0000000 --- a/src/nvtt/bc7/avpcl.sln +++ /dev/null @@ -1,21 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 8.00 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "avpcl", "avpcl.vcproj", "{C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}" - ProjectSection(ProjectDependencies) = postProject - EndProjectSection -EndProject -Global - GlobalSection(SolutionConfiguration) = preSolution - Debug = Debug - Release = Release - EndGlobalSection - GlobalSection(ProjectConfiguration) = postSolution - {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Debug.ActiveCfg = Debug|Win32 - {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Debug.Build.0 = Debug|Win32 - {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Release.ActiveCfg = Release|Win32 - {C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Release.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - EndGlobalSection - GlobalSection(ExtensibilityAddIns) = postSolution - EndGlobalSection -EndGlobal diff --git a/src/nvtt/bc7/avpcl.vcproj b/src/nvtt/bc7/avpcl.vcproj deleted file mode 100644 index 4857f78..0000000 --- a/src/nvtt/bc7/avpcl.vcproj +++ /dev/null @@ -1,314 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/nvtt/bc7/avpcl_mode0.cpp b/src/nvtt/bc7/avpcl_mode0.cpp index 7583b70..ba79447 100644 --- a/src/nvtt/bc7/avpcl_mode0.cpp +++ b/src/nvtt/bc7/avpcl_mode0.cpp @@ -17,13 +17,13 @@ See the License for the specific language governing permissions and limitations #include "bits.h" #include "tile.h" #include "avpcl.h" -#include "arvo/Vec4.h" -#include "arvo/Matrix.h" -#include "arvo/SVD.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" #include "utils.h" #include "endpts.h" - -#include +#include #include "shapes_three.h" @@ -33,7 +33,8 @@ See the License for the specific language governing permissions and limitations #define NSHAPES 16 #define SHAPEBITS 4 -using namespace ArvoMath; +using namespace nv; +using namespace AVPCL; #define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values @@ -88,7 +89,7 @@ struct PatternPrec }; // this is the precision for each channel and region -// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! 
static PatternPrec pattern_precs[NPATTERNS] = { 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, @@ -107,7 +108,7 @@ static int nbits(int n, bool issigned) } else { - assert (issigned); + nvAssert (issigned); for (nb=0; n<-1; ++nb, n>>=1) ; return nb + 1; } @@ -115,12 +116,12 @@ static int nbits(int n, bool issigned) static void transform_forward(IntEndptsRGB_2 ep[NREGIONS]) { - assert(0); + nvUnreachable(); } static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS]) { - assert(0); + nvUnreachable(); } // endpoints are 555,555; reduce to 444,444 and put the lsb bit majority in compr_bits @@ -133,7 +134,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt { onescnt += endpts.A[j] & 1; compr_endpts.A[j] = endpts.A[j] >> 1; - assert (compr_endpts.A[j] < 16); + nvAssert (compr_endpts.A[j] < 16); } compr_endpts.a_lsb = onescnt >= 2; @@ -142,7 +143,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt { onescnt += endpts.B[j] & 1; compr_endpts.B[j] = endpts.B[j] >> 1; - assert (compr_endpts.B[j] < 16); + nvAssert (compr_endpts.B[j] < 16); } compr_endpts.b_lsb = onescnt >= 2; } @@ -175,12 +176,12 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec for (int region = 0; region < NREGIONS; ++region) { - full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space - full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); - full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); - full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); - full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); - full_endpts[region].B[2] = 
Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); + full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space + full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1); + full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1); + full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1); + full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1); + full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1); compress_one(full_endpts[region], q_endpts[region]); } } @@ -194,7 +195,7 @@ static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE int x = POS_TO_X(position); int y = POS_TO_Y(position); - assert(REGION(x,y,shapeindex) == region); // double check the table + nvAssert(REGION(x,y,shapeindex) == region); // double check the table if (indices[y][x] & HIGH_INDEXBIT) { // high bit is set, swap the endpts and indices for this region @@ -236,7 +237,7 @@ static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, out.write(endpts[i].b_lsb, 1); } - assert (out.getptr() == 83); + nvAssert (out.getptr() == 83); } static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) @@ -244,8 +245,8 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind int mode = AVPCL::getmode(in); pat_index = 0; - assert (pat_index >= 0 && pat_index < NPATTERNS); - assert (in.getptr() == patterns[pat_index].modebits); + nvAssert (pat_index >= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); shapeindex = 
in.read(SHAPEBITS); p = patterns[pat_index]; @@ -263,7 +264,7 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind endpts[i].b_lsb = in.read(1); } - assert (in.getptr() == 83); + nvAssert (in.getptr() == 83); } static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) @@ -316,10 +317,10 @@ static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, co write_indices(indices, shapeindex, out); - assert(out.getptr() == AVPCL::BITSIZE); + nvAssert(out.getptr() == AVPCL::BITSIZE); } -static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec ®ion_prec, Vec4 palette[NINDICES]) +static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec ®ion_prec, Vector4 palette[NINDICES]) { IntEndptsRGB endpts; @@ -333,30 +334,30 @@ static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const Reg // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM)); // constant alpha for (int i = 0; i < NINDICES; ++i) - palette[i].W() = RGBA_MAX; + palette[i].w = 255.0f; } static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS]) { - assert(0); + nvUnreachable(); } void AVPCL::decompress_mode0(const char *block, Tile &t) @@ -375,7 +376,7 
@@ void AVPCL::decompress_mode0(const char *block, Tile &t) transform_inverse(endpts); } - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; for (int r = 0; r < NREGIONS; ++r) generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); @@ -383,7 +384,7 @@ void AVPCL::decompress_mode0(const char *block, Tile &t) read_indices(in, shapeindex, indices); - assert(in.getptr() == AVPCL::BITSIZE); + nvAssert(in.getptr() == AVPCL::BITSIZE); // lookup for (int y = 0; y < Tile::TILE_H; y++) @@ -392,17 +393,17 @@ void AVPCL::decompress_mode0(const char *block, Tile &t) } // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr -static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec ®ion_prec, double current_err, int indices[Tile::TILE_TOTAL]) +static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec ®ion_prec, float current_err, int indices[Tile::TILE_TOTAL]) { - Vec4 palette[NINDICES]; - double toterr = 0; - Vec4 err; + Vector4 palette[NINDICES]; + float toterr = 0; + Vector4 err; generate_palette_quantized(endpts, region_prec, palette); for (int i = 0; i < np; ++i) { - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int j = 0; j < NINDICES && besterr > 0; ++j) { @@ -425,7 +426,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endp for (int k = i; k < np; ++k) indices[k] = -1; - return DBL_MAX; + return FLT_MAX; } } return toterr; @@ -433,10 +434,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endp // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, - int indices[Tile::TILE_H][Tile::TILE_W], double 
toterr[NREGIONS]) + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; for (int region = 0; region < NREGIONS; ++region) { @@ -444,13 +445,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp toterr[region] = 0; } - Vec4 err; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -470,8 +471,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // this function returns either old_err or a value smaller (if it was successful in improving the error) -static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, - double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts @@ -541,10 +542,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec // for np = 16 -- adjust error thresholds as a function of np // always ensure endpoint ordering is preserved (no need to overlap the scan) // if orig_err returned from this is less than its input value, then indices[] will contain valid indices -static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +static float exhaustive(const 
Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) { IntEndptsRGB_2 temp_endpts; - double best_err = orig_err; + float best_err = orig_err; int aprec = region_prec.endpt_a_prec[ch]; int bprec = region_prec.endpt_b_prec[ch]; int good_indices[Tile::TILE_TOTAL]; @@ -553,7 +554,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & for (int i=0; i 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } - adelta = MAX(adelta, 3); - bdelta = MAX(bdelta, 3); + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); #ifdef DISABLE_EXHAUSTIVE adelta = bdelta = 3; @@ -572,10 +573,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & temp_endpts = opt_endpts; // ok figure out the range of A and B - int alow = MAX(0, opt_endpts.A[ch] - adelta); - int ahigh = MIN((1<> 1) & 1; // make sure we have a valid error for temp_in - // we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts - // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) - double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); + // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position) + float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices); // now try to optimize these endpoints - double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + float temp_out_err = optimize_one(pixels, np, 
temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); // if we find an improvement, update the best so far and correct the output endpoints and errors if (temp_out_err < best_err) @@ -843,9 +844,9 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_ emit compressed block with original data // to try to preserve maximum endpoint precision */ -static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) { - double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; @@ -864,8 +865,9 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp transform_inverse(orig_endpts); optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); - for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; - if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; - if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; - if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; - if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; - v.W() = RGBA_MAX; + if (v.x < 0.0f) v.x = 0.0f; + if (v.x > 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + v.w = 255.0f; } -static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) { for (int region = 0; region < NREGIONS; 
++region) for (int i = 0; i < NINDICES; ++i) - palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); } // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined -static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; generate_palette_unquantized(endpts, palette); - double toterr = 0; - Vec4 err; + float toterr = 0; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -941,19 +943,21 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt // for this mode, we assume alpha = 255 constant and compress only the RGB portion. // however, we do the error check against the actual alpha values supplied for the tile. 
-static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) { for (int region=0; region maxp) maxp = dp; } // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values - endpts[region].A = mean + minp*direction; - endpts[region].B = mean + maxp*direction; + endpts[region].A = mean + minp*Vector4(direction, 0); + endpts[region].B = mean + maxp*Vector4(direction, 0); // clamp endpoints // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best @@ -1020,13 +1010,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS] return map_colors(tile, shapeindex, endpts); } -static void swap(double *list1, int *list2, int i, int j) +static void swap(float *list1, int *list2, int i, int j) { - double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; } -double AVPCL::compress_mode0(const Tile &t, char *block) +float AVPCL::compress_mode0(const Tile &t, char *block) { // number of rough cases to look at. 
reasonable values of this are 1, NSHAPES/4, and NSHAPES // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out @@ -1036,10 +1026,10 @@ double AVPCL::compress_mode0(const Tile &t, char *block) struct { FltEndpts endpts[NREGIONS]; } all[NSHAPES]; - double roughmse[NSHAPES]; + float roughmse[NSHAPES]; int index[NSHAPES]; char tempblock[AVPCL::BLOCKSIZE]; - double msebest = DBL_MAX; + float msebest = FLT_MAX; for (int i=0; i0; ++i) { int shape = index[i]; - double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); if (mse < msebest) { memcpy(block, tempblock, sizeof(tempblock)); diff --git a/src/nvtt/bc7/avpcl_mode1.cpp b/src/nvtt/bc7/avpcl_mode1.cpp index bee9daa..8ee3570 100644 --- a/src/nvtt/bc7/avpcl_mode1.cpp +++ b/src/nvtt/bc7/avpcl_mode1.cpp @@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations #include "bits.h" #include "tile.h" #include "avpcl.h" -#include "arvo/Vec4.h" -#include "arvo/Matrix.h" -#include "arvo/SVD.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" #include "utils.h" #include "endpts.h" - -#include +#include #include "shapes_two.h" -using namespace ArvoMath; +using namespace nv; +using namespace AVPCL; #define NLSBMODES 2 // number of different lsb modes per region. since we have one .1 per region, that can have 2 values @@ -83,7 +84,7 @@ struct PatternPrec // this is the precision for each channel and region -// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! 
static PatternPrec pattern_precs[NPATTERNS] = { 6,6,6, 6,6,6, 6,6,6, 6,6,6, @@ -102,7 +103,7 @@ static int nbits(int n, bool issigned) } else { - assert (issigned); + nvAssert (issigned); for (nb=0; n<-1; ++nb, n>>=1) ; return nb + 1; } @@ -111,12 +112,12 @@ static int nbits(int n, bool issigned) static void transform_forward(IntEndptsRGB_1 ep[NREGIONS]) { - assert(0); + nvUnreachable(); } static void transform_inverse(IntEndptsRGB_1 ep[NREGIONS]) { - assert(0); + nvUnreachable(); } // endpoints are 777,777; reduce to 666,666 and put the lsb bit majority in compr_bits @@ -131,8 +132,8 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_1& compr_endpt compr_endpts.A[j] = endpts.A[j] >> 1; onescnt += endpts.B[j] & 1; compr_endpts.B[j] = endpts.B[j] >> 1; - assert (compr_endpts.A[j] < 64); - assert (compr_endpts.B[j] < 64); + nvAssert (compr_endpts.A[j] < 64); + nvAssert (compr_endpts.B[j] < 64); } compr_endpts.lsb = onescnt >= 3; } @@ -165,12 +166,12 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec for (int region = 0; region < NREGIONS; ++region) { - full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space - full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); - full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); - full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); - full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); - full_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); + full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, 
pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space + full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1); + full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1); + full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1); + full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1); + full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1); compress_one(full_endpts[region], q_endpts[region]); } } @@ -184,7 +185,7 @@ static void swap_indices(IntEndptsRGB_1 endpts[NREGIONS], int indices[Tile::TILE int x = POS_TO_X(position); int y = POS_TO_Y(position); - assert(REGION(x,y,shapeindex) == region); // double check the table + nvAssert(REGION(x,y,shapeindex) == region); // double check the table if (indices[y][x] & HIGH_INDEXBIT) { // high bit is set, swap the endpts and indices for this region @@ -220,7 +221,7 @@ static void write_header(const IntEndptsRGB_1 endpts[NREGIONS], int shapeindex, for (int i=0; i= 0 && pat_index < NPATTERNS); - assert (in.getptr() == patterns[pat_index].modebits); + nvAssert (pat_index >= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); shapeindex = in.read(SHAPEBITS); p = patterns[pat_index]; @@ -244,7 +245,7 @@ static void read_header(Bits &in, IntEndptsRGB_1 endpts[NREGIONS], int &shapeind for (int i=0; i 0; ++j) { @@ -409,7 +410,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_1 &endp for (int k = i; k < np; ++k) indices[k] = -1; - return DBL_MAX; + return FLT_MAX; } } return toterr; @@ -417,10 +418,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_1 &endp // assign indices given a tile, shape, and 
quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endpts[NREGIONS], const PatternPrec &pattern_prec, - int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; for (int region = 0; region < NREGIONS; ++region) { @@ -428,13 +429,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endp toterr[region] = 0; } - Vec4 err; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -454,8 +455,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endp // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // this function returns either old_err or a value smaller (if it was successful in improving the error) -static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts, - double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts @@ -525,10 +526,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec // for np = 16 -- adjust error thresholds as a function of np // always ensure endpoint ordering is preserved (no need to overlap the scan) // if orig_err returned from this is less than its input value, then 
indices[] will contain valid indices -static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL]) +static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL]) { IntEndptsRGB_1 temp_endpts; - double best_err = orig_err; + float best_err = orig_err; int aprec = region_prec.endpt_a_prec[ch]; int bprec = region_prec.endpt_b_prec[ch]; int good_indices[Tile::TILE_TOTAL]; @@ -537,7 +538,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & for (int i=0; i 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } - adelta = MAX(adelta, 3); - bdelta = MAX(bdelta, 3); + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); #ifdef DISABLE_EXHAUSTIVE adelta = bdelta = 3; @@ -556,10 +557,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & temp_endpts = opt_endpts; // ok figure out the range of A and B - int alow = MAX(0, opt_endpts.A[ch] - adelta); - int ahigh = MIN((1< RGBA_MAX) v.X() = RGBA_MAX; - if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; - if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; - if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; - if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; - v.W() = RGBA_MAX; + if (v.x < 0.0f) v.x = 0.0f; + if (v.x > 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + v.w = 255.0f; } -static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) { for 
(int region = 0; region < NREGIONS; ++region) for (int i = 0; i < NINDICES; ++i) - palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); } // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined -static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; generate_palette_unquantized(endpts, palette); - double toterr = 0; - Vec4 err; + float toterr = 0; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -922,19 +924,21 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt return toterr; } -static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) { for (int region=0; region maxp) maxp = dp; } // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values - endpts[region].A = mean + minp*direction; - endpts[region].B = mean + maxp*direction; + endpts[region].A = mean + minp*Vector4(direction, 0); + endpts[region].B = mean + maxp*Vector4(direction, 0); // clamp endpoints // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best @@ -1001,13 +991,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS] return map_colors(tile, shapeindex, endpts); } -static void 
swap(double *list1, int *list2, int i, int j) +static void swap(float *list1, int *list2, int i, int j) { - double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; } -double AVPCL::compress_mode1(const Tile &t, char *block) +float AVPCL::compress_mode1(const Tile &t, char *block) { // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out @@ -1017,10 +1007,10 @@ double AVPCL::compress_mode1(const Tile &t, char *block) struct { FltEndpts endpts[NREGIONS]; } all[NSHAPES]; - double roughmse[NSHAPES]; + float roughmse[NSHAPES]; int index[NSHAPES]; char tempblock[AVPCL::BLOCKSIZE]; - double msebest = DBL_MAX; + float msebest = FLT_MAX; for (int i=0; i0; ++i) { int shape = index[i]; - double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); if (mse < msebest) { memcpy(block, tempblock, sizeof(tempblock)); diff --git a/src/nvtt/bc7/avpcl_mode2.cpp b/src/nvtt/bc7/avpcl_mode2.cpp index ef37dbe..bff191a 100644 --- a/src/nvtt/bc7/avpcl_mode2.cpp +++ b/src/nvtt/bc7/avpcl_mode2.cpp @@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations #include "bits.h" #include "tile.h" #include "avpcl.h" -#include "arvo/Vec4.h" -#include "arvo/Matrix.h" -#include "arvo/SVD.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" #include "utils.h" #include "endpts.h" - -#include +#include #include "shapes_three.h" -using namespace ArvoMath; +using namespace nv; +using namespace AVPCL; #define NINDICES 4 #define INDEXBITS 2 @@ -80,7 +81,7 @@ struct PatternPrec // this is the precision for each channel and region -// NOTE: this MUST match the 
corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! static PatternPrec pattern_precs[NPATTERNS] = { @@ -100,7 +101,7 @@ static int nbits(int n, bool issigned) } else { - assert (issigned); + nvAssert (issigned); for (nb=0; n<-1; ++nb, n>>=1) ; return nb + 1; } @@ -131,12 +132,12 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS_THREE], const Patter { for (int region = 0; region < NREGIONS_THREE; ++region) { - q_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]); - q_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]); - q_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]); - q_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]); - q_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]); - q_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]); + q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]); + q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]); + q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]); + q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]); + q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]); + q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]); } } @@ -149,7 +150,7 @@ static void 
swap_indices(IntEndptsRGB endpts[NREGIONS_THREE], int indices[Tile:: int x = POS_TO_X(position); int y = POS_TO_Y(position); - assert(REGION(x,y,shapeindex) == region); // double check the table + nvAssert(REGION(x,y,shapeindex) == region); // double check the table if (indices[y][x] & HIGH_INDEXBIT) { // high bit is set, swap the endpts and indices for this region @@ -181,7 +182,7 @@ static void write_header(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeind out.write(endpts[i].A[j], p.chan[j].nbitsizes[i*2+0]); out.write(endpts[i].B[j], p.chan[j].nbitsizes[i*2+1]); } - assert (out.getptr() == 99); + nvAssert (out.getptr() == 99); } static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shapeindex, Pattern &p, int &pat_index) @@ -189,8 +190,8 @@ static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shap int mode = AVPCL::getmode(in); pat_index = 0; - assert (pat_index >= 0 && pat_index < NPATTERNS); - assert (in.getptr() == patterns[pat_index].modebits); + nvAssert (pat_index >= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); shapeindex = in.read(SHAPEBITS); @@ -202,7 +203,7 @@ static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shap endpts[i].A[j] = in.read(p.chan[j].nbitsizes[i*2+0]); endpts[i].B[j] = in.read(p.chan[j].nbitsizes[i*2+1]); } - assert (in.getptr() == 99); + nvAssert (in.getptr() == 99); } @@ -257,10 +258,10 @@ static void emit_block(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex write_indices(indices, shapeindex, out); - assert(out.getptr() == AVPCL::BITSIZE); + nvAssert(out.getptr() == AVPCL::BITSIZE); } -static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionPrec ®ion_prec, Vec4 palette[NINDICES]) +static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionPrec ®ion_prec, Vector4 palette[NINDICES]) { // scale endpoints int a, b; // really need a IntVec4... 
@@ -270,31 +271,31 @@ static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionP // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM)); // constant alpha for (int i = 0; i < NINDICES; ++i) - palette[i].W() = RGBA_MAX; + palette[i].w = 255.0f; } // sign extend but only if it was transformed static void sign_extend(Pattern &p, IntEndptsRGB endpts[NREGIONS_THREE]) { - assert (p.transformed != 0); + nvAssert (p.transformed != 0); for (int i=0; i 0; ++j) { @@ -373,7 +374,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB &endpts for (int k = i; k < np; ++k) indices[k] = -1; - return DBL_MAX; + return FLT_MAX; } } return toterr; @@ -381,10 +382,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB &endpts // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, - int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS_THREE]) + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_THREE]) { // build list of possibles - Vec4 palette[NREGIONS_THREE][NINDICES]; + Vector4 palette[NREGIONS_THREE][NINDICES]; for (int region = 0; region < 
NREGIONS_THREE; ++region) { @@ -392,13 +393,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts toterr[region] = 0; } - Vec4 err; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -418,8 +419,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // this function returns either old_err or a value smaller (if it was successful in improving the error) -static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts, - double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts @@ -489,10 +490,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec // for np = 16 -- adjust error thresholds as a function of np // always ensure endpoint ordering is preserved (no need to overlap the scan) // if orig_err returned from this is less than its input value, then indices[] will contain valid indices -static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL]) +static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL]) { IntEndptsRGB temp_endpts; - double best_err = orig_err; + float best_err = orig_err; 
int aprec = region_prec.endpt_a_prec[ch]; int bprec = region_prec.endpt_b_prec[ch]; int good_indices[Tile::TILE_TOTAL]; @@ -501,7 +502,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & for (int i=0; i 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } - adelta = MAX(adelta, 3); - bdelta = MAX(bdelta, 3); + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); #ifdef DISABLE_EXHAUSTIVE adelta = bdelta = 3; @@ -520,10 +521,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & temp_endpts = opt_endpts; // ok figure out the range of A and B - int alow = MAX(0, opt_endpts.A[ch] - adelta); - int ahigh = MIN((1< RGBA_MAX) v.X() = RGBA_MAX; - if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; - if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; - if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; - if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; - v.W() = RGBA_MAX; + if (v.x < 0.0f) v.x = 0.0f; + if (v.x > 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + v.w = 255.0f; } -static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vec4 palette[NREGIONS_THREE][NINDICES]) +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vector4 palette[NREGIONS_THREE][NINDICES]) { for (int region = 0; region < NREGIONS_THREE; ++region) for (int i = 0; i < NINDICES; ++i) - palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); } // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined -static double 
map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE]) +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE]) { // build list of possibles - Vec4 palette[NREGIONS_THREE][NINDICES]; + Vector4 palette[NREGIONS_THREE][NINDICES]; generate_palette_unquantized(endpts, palette); - double toterr = 0; - Vec4 err; + float toterr = 0; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -878,19 +880,21 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt return toterr; } -static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE]) +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE]) { for (int region=0; region maxp) maxp = dp; } // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values - endpts[region].A = mean + minp*direction; - endpts[region].B = mean + maxp*direction; + endpts[region].A = mean + minp*Vector4(direction, 0); + endpts[region].B = mean + maxp*Vector4(direction, 0); // clamp endpoints // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best @@ -957,13 +947,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ return map_colors(tile, shapeindex, endpts); } -static void swap(double *list1, int *list2, int i, int j) +static void swap(float *list1, int *list2, int i, int j) { - double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; } -double AVPCL::compress_mode2(const Tile &t, char *block) +float AVPCL::compress_mode2(const Tile &t, char *block) { // 
number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out @@ -973,10 +963,10 @@ double AVPCL::compress_mode2(const Tile &t, char *block) struct { FltEndpts endpts[NREGIONS_THREE]; } all[NSHAPES]; - double roughmse[NSHAPES]; + float roughmse[NSHAPES]; int index[NSHAPES]; char tempblock[AVPCL::BLOCKSIZE]; - double msebest = DBL_MAX; + float msebest = FLT_MAX; for (int i=0; i0; ++i) { int shape = index[i]; - double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); if (mse < msebest) { memcpy(block, tempblock, sizeof(tempblock)); diff --git a/src/nvtt/bc7/avpcl_mode3.cpp b/src/nvtt/bc7/avpcl_mode3.cpp index cf19759..4a2f80a 100644 --- a/src/nvtt/bc7/avpcl_mode3.cpp +++ b/src/nvtt/bc7/avpcl_mode3.cpp @@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations #include "bits.h" #include "tile.h" #include "avpcl.h" -#include "arvo/Vec4.h" -#include "arvo/Matrix.h" -#include "arvo/SVD.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" #include "utils.h" #include "endpts.h" - -#include +#include #include "shapes_two.h" -using namespace ArvoMath; +using namespace nv; +using namespace AVPCL; #define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values @@ -84,7 +85,7 @@ struct PatternPrec // this is the precision for each channel and region -// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! 
static PatternPrec pattern_precs[NPATTERNS] = { 7,7,7, 7,7,7, 7,7,7, 7,7,7, @@ -103,7 +104,7 @@ static int nbits(int n, bool issigned) } else { - assert (issigned); + nvAssert (issigned); for (nb=0; n<-1; ++nb, n>>=1) ; return nb + 1; } @@ -111,12 +112,12 @@ static int nbits(int n, bool issigned) static void transform_forward(IntEndptsRGB_2 ep[NREGIONS]) { - assert(0); + nvUnreachable(); } static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS]) { - assert(0); + nvUnreachable(); } // endpoints are 888,888; reduce to 777,777 and put the lsb bit majority in compr_bits @@ -129,7 +130,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt { onescnt += endpts.A[j] & 1; compr_endpts.A[j] = endpts.A[j] >> 1; - assert (compr_endpts.A[j] < 128); + nvAssert (compr_endpts.A[j] < 128); } compr_endpts.a_lsb = onescnt >= 2; @@ -138,7 +139,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt { onescnt += endpts.B[j] & 1; compr_endpts.B[j] = endpts.B[j] >> 1; - assert (compr_endpts.B[j] < 128); + nvAssert (compr_endpts.B[j] < 128); } compr_endpts.b_lsb = onescnt >= 2; } @@ -171,12 +172,12 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec for (int region = 0; region < NREGIONS; ++region) { - full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space - full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); - full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); - full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); - full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); - full_endpts[region].B[2] = 
Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); + full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space + full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1); + full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1); + full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1); + full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1); + full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1); compress_one(full_endpts[region], q_endpts[region]); } } @@ -190,7 +191,7 @@ static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE int x = POS_TO_X(position); int y = POS_TO_Y(position); - assert(REGION(x,y,shapeindex) == region); // double check the table + nvAssert(REGION(x,y,shapeindex) == region); // double check the table if (indices[y][x] & HIGH_INDEXBIT) { // high bit is set, swap the endpts and indices for this region @@ -232,7 +233,7 @@ static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, out.write(endpts[i].b_lsb, 1); } - assert (out.getptr() == 98); + nvAssert (out.getptr() == 98); } static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) @@ -240,8 +241,8 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind int mode = AVPCL::getmode(in); pat_index = 0; - assert (pat_index >= 0 && pat_index < NPATTERNS); - assert (in.getptr() == patterns[pat_index].modebits); + nvAssert (pat_index >= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); shapeindex = 
in.read(SHAPEBITS); p = patterns[pat_index]; @@ -259,7 +260,7 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind endpts[i].b_lsb = in.read(1); } - assert (in.getptr() == 98); + nvAssert (in.getptr() == 98); } static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) @@ -312,10 +313,10 @@ static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, co write_indices(indices, shapeindex, out); - assert(out.getptr() == AVPCL::BITSIZE); + nvAssert(out.getptr() == AVPCL::BITSIZE); } -static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec ®ion_prec, Vec4 palette[NINDICES]) +static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec ®ion_prec, Vector4 palette[NINDICES]) { IntEndptsRGB endpts; @@ -329,30 +330,30 @@ static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const Reg // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM)); // constant alpha for (int i = 0; i < NINDICES; ++i) - palette[i].W() = RGBA_MAX; + palette[i].w = 255.0f; } static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS]) { - assert(0); + nvUnreachable(); } void AVPCL::decompress_mode3(const char *block, Tile &t) @@ -371,7 +372,7 
@@ void AVPCL::decompress_mode3(const char *block, Tile &t) transform_inverse(endpts); } - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; for (int r = 0; r < NREGIONS; ++r) generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); @@ -379,7 +380,7 @@ void AVPCL::decompress_mode3(const char *block, Tile &t) read_indices(in, shapeindex, indices); - assert(in.getptr() == AVPCL::BITSIZE); + nvAssert(in.getptr() == AVPCL::BITSIZE); // lookup for (int y = 0; y < Tile::TILE_H; y++) @@ -388,17 +389,17 @@ void AVPCL::decompress_mode3(const char *block, Tile &t) } // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr -static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec ®ion_prec, double current_err, int indices[Tile::TILE_TOTAL]) +static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec ®ion_prec, float current_err, int indices[Tile::TILE_TOTAL]) { - Vec4 palette[NINDICES]; - double toterr = 0; - Vec4 err; + Vector4 palette[NINDICES]; + float toterr = 0; + Vector4 err; generate_palette_quantized(endpts, region_prec, palette); for (int i = 0; i < np; ++i) { - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int j = 0; j < NINDICES && besterr > 0; ++j) { @@ -421,17 +422,17 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endp for (int k = i; k < np; ++k) indices[k] = -1; - return DBL_MAX; + return FLT_MAX; } } return toterr; } static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, - int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; for (int region 
= 0; region < NREGIONS; ++region) { @@ -439,13 +440,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp toterr[region] = 0; } - Vec4 err; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -465,8 +466,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // this function returns either old_err or a value smaller (if it was successful in improving the error) -static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, - double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts @@ -536,10 +537,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec // for np = 16 -- adjust error thresholds as a function of np // always ensure endpoint ordering is preserved (no need to overlap the scan) // if orig_err returned from this is less than its input value, then indices[] will contain valid indices -static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) { IntEndptsRGB_2 temp_endpts; - double best_err = orig_err; + float 
best_err = orig_err; int aprec = region_prec.endpt_a_prec[ch]; int bprec = region_prec.endpt_b_prec[ch]; int good_indices[Tile::TILE_TOTAL]; @@ -548,7 +549,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & for (int i=0; i 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } - adelta = MAX(adelta, 3); - bdelta = MAX(bdelta, 3); + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); #ifdef DISABLE_EXHAUSTIVE adelta = bdelta = 3; @@ -567,10 +568,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & temp_endpts = opt_endpts; // ok figure out the range of A and B - int alow = MAX(0, opt_endpts.A[ch] - adelta); - int ahigh = MIN((1<> 1) & 1; // make sure we have a valid error for temp_in - // we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts - // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) - double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); + // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position) + float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices); // now try to optimize these endpoints - double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); // if we find an improvement, update the best so far and correct the output endpoints and errors if (temp_out_err < best_err) @@ -838,9 +839,9 @@ 
static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_ emit compressed block with original data // to try to preserve maximum endpoint precision */ -static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) { - double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; @@ -859,8 +860,9 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp transform_inverse(orig_endpts); optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); - for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; - if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; - if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; - if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; - if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; - v.W() = RGBA_MAX; + if (v.x < 0.0f) v.x = 0.0f; + if (v.x > 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + v.w = 255.0f; } -static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) { for (int region = 0; region < NREGIONS; ++region) for (int i = 0; i < NINDICES; ++i) - palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 
0, DENOM); } // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined -static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; generate_palette_unquantized(endpts, palette); - double toterr = 0; - Vec4 err; + float toterr = 0; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -934,19 +936,21 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt return toterr; } -static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) { for (int region=0; region maxp) maxp = dp; } // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values - endpts[region].A = mean + minp*direction; - endpts[region].B = mean + maxp*direction; + endpts[region].A = mean + minp*Vector4(direction, 0); + endpts[region].B = mean + maxp*Vector4(direction, 0); // clamp endpoints // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best @@ -1013,13 +1003,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS] return map_colors(tile, shapeindex, endpts); } -static void swap(double *list1, int *list2, int i, int j) +static void swap(float *list1, int *list2, int i, int j) { - double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; int t1 = list2[i]; list2[i] = 
list2[j]; list2[j] = t1; } -double AVPCL::compress_mode3(const Tile &t, char *block) +float AVPCL::compress_mode3(const Tile &t, char *block) { // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out @@ -1029,10 +1019,10 @@ double AVPCL::compress_mode3(const Tile &t, char *block) struct { FltEndpts endpts[NREGIONS]; } all[NSHAPES]; - double roughmse[NSHAPES]; + float roughmse[NSHAPES]; int index[NSHAPES]; char tempblock[AVPCL::BLOCKSIZE]; - double msebest = DBL_MAX; + float msebest = FLT_MAX; for (int i=0; i0; ++i) { int shape = index[i]; - double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); if (mse < msebest) { memcpy(block, tempblock, sizeof(tempblock)); diff --git a/src/nvtt/bc7/avpcl_mode4.cpp b/src/nvtt/bc7/avpcl_mode4.cpp index cd6a5e5..80ccdee 100644 --- a/src/nvtt/bc7/avpcl_mode4.cpp +++ b/src/nvtt/bc7/avpcl_mode4.cpp @@ -17,15 +17,16 @@ See the License for the specific language governing permissions and limitations #include "bits.h" #include "tile.h" #include "avpcl.h" -#include "arvo/Vec4.h" -#include "arvo/Matrix.h" -#include "arvo/SVD.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" #include "utils.h" #include "endpts.h" +#include -#include - -using namespace ArvoMath; +using namespace nv; +using namespace AVPCL; // there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits // array 0 is always the RGB array and array 1 is always the A array @@ -111,7 +112,7 @@ struct PatternPrec }; // this is the precision for each channel and region -// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! 
+// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! static PatternPrec pattern_precs[NPATTERNS] = { 5,5,5,6, 5,5,5,6, @@ -131,7 +132,7 @@ static int nbits(int n, bool issigned) } else { - assert (issigned); + nvAssert (issigned); for (nb=0; n<-1; ++nb, n>>=1) ; return nb + 1; } @@ -172,15 +173,15 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec { for (int region = 0; region < NREGIONS; ++region) { - q_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]); - q_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]); - q_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]); - q_endpts[region].A[3] = Utils::quantize(endpts[region].A.W(), pattern_prec.region_precs[region].endpt_a_prec[3]); + q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]); + q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]); + q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]); + q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]); - q_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]); - q_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]); - q_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]); - q_endpts[region].B[3] = Utils::quantize(endpts[region].B.W(), pattern_prec.region_precs[region].endpt_b_prec[3]); + q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]); + 
q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]); + q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]); + q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]); } } @@ -196,7 +197,7 @@ static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NRE { int x = index_positions[region] & 3; int y = (index_positions[region] >> 2) & 3; - assert(REGION(x,y,shapeindex) == region); // double check the table + nvAssert(REGION(x,y,shapeindex) == region); // double check the table // swap RGB if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode)) @@ -243,7 +244,7 @@ static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, c out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]); out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]); } - assert (out.getptr() == 50); + nvAssert (out.getptr() == 50); } static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index) @@ -252,8 +253,8 @@ static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeinde pat_index = 0; - assert (pat_index >= 0 && pat_index < NPATTERNS); - assert (in.getptr() == patterns[pat_index].modebits); + nvAssert (pat_index >= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); p = patterns[pat_index]; @@ -267,7 +268,7 @@ static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeinde endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]); endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]); } - assert (in.getptr() == 50); + nvAssert (in.getptr() == 50); } static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out) @@ -275,12 +276,12 @@ static void write_indices(const int 
indices[NINDEXARRAYS][Tile::TILE_H][Tile::TI // the indices we shorten is always index 0 // do the 2 bit indices first - assert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0); + nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0); for (int i = 0; i < Tile::TILE_TOTAL; ++i) out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0] // then the 3 bit indices - assert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); + nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); for (int i = 0; i < Tile::TILE_TOTAL; ++i) out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0] } @@ -306,10 +307,10 @@ static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, con write_indices(indices, shapeindex, indexmode, out); - assert(out.getptr() == AVPCL::BITSIZE); + nvAssert(out.getptr() == AVPCL::BITSIZE); } -static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, int indexmode, Vec3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) +static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) { // scale endpoints for RGB int a, b; @@ -319,28 +320,28 @@ static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const // interpolate R for (int i = 0; i < NINDICES_RGB(indexmode); ++i) - palette_rgb[i].X() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); // interpolate G for (int i = 0; i < NINDICES_RGB(indexmode); ++i) - palette_rgb[i].Y() = 
PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); // interpolate B for (int i = 0; i < NINDICES_RGB(indexmode); ++i) - palette_rgb[i].Z() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]); // interpolate A for (int i = 0; i < NINDICES_A(indexmode); ++i) - palette_a[i] = PALETTE_LERP(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)); + palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode))); } @@ -372,10 +373,10 @@ static void rotate_tile(const Tile &in, int rotatemode, Tile &out) switch(rotatemode) { case ROTATEMODE_RGBA_RGBA: break; - case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).X(); (out.data[y][x]).X() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; - case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).Y(); (out.data[y][x]).Y() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; - case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).Z(); (out.data[y][x]).Z() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; - default: assert(0); + case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break; + case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break; + case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break; + default: nvUnreachable(); } } } @@ -395,7 +396,7 @@ void AVPCL::decompress_mode4(const char *block, Tile &t) if (p.transform_mode) 
transform_inverse(p.transform_mode, endpts); - Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 + Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 float palette_a[NREGIONS][NINDICES3]; // could be nindices2 for (int region = 0; region < NREGIONS; ++region) @@ -405,14 +406,14 @@ void AVPCL::decompress_mode4(const char *block, Tile &t) read_indices(in, shapeindex, indexmode, indices); - assert(in.getptr() == AVPCL::BITSIZE); + nvAssert(in.getptr() == AVPCL::BITSIZE); Tile temp(t.size_x, t.size_y); // lookup for (int y = 0; y < Tile::TILE_H; y++) for (int x = 0; x < Tile::TILE_W; x++) - temp.data[y][x] = Vec4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]); + temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]); rotate_tile(temp, rotatemode, t); } @@ -420,31 +421,31 @@ void AVPCL::decompress_mode4(const char *block, Tile &t) // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // we already have a candidate mapping when we call this function, thus an error. 
take an early exit if the accumulated error so far // exceeds what we already have -static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, double current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +static float map_colors(const Vector4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) { - Vec3 palette_rgb[NINDICES3]; // could be nindices2 + Vector3 palette_rgb[NINDICES3]; // could be nindices2 float palette_a[NINDICES3]; // could be nindices2 - double toterr = 0; + float toterr = 0; generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]); - Vec3 rgb; + Vector3 rgb; float a; for (int i = 0; i < np; ++i) { - double err, besterr; + float err, besterr; float palette_alpha = 0, tile_alpha = 0; if(AVPCL::flag_premult) - tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).X() : - (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).Y() : - (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).Z() : (colors[i]).W(); + tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? 
(colors[i]).z : (colors[i]).w; - rgb.X() = (colors[i]).X(); - rgb.Y() = (colors[i]).Y(); - rgb.Z() = (colors[i]).Z(); - a = (colors[i]).W(); + rgb.x = (colors[i]).x; + rgb.y = (colors[i]).y; + rgb.z = (colors[i]).z; + a = (colors[i]).w; // compute the two indices separately // if we're doing premultiplied alpha, we need to choose first the index that @@ -453,7 +454,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm if (rotatemode == ROTATEMODE_RGBA_RGBA) { // do A index first as it has the alpha - besterr = DBL_MAX; + besterr = FLT_MAX; for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) { err = Utils::metric1(a, palette_a[j], rotatemode); @@ -470,7 +471,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm toterr += besterr; // squared-error norms are additive since we don't do the square root // do RGB index - besterr = DBL_MAX; + besterr = FLT_MAX; for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) { err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : @@ -493,13 +494,13 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm indices[INDEXARRAY_RGB][k] = -1; indices[INDEXARRAY_A][k] = -1; } - return DBL_MAX; + return FLT_MAX; } } else { // do RGB index - besterr = DBL_MAX; + besterr = FLT_MAX; int bestindex; for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) { @@ -515,13 +516,13 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm indices[INDEXARRAY_RGB][i] = j; } } - palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).X() : - (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).Y() : - (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).Z() : (assert(0),0); + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? 
(palette_rgb[bestindex]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : (nvCheckMacro(0),0); toterr += besterr; // do A index - besterr = DBL_MAX; + besterr = FLT_MAX; for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) { err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) : @@ -544,7 +545,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm indices[INDEXARRAY_RGB][k] = -1; indices[INDEXARRAY_A][k] = -1; } - return DBL_MAX; + return FLT_MAX; } } } @@ -553,9 +554,9 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, - int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) + int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) { - Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 + Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 float palette_a[NREGIONS][NINDICES3]; // could be nindices2 for (int region = 0; region < NREGIONS; ++region) @@ -564,25 +565,25 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int toterr[region] = 0; } - Vec3 rgb; + Vector3 rgb; float a; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr; + float err, besterr; float palette_alpha = 0, tile_alpha = 0; - rgb.X() = (tile.data[y][x]).X(); - rgb.Y() = (tile.data[y][x]).Y(); - rgb.Z() = (tile.data[y][x]).Z(); - a = (tile.data[y][x]).W(); + rgb.x = (tile.data[y][x]).x; + rgb.y = (tile.data[y][x]).y; + rgb.z = (tile.data[y][x]).z; + a = (tile.data[y][x]).w; if(AVPCL::flag_premult) - tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? 
(tile.data[y][x]).X() : - (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).Y() : - (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).Z() : (tile.data[y][x]).W(); + tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w; // compute the two indices separately // if we're doing premultiplied alpha, we need to choose first the index that @@ -591,7 +592,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int if (rotatemode == ROTATEMODE_RGBA_RGBA) { // do A index first as it has the alpha - besterr = DBL_MAX; + besterr = FLT_MAX; for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) { err = Utils::metric1(a, palette_a[region][i], rotatemode); @@ -608,7 +609,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int toterr[region] += besterr; // squared-error norms are additive since we don't do the square root // do RGB index - besterr = DBL_MAX; + besterr = FLT_MAX; for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) { err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : @@ -627,7 +628,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int else { // do RGB index first as it has the alpha - besterr = DBL_MAX; + besterr = FLT_MAX; int bestindex; for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) { @@ -643,13 +644,13 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int bestindex = i; } } - palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).X() : - (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).Y() : - (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).Z() : (assert(0),0); + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? 
(palette_rgb[region][bestindex]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : (nvCheckMacro(0),0); toterr[region] += besterr; // do A index - besterr = DBL_MAX; + besterr = FLT_MAX; for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) { err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) : @@ -670,8 +671,8 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // this function returns either old_err or a value smaller (if it was successful in improving the error) -static double perturb_one(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, - double old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +static float perturb_one(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, + float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts @@ -742,10 +743,10 @@ static double perturb_one(const Vec4 colors[], int np, int rotatemode, int index // if err > 40 6.25% // for np = 16 -- adjust error thresholds as a function of np // always ensure endpoint ordering is preserved (no need to overlap the scan) -static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +static float exhaustive(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGBA &opt_endpts, 
int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) { IntEndptsRGBA temp_endpts; - double best_err = orig_err; + float best_err = orig_err; int aprec = region_prec.endpt_a_prec[ch]; int bprec = region_prec.endpt_b_prec[ch]; int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; @@ -755,7 +756,7 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm for (int i=0; i 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } - adelta = MAX(adelta, 3); - bdelta = MAX(bdelta, 3); + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); #ifdef DISABLE_EXHAUSTIVE adelta = bdelta = 3; @@ -774,10 +775,10 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm temp_endpts = opt_endpts; // ok figure out the range of A and B - int alow = MAX(0, opt_endpts.A[ch] - adelta); - int ahigh = MIN((1< RGBA_MAX) v.X() = RGBA_MAX; - if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; - if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; - if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; - if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; - if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; - if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; + if (v.x < 0.0f) v.x = 0.0f; + if (v.x > 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + if (v.w < 0.0f) v.w = 0.0f; + if (v.w > 255.0f) v.w = 255.0f; } // compute initial endpoints for the "RGB" portion and the "A" portion. 
@@ -1113,14 +1114,16 @@ static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) for (int region=0; region maxp) maxp = dp; - dp = alpha[i]; + dp = alphas[i] - mean.w; if (dp < mina) mina = dp; if (dp > maxa) maxa = dp; } // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values - endpts[region].A = mean + minp*direction; - endpts[region].B = mean + maxp*direction; - endpts[region].A.W() = mean.W() + mina; - endpts[region].B.W() = mean.W() + maxa; + endpts[region].A = mean + Vector4(minp*direction, mina); + endpts[region].B = mean + Vector4(maxp*direction, maxa); // clamp endpoints - // WORK: is [0,255] the right range, or should it be [0,255.5) or even [0,256) ? + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped clamp(endpts[region].A); clamp(endpts[region].B); } } -double AVPCL::compress_mode4(const Tile &t, char *block) +float AVPCL::compress_mode4(const Tile &t, char *block) { FltEndpts endpts[NREGIONS]; char tempblock[AVPCL::BLOCKSIZE]; - double msebest = DBL_MAX; + float msebest = FLT_MAX; int shape = 0; Tile t1; @@ -1208,7 +1194,7 @@ double AVPCL::compress_mode4(const Tile &t, char *block) rough(t1, shape, endpts); for (int i = 0; i < NINDEXMODES && msebest > 0; ++i) { - double mse = refine(t1, shape, r, i, endpts, tempblock); + float mse = refine(t1, shape, r, i, endpts, tempblock); if (mse < msebest) { memcpy(block, tempblock, sizeof(tempblock)); diff --git a/src/nvtt/bc7/avpcl_mode5.cpp b/src/nvtt/bc7/avpcl_mode5.cpp index d7f04da..fb1c035 100644 --- a/src/nvtt/bc7/avpcl_mode5.cpp +++ b/src/nvtt/bc7/avpcl_mode5.cpp @@ -17,15 +17,16 @@ See the License for the specific language governing permissions and limitations #include "bits.h" #include "tile.h" #include "avpcl.h" -#include "arvo/Vec4.h" -#include "arvo/Matrix.h" -#include "arvo/SVD.h" +#include "nvcore/Debug.h" 
+#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" #include "utils.h" #include "endpts.h" +#include -#include - -using namespace ArvoMath; +using namespace nv; +using namespace AVPCL; // there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits // array 0 is always the RGB array and array 1 is always the A array @@ -111,7 +112,7 @@ struct PatternPrec }; // this is the precision for each channel and region -// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! static PatternPrec pattern_precs[NPATTERNS] = { 7,7,7,8, 7,7,7,8, @@ -131,7 +132,7 @@ static int nbits(int n, bool issigned) } else { - assert (issigned); + nvAssert (issigned); for (nb=0; n<-1; ++nb, n>>=1) ; return nb + 1; } @@ -172,15 +173,15 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec { for (int region = 0; region < NREGIONS; ++region) { - q_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]); - q_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]); - q_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]); - q_endpts[region].A[3] = Utils::quantize(endpts[region].A.W(), pattern_prec.region_precs[region].endpt_a_prec[3]); + q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]); + q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]); + q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]); + q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, 
pattern_prec.region_precs[region].endpt_a_prec[3]); - q_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]); - q_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]); - q_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]); - q_endpts[region].B[3] = Utils::quantize(endpts[region].B.W(), pattern_prec.region_precs[region].endpt_b_prec[3]); + q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]); + q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]); + q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]); + q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]); } } @@ -196,7 +197,7 @@ static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NRE { int x = index_positions[region] & 3; int y = (index_positions[region] >> 2) & 3; - assert(REGION(x,y,shapeindex) == region); // double check the table + nvAssert(REGION(x,y,shapeindex) == region); // double check the table // swap RGB if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode)) @@ -243,7 +244,7 @@ static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, c out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]); out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]); } - assert (out.getptr() == 66); + nvAssert (out.getptr() == 66); } static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index) @@ -252,8 +253,8 @@ static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeinde pat_index = 0; - assert (pat_index >= 0 && pat_index < NPATTERNS); - assert (in.getptr() == 
patterns[pat_index].modebits); + nvAssert (pat_index >= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); p = patterns[pat_index]; @@ -269,7 +270,7 @@ static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeinde endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]); endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]); } - assert (in.getptr() == 66); + nvAssert (in.getptr() == 66); } static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out) @@ -277,12 +278,12 @@ static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TI // the indices we shorten is always index 0 // do the 2 bit indices first - assert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0); + nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0); for (int i = 0; i < Tile::TILE_TOTAL; ++i) out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0] // then the 3 bit indices - assert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); + nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); for (int i = 0; i < Tile::TILE_TOTAL; ++i) out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0] } @@ -308,10 +309,10 @@ static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, con write_indices(indices, shapeindex, indexmode, out); - assert(out.getptr() == AVPCL::BITSIZE); + nvAssert(out.getptr() == AVPCL::BITSIZE); } -static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, int indexmode, Vec3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) +static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) 
{ // scale endpoints for RGB int a, b; @@ -321,28 +322,28 @@ static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const // interpolate R for (int i = 0; i < NINDICES_RGB(indexmode); ++i) - palette_rgb[i].X() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); // interpolate G for (int i = 0; i < NINDICES_RGB(indexmode); ++i) - palette_rgb[i].Y() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); // interpolate B for (int i = 0; i < NINDICES_RGB(indexmode); ++i) - palette_rgb[i].Z() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); + palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]); // interpolate A for (int i = 0; i < NINDICES_A(indexmode); ++i) - palette_a[i] = PALETTE_LERP(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)); + palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode))); } static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS]) @@ -373,10 +374,10 @@ static void rotate_tile(const Tile &in, int rotatemode, Tile &out) switch(rotatemode) { case ROTATEMODE_RGBA_RGBA: break; - case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).X(); (out.data[y][x]).X() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; - case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).Y(); (out.data[y][x]).Y() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; - case 
ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).Z(); (out.data[y][x]).Z() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; - default: assert(0); + case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break; + case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break; + case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break; + default: nvUnreachable(); } } } @@ -396,7 +397,7 @@ void AVPCL::decompress_mode5(const char *block, Tile &t) if (p.transform_mode) transform_inverse(p.transform_mode, endpts); - Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 + Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 float palette_a[NREGIONS][NINDICES3]; // could be nindices2 for (int region = 0; region < NREGIONS; ++region) @@ -406,14 +407,14 @@ void AVPCL::decompress_mode5(const char *block, Tile &t) read_indices(in, shapeindex, indexmode, indices); - assert(in.getptr() == AVPCL::BITSIZE); + nvAssert(in.getptr() == AVPCL::BITSIZE); Tile temp(t.size_x, t.size_y); // lookup for (int y = 0; y < Tile::TILE_H; y++) for (int x = 0; x < Tile::TILE_W; x++) - temp.data[y][x] = Vec4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]); + temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]); rotate_tile(temp, rotatemode, t); } @@ -421,31 +422,31 @@ void AVPCL::decompress_mode5(const char *block, Tile &t) // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // we already have a candidate mapping when we call this function, thus an error. 
take an early exit if the accumulated error so far // exceeds what we already have -static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, double current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +static float map_colors(const Vector4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) { - Vec3 palette_rgb[NINDICES3]; // could be nindices2 + Vector3 palette_rgb[NINDICES3]; // could be nindices2 float palette_a[NINDICES3]; // could be nindices2 - double toterr = 0; + float toterr = 0; generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]); - Vec3 rgb; + Vector3 rgb; float a; for (int i = 0; i < np; ++i) { - double err, besterr; + float err, besterr; float palette_alpha = 0, tile_alpha = 0; if(AVPCL::flag_premult) - tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).X() : - (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).Y() : - (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).Z() : (colors[i]).W(); + tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? 
(colors[i]).z : (colors[i]).w; - rgb.X() = (colors[i]).X(); - rgb.Y() = (colors[i]).Y(); - rgb.Z() = (colors[i]).Z(); - a = (colors[i]).W(); + rgb.x = (colors[i]).x; + rgb.y = (colors[i]).y; + rgb.z = (colors[i]).z; + a = (colors[i]).w; // compute the two indices separately // if we're doing premultiplied alpha, we need to choose first the index that @@ -454,7 +455,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm if (rotatemode == ROTATEMODE_RGBA_RGBA) { // do A index first as it has the alpha - besterr = DBL_MAX; + besterr = FLT_MAX; for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) { err = Utils::metric1(a, palette_a[j], rotatemode); @@ -471,7 +472,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm toterr += besterr; // squared-error norms are additive since we don't do the square root // do RGB index - besterr = DBL_MAX; + besterr = FLT_MAX; for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) { err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : @@ -494,13 +495,13 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm indices[INDEXARRAY_RGB][k] = -1; indices[INDEXARRAY_A][k] = -1; } - return DBL_MAX; + return FLT_MAX; } } else { // do RGB index - besterr = DBL_MAX; + besterr = FLT_MAX; int bestindex; for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) { @@ -516,13 +517,13 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm indices[INDEXARRAY_RGB][i] = j; } } - palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).X() : - (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).Y() : - (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).Z() : (assert(0),0); + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? 
(palette_rgb[bestindex]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : (nvCheckMacro(0),0); toterr += besterr; // do A index - besterr = DBL_MAX; + besterr = FLT_MAX; for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) { err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) : @@ -545,7 +546,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm indices[INDEXARRAY_RGB][k] = -1; indices[INDEXARRAY_A][k] = -1; } - return DBL_MAX; + return FLT_MAX; } } } @@ -554,9 +555,9 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, - int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) + int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) { - Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 + Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 float palette_a[NREGIONS][NINDICES3]; // could be nindices2 for (int region = 0; region < NREGIONS; ++region) @@ -565,25 +566,25 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int toterr[region] = 0; } - Vec3 rgb; + Vector3 rgb; float a; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr; + float err, besterr; float palette_alpha = 0, tile_alpha = 0; - rgb.X() = (tile.data[y][x]).X(); - rgb.Y() = (tile.data[y][x]).Y(); - rgb.Z() = (tile.data[y][x]).Z(); - a = (tile.data[y][x]).W(); + rgb.x = (tile.data[y][x]).x; + rgb.y = (tile.data[y][x]).y; + rgb.z = (tile.data[y][x]).z; + a = (tile.data[y][x]).w; if(AVPCL::flag_premult) - tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? 
(tile.data[y][x]).X() : - (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).Y() : - (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).Z() : (tile.data[y][x]).W(); + tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w; // compute the two indices separately // if we're doing premultiplied alpha, we need to choose first the index that @@ -592,7 +593,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int if (rotatemode == ROTATEMODE_RGBA_RGBA) { // do A index first as it has the alpha - besterr = DBL_MAX; + besterr = FLT_MAX; for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) { err = Utils::metric1(a, palette_a[region][i], rotatemode); @@ -609,7 +610,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int toterr[region] += besterr; // squared-error norms are additive since we don't do the square root // do RGB index - besterr = DBL_MAX; + besterr = FLT_MAX; for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) { err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : @@ -628,7 +629,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int else { // do RGB index first as it has the alpha - besterr = DBL_MAX; + besterr = FLT_MAX; int bestindex; for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) { @@ -644,13 +645,13 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int bestindex = i; } } - palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).X() : - (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).Y() : - (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).Z() : (assert(0),0); + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? 
(palette_rgb[region][bestindex]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : (nvCheckMacro(0),0); toterr[region] += besterr; // do A index - besterr = DBL_MAX; + besterr = FLT_MAX; for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) { err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) : @@ -671,8 +672,8 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // this function returns either old_err or a value smaller (if it was successful in improving the error) -static double perturb_one(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, - double old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +static float perturb_one(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, + float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts @@ -743,10 +744,10 @@ static double perturb_one(const Vec4 colors[], int np, int rotatemode, int index // if err > 40 6.25% // for np = 16 -- adjust error thresholds as a function of np // always ensure endpoint ordering is preserved (no need to overlap the scan) -static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +static float exhaustive(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGBA &opt_endpts, 
int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) { IntEndptsRGBA temp_endpts; - double best_err = orig_err; + float best_err = orig_err; int aprec = region_prec.endpt_a_prec[ch]; int bprec = region_prec.endpt_b_prec[ch]; int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; @@ -756,7 +757,7 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm for (int i=0; i 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } - adelta = MAX(adelta, 3); - bdelta = MAX(bdelta, 3); + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); #ifdef DISABLE_EXHAUSTIVE adelta = bdelta = 3; @@ -775,10 +776,10 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm temp_endpts = opt_endpts; // ok figure out the range of A and B - int alow = MAX(0, opt_endpts.A[ch] - adelta); - int ahigh = MIN((1< RGBA_MAX) v.X() = RGBA_MAX; - if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; - if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; - if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; - if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; - if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; - if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; + if (v.x < 0.0f) v.x = 0.0f; + if (v.x > 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + if (v.w < 0.0f) v.w = 0.0f; + if (v.w > 255.0f) v.w = 255.0f; } // compute initial endpoints for the "RGB" portion and the "A" portion. 
@@ -1114,14 +1115,16 @@ static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) for (int region=0; region maxp) maxp = dp; - dp = alpha[i]; + dp = alphas[i] - mean.w; if (dp < mina) mina = dp; if (dp > maxa) maxa = dp; } // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values - endpts[region].A = mean + minp*direction; - endpts[region].B = mean + maxp*direction; - endpts[region].A.W() = mean.W() + mina; - endpts[region].B.W() = mean.W() + maxa; + endpts[region].A = mean + Vector4(minp*direction, mina); + endpts[region].B = mean + Vector4(maxp*direction, maxa); // clamp endpoints - // WORK: is [0,255] the right range, or should it be [0,255.5) or even [0,256) ? + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped clamp(endpts[region].A); clamp(endpts[region].B); } } -double AVPCL::compress_mode5(const Tile &t, char *block) +float AVPCL::compress_mode5(const Tile &t, char *block) { FltEndpts endpts[NREGIONS]; char tempblock[AVPCL::BLOCKSIZE]; - double msebest = DBL_MAX; + float msebest = FLT_MAX; int shape = 0; Tile t1; @@ -1210,7 +1196,7 @@ double AVPCL::compress_mode5(const Tile &t, char *block) // for (int i = 0; i < NINDEXMODES && msebest > 0; ++i) for (int i = 0; i < 1 && msebest > 0; ++i) { - double mse = refine(t1, shape, r, i, endpts, tempblock); + float mse = refine(t1, shape, r, i, endpts, tempblock); if (mse < msebest) { memcpy(block, tempblock, sizeof(tempblock)); diff --git a/src/nvtt/bc7/avpcl_mode6.cpp b/src/nvtt/bc7/avpcl_mode6.cpp index 13e07fb..c168890 100644 --- a/src/nvtt/bc7/avpcl_mode6.cpp +++ b/src/nvtt/bc7/avpcl_mode6.cpp @@ -17,15 +17,17 @@ See the License for the specific language governing permissions and limitations #include "bits.h" #include "tile.h" #include "avpcl.h" -#include "arvo/Vec4.h" -#include "arvo/Matrix.h" -#include "arvo/SVD.h" +#include 
"nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" #include "utils.h" #include "endpts.h" +#include -#include -using namespace ArvoMath; +using namespace nv; +using namespace AVPCL; #define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values @@ -83,7 +85,7 @@ struct PatternPrec }; // this is the precision for each channel and region -// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! static PatternPrec pattern_precs[NPATTERNS] = { 7,7,7,7, 7,7,7,7, @@ -102,7 +104,7 @@ static int nbits(int n, bool issigned) } else { - assert (issigned); + nvAssert (issigned); for (nb=0; n<-1; ++nb, n>>=1) ; return nb + 1; } @@ -143,7 +145,7 @@ static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_end // ignore the alpha channel in the count onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1); compr_endpts.A[j] = endpts.A[j] >> 1; - assert (compr_endpts.A[j] < 128); + nvAssert (compr_endpts.A[j] < 128); } compr_endpts.a_lsb = onescnt >= 2; @@ -152,7 +154,7 @@ static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_end { onescnt += (j==CHANNEL_A) ? 
0 : (endpts.B[j] & 1); compr_endpts.B[j] = endpts.B[j] >> 1; - assert (compr_endpts.B[j] < 128); + nvAssert (compr_endpts.B[j] < 128); } compr_endpts.b_lsb = onescnt >= 2; } @@ -186,15 +188,15 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec for (int region = 0; region < NREGIONS; ++region) { - full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space - full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); - full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); - full_endpts[region].A[3] = Utils::quantize(endpts[region].A.W(), pattern_prec.region_precs[region].endpt_a_prec[3]+1); + full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space + full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1); + full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1); + full_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]+1); - full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); - full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); - full_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); - full_endpts[region].B[3] = Utils::quantize(endpts[region].B.W(), pattern_prec.region_precs[region].endpt_b_prec[3]+1); + full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1); + full_endpts[region].B[1] 
= Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1); + full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1); + full_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]+1); compress_one(full_endpts[region], q_endpts[region]); } @@ -212,7 +214,7 @@ static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TIL { int x = index_positions[region] & 3; int y = (index_positions[region] >> 2) & 3; - assert(REGION(x,y,shapeindex) == region); // double check the table + nvAssert(REGION(x,y,shapeindex) == region); // double check the table if (indices[y][x] & HIGH_INDEXBIT) { // high bit is set, swap the endpts and indices for this region @@ -253,7 +255,7 @@ static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, out.write(endpts[i].b_lsb, 1); } - assert (out.getptr() == 65); + nvAssert (out.getptr() == 65); } static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) @@ -262,8 +264,8 @@ static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapein pat_index = 0; - assert (pat_index >= 0 && pat_index < NPATTERNS); - assert (in.getptr() == patterns[pat_index].modebits); + nvAssert (pat_index >= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); p = patterns[pat_index]; @@ -282,12 +284,12 @@ static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapein endpts[i].b_lsb = in.read(1); } - assert (in.getptr() == 65); + nvAssert (in.getptr() == 65); } static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) { - assert ((indices[0][0] & HIGH_INDEXBIT) == 0); + nvAssert ((indices[0][0] & HIGH_INDEXBIT) == 0); // the index we shorten is always index 0 for (int i = 0; i < Tile::TILE_TOTAL; ++i) @@ -320,10 +322,10 @@ 
static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, c write_indices(indices, shapeindex, out); - assert(out.getptr() == AVPCL::BITSIZE); + nvAssert(out.getptr() == AVPCL::BITSIZE); } -static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec ®ion_prec, Vec4 palette[NINDICES]) +static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec ®ion_prec, Vector4 palette[NINDICES]) { IntEndptsRGBA endpts; @@ -337,28 +339,28 @@ static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const Re // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].W() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM)); } void AVPCL::decompress_mode6(const char *block, Tile &t) @@ -371,7 +373,7 @@ void AVPCL::decompress_mode6(const char *block, Tile &t) read_header(in, endpts, shapeindex, p, pat_index); - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; for (int r = 0; r < NREGIONS; ++r) generate_palette_quantized(endpts[r], 
pattern_precs[pat_index].region_precs[r], &palette[r][0]); @@ -379,7 +381,7 @@ void AVPCL::decompress_mode6(const char *block, Tile &t) read_indices(in, shapeindex, indices); - assert(in.getptr() == AVPCL::BITSIZE); + nvAssert(in.getptr() == AVPCL::BITSIZE); // lookup for (int y = 0; y < Tile::TILE_H; y++) @@ -388,17 +390,17 @@ void AVPCL::decompress_mode6(const char *block, Tile &t) } // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr -static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec ®ion_prec, double current_err, int indices[Tile::TILE_TOTAL]) +static float map_colors(const Vector4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec ®ion_prec, float current_err, int indices[Tile::TILE_TOTAL]) { - Vec4 palette[NINDICES]; - double toterr = 0; - Vec4 err; + Vector4 palette[NINDICES]; + float toterr = 0; + Vector4 err; generate_palette_quantized(endpts, region_prec, palette); for (int i = 0; i < np; ++i) { - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int j = 0; j < NINDICES && besterr > 0; ++j) { @@ -422,7 +424,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &end for (int k = i; k < np; ++k) indices[k] = -1; - return DBL_MAX; + return FLT_MAX; } } return toterr; @@ -430,10 +432,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &end // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, - int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; for (int region = 0; region < NREGIONS; ++region) { @@ 
-441,13 +443,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end toterr[region] = 0; } - Vec4 err; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -468,8 +470,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // this function returns either old_err or a value smaller (if it was successful in improving the error) -static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, - double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts @@ -539,10 +541,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec // for np = 16 -- adjust error thresholds as a function of np // always ensure endpoint ordering is preserved (no need to overlap the scan) // if orig_err returned from this is less than its input value, then indices[] will contain valid indices -static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) { IntEndptsRGBA_2 temp_endpts; - double best_err = orig_err; + float best_err = orig_err; int aprec = 
region_prec.endpt_a_prec[ch]; int bprec = region_prec.endpt_b_prec[ch]; int good_indices[Tile::TILE_TOTAL]; @@ -551,7 +553,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & for (int i=0; i 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } - adelta = MAX(adelta, 3); - bdelta = MAX(bdelta, 3); + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); #ifdef DISABLE_EXHAUSTIVE adelta = bdelta = 3; @@ -570,10 +572,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & temp_endpts = opt_endpts; // ok figure out the range of A and B - int alow = MAX(0, opt_endpts.A[ch] - adelta); - int ahigh = MIN((1<> 1) & 1; // make sure we have a valid error for temp_in - // we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts - // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) - double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); + // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position) + float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices); // now try to optimize these endpoints - double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); // if we find an improvement, update the best so far and correct the output endpoints and errors if (temp_out_err < best_err) @@ -843,9 +845,9 @@ static void optimize_endpts(const 
Tile &tile, int shapeindex, const double orig_ simplify the above given that there is no transform now and that endpoints will always fit */ -static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) { - double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; @@ -858,13 +860,14 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); - for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; - if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; - if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; - if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; - if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; - if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; - if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; + if (v.x < 0.0f) v.x = 0.0f; + if (v.x > 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + if (v.w < 0.0f) v.w = 0.0f; + if (v.w > 255.0f) v.w = 255.0f; } -static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) { for (int region = 0; region < NREGIONS; ++region) for (int i = 0; i < NINDICES; ++i) - palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); + palette[region][i] = 
Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); } // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined -static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; generate_palette_unquantized(endpts, palette); - double toterr = 0; - Vec4 err; + float toterr = 0; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr; + float err, besterr; besterr = Utils::metric4(tile.data[y][x], palette[region][0]); @@ -932,13 +935,13 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt return toterr; } -static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) { for (int region=0; region maxp) maxp = dp; } @@ -1011,13 +1000,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS] return map_colors(tile, shapeindex, endpts); } -static void swap(double *list1, int *list2, int i, int j) +static void swap(float *list1, int *list2, int i, int j) { - double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; } -double AVPCL::compress_mode6(const Tile &t, char *block) +float AVPCL::compress_mode6(const Tile &t, char *block) { // number of rough cases to look at. 
reasonable values of this are 1, NSHAPES/4, and NSHAPES // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out @@ -1027,10 +1016,10 @@ double AVPCL::compress_mode6(const Tile &t, char *block) struct { FltEndpts endpts[NREGIONS]; } all[NSHAPES]; - double roughmse[NSHAPES]; + float roughmse[NSHAPES]; int index[NSHAPES]; char tempblock[AVPCL::BLOCKSIZE]; - double msebest = DBL_MAX; + float msebest = FLT_MAX; for (int i=0; i0; ++i) { int shape = index[i]; - double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); if (mse < msebest) { memcpy(block, tempblock, sizeof(tempblock)); diff --git a/src/nvtt/bc7/avpcl_mode7.cpp b/src/nvtt/bc7/avpcl_mode7.cpp index b2813e8..ea9e0ec 100644 --- a/src/nvtt/bc7/avpcl_mode7.cpp +++ b/src/nvtt/bc7/avpcl_mode7.cpp @@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations #include "bits.h" #include "tile.h" #include "avpcl.h" -#include "arvo/Vec4.h" -#include "arvo/Matrix.h" -#include "arvo/SVD.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" #include "utils.h" #include "endpts.h" - -#include +#include #include "shapes_two.h" -using namespace ArvoMath; +using namespace nv; +using namespace AVPCL; #define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values @@ -84,7 +85,7 @@ struct PatternPrec // this is the precision for each channel and region -// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! 
static PatternPrec pattern_precs[NPATTERNS] = { 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, @@ -103,7 +104,7 @@ static int nbits(int n, bool issigned) } else { - assert (issigned); + nvAssert (issigned); for (nb=0; n<-1; ++nb, n>>=1) ; return nb + 1; } @@ -111,12 +112,12 @@ static int nbits(int n, bool issigned) static void transform_forward(IntEndptsRGBA_2 ep[NREGIONS]) { - assert(0); + nvUnreachable(); } static void transform_inverse(IntEndptsRGBA_2 ep[NREGIONS]) { - assert(0); + nvUnreachable(); } /* @@ -154,7 +155,7 @@ static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_end // ignore the alpha channel in the count onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1); compr_endpts.A[j] = endpts.A[j] >> 1; - assert (compr_endpts.A[j] < 32); + nvAssert (compr_endpts.A[j] < 32); } compr_endpts.a_lsb = onescnt >= 2; @@ -163,7 +164,7 @@ static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_end { onescnt += (j==CHANNEL_A) ? 0 : (endpts.B[j] & 1); compr_endpts.B[j] = endpts.B[j] >> 1; - assert (compr_endpts.B[j] < 32); + nvAssert (compr_endpts.B[j] < 32); } compr_endpts.b_lsb = onescnt >= 2; } @@ -194,15 +195,15 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec for (int region = 0; region < NREGIONS; ++region) { - full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space - full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); - full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); - full_endpts[region].A[3] = Utils::quantize(endpts[region].A.W(), pattern_prec.region_precs[region].endpt_a_prec[3]+1); + full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space + 
full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1); + full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1); + full_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]+1); - full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); - full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); - full_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); - full_endpts[region].B[3] = Utils::quantize(endpts[region].B.W(), pattern_prec.region_precs[region].endpt_b_prec[3]+1); + full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1); + full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1); + full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1); + full_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]+1); compress_one(full_endpts[region], q_endpts[region]); } @@ -218,7 +219,7 @@ static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TIL int x = POS_TO_X(position); int y = POS_TO_Y(position); - assert(REGION(x,y,shapeindex) == region); // double check the table + nvAssert(REGION(x,y,shapeindex) == region); // double check the table if (indices[y][x] & HIGH_INDEXBIT) { // high bit is set, swap the endpts and indices for this region @@ -260,7 +261,7 @@ static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, out.write(endpts[i].b_lsb, 1); } - assert (out.getptr() == 98); + nvAssert (out.getptr() == 98); } static void read_header(Bits 
&in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) @@ -268,8 +269,8 @@ static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapein int mode = AVPCL::getmode(in); pat_index = 0; - assert (pat_index >= 0 && pat_index < NPATTERNS); - assert (in.getptr() == patterns[pat_index].modebits); + nvAssert (pat_index >= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); shapeindex = in.read(SHAPEBITS); p = patterns[pat_index]; @@ -287,7 +288,7 @@ static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapein endpts[i].b_lsb = in.read(1); } - assert (in.getptr() == 98); + nvAssert (in.getptr() == 98); } // WORK PLACEHOLDER -- keep it simple for now @@ -341,10 +342,10 @@ static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, c write_indices(indices, shapeindex, out); - assert(out.getptr() == AVPCL::BITSIZE); + nvAssert(out.getptr() == AVPCL::BITSIZE); } -static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec ®ion_prec, Vec4 palette[NINDICES]) +static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec ®ion_prec, Vector4 palette[NINDICES]) { IntEndptsRGBA endpts; @@ -358,34 +359,34 @@ static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const Re // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); // interpolate for (int i = 0; i < 
NINDICES; ++i) - palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM)); a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1); // interpolate for (int i = 0; i < NINDICES; ++i) - palette[i].W() = PALETTE_LERP(a, b, i, BIAS, DENOM); + palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM)); } // sign extend but only if it was transformed static void sign_extend(Pattern &p, IntEndptsRGBA_2 endpts[NREGIONS]) { - assert(0); + nvUnreachable(); } void AVPCL::decompress_mode7(const char *block, Tile &t) @@ -404,7 +405,7 @@ void AVPCL::decompress_mode7(const char *block, Tile &t) transform_inverse(endpts); } - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; for (int r = 0; r < NREGIONS; ++r) generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); @@ -412,7 +413,7 @@ void AVPCL::decompress_mode7(const char *block, Tile &t) read_indices(in, shapeindex, indices); - assert(in.getptr() == AVPCL::BITSIZE); + nvAssert(in.getptr() == AVPCL::BITSIZE); // lookup for (int y = 0; y < Tile::TILE_H; y++) @@ -421,17 +422,17 @@ void AVPCL::decompress_mode7(const char *block, Tile &t) } // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr -static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec ®ion_prec, double current_err, int indices[Tile::TILE_TOTAL]) +static float map_colors(const Vector4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec ®ion_prec, float current_err, int indices[Tile::TILE_TOTAL]) { - Vec4 palette[NINDICES]; - double toterr = 0; - Vec4 err; + Vector4 palette[NINDICES]; + float toterr = 0; + Vector4 err; generate_palette_quantized(endpts, region_prec, palette); for (int i = 0; i < np; ++i) { - double err, besterr = DBL_MAX; + float err, 
besterr = FLT_MAX; for (int j = 0; j < NINDICES && besterr > 0; ++j) { @@ -455,7 +456,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &end for (int k = i; k < np; ++k) indices[k] = -1; - return DBL_MAX; + return FLT_MAX; } } return toterr; @@ -463,10 +464,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &end // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, - int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; for (int region = 0; region < NREGIONS; ++region) { @@ -474,13 +475,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end toterr[region] = 0; } - Vec4 err; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -501,8 +502,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // this function returns either old_err or a value smaller (if it was successful in improving the error) -static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, - double old_err, int do_b, int indices[Tile::TILE_TOTAL]) +static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, + float old_err, int do_b, int 
indices[Tile::TILE_TOTAL]) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts @@ -572,10 +573,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec // for np = 16 -- adjust error thresholds as a function of np // always ensure endpoint ordering is preserved (no need to overlap the scan) // if orig_err returned from this is less than its input value, then indices[] will contain valid indices -static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) { IntEndptsRGBA_2 temp_endpts; - double best_err = orig_err; + float best_err = orig_err; int aprec = region_prec.endpt_a_prec[ch]; int bprec = region_prec.endpt_b_prec[ch]; int good_indices[Tile::TILE_TOTAL]; @@ -584,7 +585,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & for (int i=0; i 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } - adelta = MAX(adelta, 3); - bdelta = MAX(bdelta, 3); + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); #ifdef DISABLE_EXHAUSTIVE adelta = bdelta = 3; @@ -603,10 +604,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec & temp_endpts = opt_endpts; // ok figure out the range of A and B - int alow = MAX(0, opt_endpts.A[ch] - adelta); - int ahigh = MIN((1<> 1) & 1; // make sure we have a valid error for temp_in - // we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts - // (mapcolors will compute a mapping but will stop if the error exceeds the value 
passed in the DBL_MAX position) - double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); + // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position) + float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices); // now try to optimize these endpoints - double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); // if we find an improvement, update the best so far and correct the output endpoints and errors if (temp_out_err < best_err) @@ -874,9 +875,9 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_ emit compressed block with original data // to try to preserve maximum endpoint precision */ -static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) { - double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; @@ -895,8 +896,9 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp transform_inverse(orig_endpts); optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); - for (int i=0; i RGBA_MAX) v.X() = RGBA_MAX; - if 
(v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; - if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; - if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; - if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; - if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; - if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; + if (v.x < 0.0f) v.x = 0.0f; + if (v.x > 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + if (v.w < 0.0f) v.w = 0.0f; + if (v.w > 255.0f) v.w = 255.0f; } -static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) { for (int region = 0; region < NREGIONS; ++region) for (int i = 0; i < NINDICES; ++i) - palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); } // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined -static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) { // build list of possibles - Vec4 palette[NREGIONS][NINDICES]; + Vector4 palette[NREGIONS][NINDICES]; generate_palette_unquantized(endpts, palette); - double toterr = 0; - Vec4 err; + float toterr = 0; + Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); - double err, besterr = DBL_MAX; + float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { @@ -971,13 +973,13 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt return toterr; } -static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +static 
float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) { for (int region=0; region maxp) maxp = dp; } @@ -1050,13 +1038,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS] return map_colors(tile, shapeindex, endpts); } -static void swap(double *list1, int *list2, int i, int j) +static void swap(float *list1, int *list2, int i, int j) { - double t = list1[i]; list1[i] = list1[j]; list1[j] = t; + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; } -double AVPCL::compress_mode7(const Tile &t, char *block) +float AVPCL::compress_mode7(const Tile &t, char *block) { // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out @@ -1066,10 +1054,10 @@ double AVPCL::compress_mode7(const Tile &t, char *block) struct { FltEndpts endpts[NREGIONS]; } all[NSHAPES]; - double roughmse[NSHAPES]; + float roughmse[NSHAPES]; int index[NSHAPES]; char tempblock[AVPCL::BLOCKSIZE]; - double msebest = DBL_MAX; + float msebest = FLT_MAX; for (int i=0; i0; ++i) { int shape = index[i]; - double mse = refine(t, shape, &all[shape].endpts[0], tempblock); + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); if (mse < msebest) { memcpy(block, tempblock, sizeof(tempblock)); diff --git a/src/nvtt/bc7/avpclc.cpp b/src/nvtt/bc7/avpclc.cpp deleted file mode 100644 index afa8903..0000000 --- a/src/nvtt/bc7/avpclc.cpp +++ /dev/null @@ -1,348 +0,0 @@ -/* -Copyright 2007 nVidia, Inc. -Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
- -You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - -See the License for the specific language governing permissions and limitations under the License. -*/ - -// NOTE: the compressor will compress RGB tiles where the input alpha is constant at 255 -// using modes where the alpha is variable if that mode gives a smaller mean squared error. - -#include -#include -#include -#include -#include - -#include "ImfArray.h" -#include "targa.h" -#include "avpcl.h" - -using namespace std; - -static void analyze(string in1, string in2) -{ - Array2D pin1, pin2; - int w1, h1, w2, h2; - - Targa::read(in1, pin1, w1, h1); - Targa::read(in2, pin2, w2, h2); - - // choose the smaller of the two dimensions (since the old compressor would truncate to multiple-of-4 sizes) - int w = MIN(w1, w2); - int h = MIN(h1, h2); - - double nsamples = 0; - double mabse_rgb = 0, mabse_a = 0, mabse_rgba = 0, mse_rgb = 0, mse_a = 0, mse_rgba = 0; - int errdist_rgb[9], errdist_a[9], errdist_rgba[9]; - int errs[4*16]; - - for (int i=0; i<9; ++i) - errdist_rgb[i] = errdist_a[i] = errdist_rgba[i] = 0; - - int psnrhist[100]; - for (int i=0; i<100; ++i) - psnrhist[i] = 0; - bool first = true; - - int worstx, worsty; - double worstpsnr = 999.0; - - bool constant_alpha = true; - - for (int y = 0; y < h; y+=4) - for (int x = 0; x < w; x+=4) - { - int xw = MIN(w-x, 4); - int yw = MIN(h-y, 4); - int np = 0; - - float a[4], b[4]; - - for (int y0=0; y0 0 ? 
err : -err; - int j = i & 3; - int lsb; - - for (lsb=0; (abse>>lsb)>0; ++lsb) - ; - assert (lsb <= 8); - - if (j == 3) - { - mabse_a += (double)abse; - mse_a += (double)abse * abse; - errdist_a[lsb]++; - } - else - { - mabse_rgb += (double)abse; - mse_rgb += (double)abse * abse; - errdist_rgb[lsb]++; - } - mabse_rgba += (double)abse; - mse_rgba += (double)abse * abse; - errdist_rgba[lsb]++; - - msetile += (double)abse * abse; - } - - double psnrtile, rmsetile; - - rmsetile = sqrt(msetile / double(np)); - psnrtile = (rmsetile == 0) ? 99.0 : 20.0 * log10(255.0/rmsetile); - - if (psnrtile < worstpsnr) - { - worstx = x; worsty = y; worstpsnr = psnrtile; - } -#ifdef EXTERNAL_RELEASE - int psnrquant = (int) floor (psnrtile); // 10 means [10,11) psnrs, e.g. - // clamp just in case - psnrquant = (psnrquant < 0) ? 0 : (psnrquant > 99) ? 99 : psnrquant; - psnrhist[psnrquant]++; - if (first && psnrquant < 16) - { - first = false; - printf("Tiles with RGBA PSNR's worse than 16dB\n"); - } - if (psnrquant < 16) - printf("X %4d Y %4d RGBA PSNR %7.2f\n", x, y, psnrtile); -#endif - } - - nsamples = w * h; - - mabse_a /= nsamples; - mse_a /= nsamples; - mabse_rgb /= (nsamples*3); - mse_rgb /= (nsamples*3); - mabse_rgba /= (nsamples*4); - mse_rgba /= (nsamples*4); - - double rmse_a, psnr_a, rmse_rgb, psnr_rgb, rmse_rgba, psnr_rgba; - - rmse_a = sqrt(mse_a); - psnr_a = (rmse_a == 0) ? 999.0 : 20.0 * log10(255.0/rmse_a); - - rmse_rgb = sqrt(mse_rgb); - psnr_rgb = (rmse_rgb == 0) ? 999.0 : 20.0 * log10(255.0/rmse_rgb); - - rmse_rgba = sqrt(mse_rgba); - psnr_rgba = (rmse_rgba == 0) ? 999.0 : 20.0 * log10(255.0/rmse_rgba); - - printf("Image size compared: %dw x %dh\n", w, h); - printf("Image alpha is %s.\n", constant_alpha ? "CONSTANT" : "VARIABLE"); - if (w != w1 || w != w2 || h != h1 || h != h2) - printf("--- NOTE: only the overlap between the 2 images (%d,%d) and (%d,%d) was compared\n", w1, h1, w2, h2); - printf("Total pixels: %12d\n", w * h); - - char *which = !AVPCL::flag_premult ? 
"RGB" : "aRaGaB"; - - printf("\n%s Mean absolute error: %f\n", which, mabse_rgb); - printf("%s Root mean squared error: %f (MSE %f)\n", which, rmse_rgb, rmse_rgb*rmse_rgb); - printf("%s Peak signal to noise ratio in dB: %f\n", which, psnr_rgb); - printf("%s Histogram of number of channels with indicated LSB error\n", which); - for (int i = 0; i < 9; ++i) - if (errdist_rgb[i]) printf("%2d LSB error: %10d\n", i, errdist_rgb[i]); - - printf("\nAlpha Mean absolute error: %f\n", mabse_a); - printf("Alpha Root mean squared error: %f (MSE %f)\n", rmse_a, rmse_a*rmse_a); - printf("Alpha Peak signal to noise ratio in dB: %f\n", psnr_a); - printf("Alpha Histogram of number of channels with indicated LSB error\n"); - for (int i = 0; i < 9; ++i) - if (errdist_a[i]) printf("%2d LSB error: %10d\n", i, errdist_a[i]); - - printf("\nRGBA Mean absolute error: %f\n", mabse_rgba); - printf("RGBA Root mean squared error: %f (MSE %f)\n", rmse_rgba, rmse_rgba*rmse_rgba); - printf("RGBA Peak signal to noise ratio in dB: %f\n", psnr_rgba); - printf("RGBA Histogram of number of channels with indicated LSB error\n"); - for (int i = 0; i < 9; ++i) - if (errdist_rgba[i]) printf("%2d LSB error: %10d\n", i, errdist_rgba[i]); - - printf("\nWorst tile RGBA PSNR %f at x %d y %d\n", worstpsnr, worstx, worsty); -#if 0 - printf("Histogram of per-tile PSNR\n"); - for (int i = 0; i < 100; ++i) - if (psnrhist[i]) - printf("[%2d,%2d) %6d\n", i, i+1, psnrhist[i]); -#endif -} - -static bool ext(string inf, char *extension) -{ - size_t n = inf.rfind('.', inf.length()-1); - if (n != string::npos) - return inf.substr(n, inf.length()) == extension; - else if (*extension != '\0') - return false; - else - return true; // extension is null and we didn't find a . 
-} - -template -std::string toString(const T &thing) -{ - std::stringstream os; - os << thing; - return os.str(); -} - -static int str2int(std::string s) -{ - int thing; - std::stringstream str (stringstream::in | stringstream::out); - str << s; - str >> thing; - return thing; -} - -static void usage() -{ - cout << endl << - "Usage:" << endl << - "avpclc infile.tga outroot generates outroot-w-h.avpcl and outroot-avpcl.tga" << endl << - "avpclc foo-w-h.avpcl outroot generates outroot-avpcl.tga" << endl << - "avpclc infile.tga outfile.tga compares the two images" << endl << endl << - "Flags:" << endl << - "-p use a metric based on AR AG AB A (note: if the image has alpha constant 255 this option is overridden)" << endl << - "-n use a non-uniformly-weighed metric (weights .299 .587 .114)" << endl << - "-na use a non-uniformly-weighed metric (ATI weights .3086 .6094 .0820)" << endl << - "-e dump squared errors for each tile to outroot-errors.bin" << endl; -} - -bool AVPCL::flag_premult = false; -bool AVPCL::flag_nonuniform = false; -bool AVPCL::flag_nonuniform_ati = false; - -bool AVPCL::mode_rgb = false; - -int main(int argc, char* argv[]) -{ - bool noerrfile = true; -#ifdef EXTERNAL_RELEASE - cout << "avpcl/BC7L Targa RGBA Compressor/Decompressor version 1.41 (May 27, 2010)." << endl << - "Bug reports, questions, and suggestions to wdonovan a t nvidia d o t com." 
<< endl; -#endif - try - { - char * args[2]; - int nargs = 0; - - // process flags, copy any non flag arg to args[] - for (int i = 1; i < argc; ++i) - if ((argv[i])[0] == '-') - switch ((argv[i])[1]) { - case 'p': AVPCL::flag_premult = true; break; - case 'n': if ((argv[i])[2] == 'a') { AVPCL::flag_nonuniform_ati = true; AVPCL::flag_nonuniform = false; } - else { AVPCL::flag_nonuniform = true; AVPCL::flag_nonuniform_ati = false; } - break; - case 'e': noerrfile = false; break; - default: throw "bad flag arg"; - } - else - { - if (nargs > 1) throw "Incorrect number of args"; - args[nargs++] = argv[i]; - } - - if (nargs != 2) throw "Incorrect number of args"; - - string inf(args[0]), outroot(args[1]); - - if (ext(outroot, "")) - { - if (ext(inf, ".tga")) - { - int width, height; - - Targa::fileinfo(inf, width, height, AVPCL::mode_rgb); - - string outf, avpclf, errf; - outf = outroot + "-avpcl.tga"; - avpclf = outroot + "-" + toString(width) + "-" + toString(height) + "-" + (AVPCL::mode_rgb ? "RGB" : "RGBA") + ".avpcl"; - cout << "Compressing " << (AVPCL::mode_rgb ? 
"RGB file " : "RGBA file ") << inf << " to " << avpclf << endl; - if (!noerrfile) - { - errf = outroot + "-errors" + ".bin"; - cout << "Errors output file is " << errf << endl; - } - else - errf = ""; - AVPCL::compress(inf, avpclf, errf); - cout << "Decompressing " << avpclf << " to " << outf << endl; - AVPCL::decompress(avpclf, outf); - analyze(inf, outf); - } - else if (ext(inf, ".avpcl")) - { - string outf; - outf = outroot + "-avpcl.tga"; - cout << "Decompressing " << inf << " to " << outf << endl; - AVPCL::decompress(inf, outf); - } - else throw "Invalid file args"; - } - else if (ext(inf, ".tga") && ext(outroot, ".tga")) - { - analyze(inf, outroot); - } - else throw "Invalid file args"; - - } - catch(const exception& e) - { - // Print error message and usage instructions - cerr << e.what() << endl; - usage(); - return 1; - } - catch(char * msg) - { - cerr << msg << endl; - usage(); - return 1; - } - return 0; -} diff --git a/src/nvtt/bc7/bits.h b/src/nvtt/bc7/bits.h index 3fa4af2..7b42a70 100644 --- a/src/nvtt/bc7/bits.h +++ b/src/nvtt/bc7/bits.h @@ -10,36 +10,38 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#ifndef _BITS_H -#define _BITS_H +#ifndef _AVPCL_BITS_H +#define _AVPCL_BITS_H // read/write a bitstream -#include +#include "nvcore/Debug.h" + +namespace AVPCL { class Bits { public: - Bits(char *data, int maxdatabits) { assert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} - Bits(const char *data, int availdatabits) { assert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} + Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} + Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} void write(int value, int nbits) { - assert (nbits >= 0 && nbits < 32); - assert (sizeof(int)>= 4); + nvAssert (nbits >= 0 && nbits < 32); + nvAssert (sizeof(int)>= 4); for (int i=0; i>i); } int read(int nbits) { - assert (nbits >= 0 && nbits < 32); - assert (sizeof(int)>= 4); + nvAssert (nbits >= 0 && nbits < 32); + nvAssert (sizeof(int)>= 4); int out = 0; for (int i=0; i= 0 && ptr < maxbits); bptr = ptr; } + void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; } int getsize() { return bend; } private: @@ -51,7 +53,7 @@ private: char readonly; // 1 if this is a read-only stream int readone() { - assert (bptr < bend); + nvAssert (bptr < bend); if (bptr >= bend) return 0; int bit = (readonly ? 
cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7)); ++bptr; @@ -60,7 +62,7 @@ private: void writeone(int bit) { if (readonly) throw "Writing a read-only bit stream"; - assert (bptr < maxbits); + nvAssert (bptr < maxbits); if (bptr >= maxbits) return; if (bit&1) bits[bptr>>3] |= 1 << (bptr & 7); @@ -70,4 +72,6 @@ private: } }; +} + #endif \ No newline at end of file diff --git a/src/nvtt/bc7/endpts.h b/src/nvtt/bc7/endpts.h index 0b33eef..06b073d 100644 --- a/src/nvtt/bc7/endpts.h +++ b/src/nvtt/bc7/endpts.h @@ -10,26 +10,26 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifndef _ENDPTS_H -#define _ENDPTS_H +#ifndef _AVPCL_ENDPTS_H +#define _AVPCL_ENDPTS_H // endpoint definitions and routines to search through endpoint space -#include "arvo/Vec4.h" +#include "nvmath/Vector.h" -using namespace ArvoMath; +namespace AVPCL { -#define NCHANNELS_RGB 3 -#define NCHANNELS_RGBA 4 -#define CHANNEL_R 0 -#define CHANNEL_G 1 -#define CHANNEL_B 2 -#define CHANNEL_A 3 +static const int NCHANNELS_RGB = 3; +static const int NCHANNELS_RGBA = 4; +static const int CHANNEL_R = 0; +static const int CHANNEL_G = 1; +static const int CHANNEL_B = 2; +static const int CHANNEL_A = 3; struct FltEndpts { - Vec4 A; - Vec4 B; + nv::Vector4 A; + nv::Vector4 B; }; struct IntEndptsRGB @@ -76,5 +76,6 @@ struct IntEndptsRGBA_2a int b_lsb; // lsb for RGB channels of A }; -#endif +} +#endif diff --git a/src/nvtt/bc7/rgba.h b/src/nvtt/bc7/rgba.h deleted file mode 100644 index d356a10..0000000 --- a/src/nvtt/bc7/rgba.h +++ /dev/null @@ -1,27 +0,0 @@ -/* -Copyright 2007 nVidia, Inc. -Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
- -You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - -See the License for the specific language governing permissions and limitations under the License. -*/ - -#ifndef _RGBA_H -#define _RGBA_H - -#define RGBA_MIN 0 -#define RGBA_MAX 255 // range of RGBA - -class RGBA -{ -public: - float r, g, b, a; - RGBA(): r(0), g(0), b(0), a(0){} - RGBA(float r, float g, float b, float a): r(r), g(g), b(b), a(a){} -}; - -#endif diff --git a/src/nvtt/bc7/shapes_three.h b/src/nvtt/bc7/shapes_three.h index c618d22..e19034e 100644 --- a/src/nvtt/bc7/shapes_three.h +++ b/src/nvtt/bc7/shapes_three.h @@ -10,8 +10,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifndef _SHAPES_THREE_H -#define _SHAPES_THREE_H +#ifndef _AVPCL_SHAPES_THREE_H +#define _AVPCL_SHAPES_THREE_H // shapes for 3 regions diff --git a/src/nvtt/bc7/shapes_two.h b/src/nvtt/bc7/shapes_two.h index d9a52ef..26424fc 100644 --- a/src/nvtt/bc7/shapes_two.h +++ b/src/nvtt/bc7/shapes_two.h @@ -10,8 +10,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifndef _SHAPES_TWO_H -#define _SHAPES_TWO_H +#ifndef _AVPCL_SHAPES_TWO_H +#define _AVPCL_SHAPES_TWO_H // shapes for two regions diff --git a/src/nvtt/bc7/targa.cpp b/src/nvtt/bc7/targa.cpp deleted file mode 100644 index 6936e74..0000000 --- a/src/nvtt/bc7/targa.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/* -Copyright 2007 nVidia, Inc. -Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
- -You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - -See the License for the specific language governing permissions and limitations under the License. -*/ - -// Quick and dirty Targa file I/O -- doesn't handle compressed format targa files, though. - -#include -#include - -#include "ImfArray.h" -#include "targa.h" -#include "rgba.h" - -Targa::Targa() {} - -Targa::~Targa() {} - -// read either RGB or RGBA files -static int readTgaHeader(FILE *fp, int& width, int& height, int &bpp, int &origin) -{ - unsigned char hdr[18]; - - if (fread(hdr, sizeof(hdr), 1, fp ) != 1) - return 0; - - if (hdr[2] != 2) - return 0; - - bpp = hdr[16]; - if (bpp != 24 && bpp != 32) - return 0; - - int alphabpp = hdr[17] & 0xF; - origin = (hdr[17] >> 4) & 0x3; - - if (bpp == 24 && alphabpp != 0) - return 0; - if (bpp == 32 && alphabpp != 8) - return 0; - - width = (hdr[13] << 8) | hdr[12]; - height = (hdr[15] << 8) | hdr[14]; - - // skip image ID field - int idsize = hdr[0]; - for (; idsize; --idsize) - (void) getc(fp); - - return 1; -} - -static void read_file(FILE *fp, Imf::Array2D& pixels, int width, int height, int bpp, int origin) -{ - pixels.resizeErase(height, width); - - // bottom to top order - for (int y = 0; y < height; ++y) - for (int x = 0; x < width; ++x) - { - float b = float(getc(fp)); - float g = float(getc(fp)); - float r = float(getc(fp)); - float a = (bpp == 24) ? 
RGBA_MAX : float(getc(fp)); - - int xt, yt; - - // transform based on origin - switch (origin) - { - case 0: xt = x; yt = height-1-y; break; // bottom left - case 1: xt = width-1-x; yt = y; break; // bottom right - case 2: xt = x; yt = y; break; // top left - case 3: xt = width-1-x; yt = height-1-y; break; // top right - default: throw "impossible origin value"; - } - - pixels[yt][xt].a = a; - pixels[yt][xt].r = r; - pixels[yt][xt].g = g; - pixels[yt][xt].b = b; - } -} - -void Targa::fileinfo(const std::string& filename, int& width, int& height, bool& const_alpha) -{ - int bpp, origin; - - FILE *fp = fopen(filename.c_str(), "rb"); - - if (fp == (FILE *) 0) - throw "Unable to open infile"; - - if (readTgaHeader(fp, width, height, bpp, origin) == 0) - throw "Invalid or unimplemented format for infile, needs to be a 24 or 32 bit uncompressed TGA file"; - - if (bpp == 24) - const_alpha = true; - else - { - // even if file is 32bpp the alpha may still be constant. so read file and check - Imf::Array2D pixels; - - read_file(fp, pixels, width, height, bpp, origin); - - const_alpha = true; - - for (int y=0; y& pixels, int& width, int& height) -{ - int bpp, origin; - - FILE *fp = fopen(filename.c_str(), "rb"); - - if (fp == (FILE *) 0) - throw "Unable to open infile"; - - if (readTgaHeader(fp, width, height, bpp, origin) == 0) - throw "Invalid or unimplemented format for infile, needs to be a 24 or 32 bit uncompressed TGA file"; - - read_file(fp, pixels, width, height, bpp, origin); - - fclose(fp); -} - -void Targa::write(const std::string& filename, const Imf::Array2D& pixels, int width, int height) -{ - FILE *fp = fopen(filename.c_str(), "wb"); - - if (fp == (FILE *) 0) - throw "Unable to open outfile"; - - unsigned char hdr[18]; - - // we're lazy, always write this as a 32bpp file, even if the alpha is constant 255 - - memset(hdr, 0, sizeof(hdr)); - hdr[2] = 2; - hdr[12] = width & 0xFF; - hdr[13] = width >> 8; - hdr[14] = height & 0xFF; - hdr[15] = height >> 8; - hdr[16] 
= 32; - hdr[17] = 0x28; - - fwrite( hdr, sizeof(hdr), 1, fp ); - - // top to bottom order - for (int y = 0; y < height; ++y) - for (int x = 0; x < width; ++x) - { - int a = int((pixels[y][x]).a + 0.5f); - int r = int((pixels[y][x]).r + 0.5f); - int g = int((pixels[y][x]).g + 0.5f); - int b = int((pixels[y][x]).b + 0.5f); - - if (b < RGBA_MIN) b = RGBA_MIN; if (b > RGBA_MAX) b = RGBA_MAX; fputc(b, fp); - if (g < RGBA_MIN) g = RGBA_MIN; if (g > RGBA_MAX) g = RGBA_MAX; fputc(g, fp); - if (r < RGBA_MIN) r = RGBA_MIN; if (r > RGBA_MAX) r = RGBA_MAX; fputc(r, fp); - if (a < RGBA_MIN) a = RGBA_MIN; if (a > RGBA_MAX) a = RGBA_MAX; fputc(a, fp); - } - fclose(fp); -} diff --git a/src/nvtt/bc7/targa.h b/src/nvtt/bc7/targa.h deleted file mode 100644 index 995df8e..0000000 --- a/src/nvtt/bc7/targa.h +++ /dev/null @@ -1,30 +0,0 @@ -/* -Copyright 2007 nVidia, Inc. -Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. - -You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - -See the License for the specific language governing permissions and limitations under the License. 
-*/ - -#ifndef _targa_h_ -#define _targa_h_ - -#include "ImfArray.h" -#include "rgba.h" - -class Targa -{ -public: - Targa(); - ~Targa(); - - static void fileinfo( const std::string& filename, int& width, int& height, bool& const_alpha); - static void read( const std::string& filename, Imf::Array2D& pixels, int& width, int& height ); - static void write(const std::string& filename, const Imf::Array2D& pixels, int width, int height ); -}; - -#endif /* _targa_h_ */ diff --git a/src/nvtt/bc7/tile.h b/src/nvtt/bc7/tile.h index 620ae2b..85fcc57 100644 --- a/src/nvtt/bc7/tile.h +++ b/src/nvtt/bc7/tile.h @@ -10,17 +10,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifndef _TILE_H -#define _TILE_H +#ifndef _AVPCL_TILE_H +#define _AVPCL_TILE_H -#include "ImfArray.h" +#include "nvmath/Vector.h" #include -#include "arvo/Vec4.h" #include "utils.h" -#include "rgba.h" -using namespace Imf; -using namespace ArvoMath; +namespace AVPCL { // extract a tile of pixels from an array @@ -30,38 +27,14 @@ public: static const int TILE_H = 4; static const int TILE_W = 4; static const int TILE_TOTAL = TILE_H * TILE_W; - Vec4 data[TILE_H][TILE_W]; + nv::Vector4 data[TILE_H][TILE_W]; int size_x, size_y; // actual size of tile Tile() {}; ~Tile(){}; Tile(int xs, int ys) {size_x = xs; size_y = ys;} - - // pixels -> tile - void inline insert(const Array2D &pixels, int x, int y) - { - for (int y0=0; y0 pixels - void inline extract(Array2D &pixels, int x, int y) - { - for (int y0=0; y0 -#include -#include "rgba.h" -#include "arvo/Vec3.h" -#include "arvo/Vec4.h" -static int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64 -static int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64 +using namespace nv; +using namespace AVPCL; + +static const int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64}; // 
divided by 64 +static const int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64 int Utils::lerp(int a, int b, int i, int bias, int denom) { #ifdef USE_ZOH_INTERP - assert (denom == 3 || denom == 7 || denom == 15); - assert (i >= 0 && i <= denom); - assert (bias >= 0 && bias <= denom/2); - assert (a >= 0 && b >= 0); + nvAssert (denom == 3 || denom == 7 || denom == 15); + nvAssert (i >= 0 && i <= denom); + nvAssert (bias >= 0 && bias <= denom/2); + nvAssert (a >= 0 && b >= 0); int round = 0; #ifdef USE_ZOH_INTERP_ROUNDED @@ -41,29 +42,29 @@ int Utils::lerp(int a, int b, int i, int bias, int denom) case 3: denom *= 5; i *= 5; // fall through to case 15 case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6; case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6; - default: assert(0); return 0; + default: nvUnreachable(); return 0; } #else return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation #endif } -Vec4 Utils::lerp(const Vec4& a, const Vec4 &b, int i, int bias, int denom) +Vector4 Utils::lerp(Vector4::Arg a, Vector4::Arg b, int i, int bias, int denom) { #ifdef USE_ZOH_INTERP - assert (denom == 3 || denom == 7 || denom == 15); - assert (i >= 0 && i <= denom); - assert (bias >= 0 && bias <= denom/2); -// assert (a >= 0 && b >= 0); + nvAssert (denom == 3 || denom == 7 || denom == 15); + nvAssert (i >= 0 && i <= denom); + nvAssert (bias >= 0 && bias <= denom/2); +// nvAssert (a >= 0 && b >= 0); // no need to bias these as this is an exact division switch (denom) { case 3: denom *= 5; i *= 5; // fall through to case 15 - case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i]) / 64.0; - case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i]) / 64.0; - default: assert(0); return 0; + case 15:return (a*float(denom15_weights[denom-i]) + b*float(denom15_weights[i])) / 64.0f; + case 7: return (a*float(denom7_weights[denom-i]) + 
b*float(denom7_weights[i])) / 64.0f; + default: nvUnreachable(); return Vector4(0); } #else return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation @@ -75,8 +76,7 @@ int Utils::unquantize(int q, int prec) { int unq; - assert (prec > 3); // we only want to do one replicate - assert (RGBA_MIN == 0); + nvAssert (prec > 3); // we only want to do one replicate #ifdef USE_ZOH_QUANT if (prec >= 8) @@ -84,9 +84,9 @@ int Utils::unquantize(int q, int prec) else if (q == 0) unq = 0; else if (q == ((1<> prec; + unq = (q * 256 + 128) >> prec; #else // avpcl unquantizer -- bit replicate unq = (q << (8-prec)) | (q >> (2*prec-8)); @@ -100,112 +100,111 @@ int Utils::quantize(float value, int prec) { int q, unq; - assert (prec > 3); // we only want to do one replicate - assert (RGBA_MIN == 0); + nvAssert (prec > 3); // we only want to do one replicate - unq = (int)floor(value + 0.5); - assert (unq >= RGBA_MIN && unq <= RGBA_MAX); + unq = (int)floor(value + 0.5f); + nvAssert (unq <= 255); #ifdef USE_ZOH_QUANT - q = (prec >= 8) ? unq : (unq << prec) / (RGBA_MAX+1); + q = (prec >= 8) ? 
unq : (unq << prec) / 256; #else // avpcl quantizer -- scale properly for best possible bit-replicated result - q = (unq * ((1<= 0 && q < (1 << prec)); + nvAssert (q >= 0 && q < (1 << prec)); return q; } -double Utils::metric4(const Vec4& a, const Vec4& b) +float Utils::metric4(Vector4::Arg a, Vector4::Arg b) { - Vec4 err = a - b; + Vector4 err = a - b; // if nonuniform, select weights and weigh away if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) { - double rwt, gwt, bwt; + float rwt, gwt, bwt; if (AVPCL::flag_nonuniform) { - rwt = 0.299; gwt = 0.587; bwt = 0.114; + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; } else if (AVPCL::flag_nonuniform_ati) { - rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; } // weigh the components - err.X() *= rwt; - err.Y() *= gwt; - err.Z() *= bwt; + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; } - return err * err; + return lengthSquared(err); } // WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go. 
-double Utils::metric3(const Vec3& a, const Vec3& b, int rotatemode) +float Utils::metric3(Vector3::Arg a, Vector3::Arg b, int rotatemode) { - Vec3 err = a - b; + Vector3 err = a - b; // if nonuniform, select weights and weigh away if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) { - double rwt, gwt, bwt; + float rwt, gwt, bwt; if (AVPCL::flag_nonuniform) { - rwt = 0.299; gwt = 0.587; bwt = 0.114; + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; } else if (AVPCL::flag_nonuniform_ati) { - rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; } // adjust weights based on rotatemode switch(rotatemode) { case ROTATEMODE_RGBA_RGBA: break; - case ROTATEMODE_RGBA_AGBR: rwt = 1.0; break; - case ROTATEMODE_RGBA_RABG: gwt = 1.0; break; - case ROTATEMODE_RGBA_RGAB: bwt = 1.0; break; - default: assert(0); + case ROTATEMODE_RGBA_AGBR: rwt = 1.0f; break; + case ROTATEMODE_RGBA_RABG: gwt = 1.0f; break; + case ROTATEMODE_RGBA_RGAB: bwt = 1.0f; break; + default: nvUnreachable(); } // weigh the components - err.X() *= rwt; - err.Y() *= gwt; - err.Z() *= bwt; + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; } - return err * err; + return lengthSquared(err); } -double Utils::metric1(const float a, const float b, int rotatemode) +float Utils::metric1(const float a, const float b, int rotatemode) { float err = a - b; // if nonuniform, select weights and weigh away if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) { - double rwt, gwt, bwt, awt; + float rwt, gwt, bwt, awt; if (AVPCL::flag_nonuniform) { - rwt = 0.299; gwt = 0.587; bwt = 0.114; + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; } else if (AVPCL::flag_nonuniform_ati) { - rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; } // adjust weights based on rotatemode switch(rotatemode) { - case ROTATEMODE_RGBA_RGBA: awt = 1.0; break; + case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break; case ROTATEMODE_RGBA_AGBR: awt = rwt; break; case ROTATEMODE_RGBA_RABG: awt 
= gwt; break; case ROTATEMODE_RGBA_RGAB: awt = bwt; break; - default: assert(0); + default: nvUnreachable(); } // weigh the components @@ -218,169 +217,169 @@ double Utils::metric1(const float a, const float b, int rotatemode) float Utils::premult(float r, float a) { // note that the args are really integers stored in floats - int R = r, A = a; + int R = int(r), A = int(a); - assert ((R==r) && (A==a)); + nvAssert ((R==r) && (A==a)); - return float((R*A + RGBA_MAX/2)/RGBA_MAX); + return float((R*A + 127)/255); } -static void premult4(Vec4& rgba) +static void premult4(Vector4& rgba) { - rgba.X() = Utils::premult(rgba.X(), rgba.W()); - rgba.Y() = Utils::premult(rgba.Y(), rgba.W()); - rgba.Z() = Utils::premult(rgba.Z(), rgba.W()); + rgba.x = Utils::premult(rgba.x, rgba.w); + rgba.y = Utils::premult(rgba.y, rgba.w); + rgba.z = Utils::premult(rgba.z, rgba.w); } -static void premult3(Vec3& rgb, float a) +static void premult3(Vector3& rgb, float a) { - rgb.X() = Utils::premult(rgb.X(), a); - rgb.Y() = Utils::premult(rgb.Y(), a); - rgb.Z() = Utils::premult(rgb.Z(), a); + rgb.x = Utils::premult(rgb.x, a); + rgb.y = Utils::premult(rgb.y, a); + rgb.z = Utils::premult(rgb.z, a); } -double Utils::metric4premult(const Vec4& a, const Vec4& b) +float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b) { - Vec4 pma = a, pmb = b; + Vector4 pma = a, pmb = b; premult4(pma); premult4(pmb); - Vec4 err = pma - pmb; + Vector4 err = pma - pmb; // if nonuniform, select weights and weigh away if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) { - double rwt, gwt, bwt; + float rwt, gwt, bwt; if (AVPCL::flag_nonuniform) { - rwt = 0.299; gwt = 0.587; bwt = 0.114; + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; } else if (AVPCL::flag_nonuniform_ati) { - rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; } // weigh the components - err.X() *= rwt; - err.Y() *= gwt; - err.Z() *= bwt; + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; } - return err * err; + 
return lengthSquared(err); } -double Utils::metric3premult_alphaout(const Vec3& rgb0, float a0, const Vec3& rgb1, float a1) +float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg rgb1, float a1) { - Vec3 pma = rgb0, pmb = rgb1; + Vector3 pma = rgb0, pmb = rgb1; premult3(pma, a0); premult3(pmb, a1); - Vec3 err = pma - pmb; + Vector3 err = pma - pmb; // if nonuniform, select weights and weigh away if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) { - double rwt, gwt, bwt; + float rwt, gwt, bwt; if (AVPCL::flag_nonuniform) { - rwt = 0.299; gwt = 0.587; bwt = 0.114; + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; } else if (AVPCL::flag_nonuniform_ati) { - rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; } // weigh the components - err.X() *= rwt; - err.Y() *= gwt; - err.Z() *= bwt; + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; } - return err * err; + return lengthSquared(err); } -double Utils::metric3premult_alphain(const Vec3& rgb0, const Vec3& rgb1, int rotatemode) +float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int rotatemode) { - Vec3 pma = rgb0, pmb = rgb1; + Vector3 pma = rgb0, pmb = rgb1; switch(rotatemode) { case ROTATEMODE_RGBA_RGBA: // this function isn't supposed to be called for this rotatemode - assert(0); + nvUnreachable(); break; case ROTATEMODE_RGBA_AGBR: - pma.Y() = premult(pma.Y(), pma.X()); - pma.Z() = premult(pma.Z(), pma.X()); - pmb.Y() = premult(pmb.Y(), pmb.X()); - pmb.Z() = premult(pmb.Z(), pmb.X()); + pma.y = premult(pma.y, pma.x); + pma.z = premult(pma.z, pma.x); + pmb.y = premult(pmb.y, pmb.x); + pmb.z = premult(pmb.z, pmb.x); break; case ROTATEMODE_RGBA_RABG: - pma.X() = premult(pma.X(), pma.Y()); - pma.Z() = premult(pma.Z(), pma.Y()); - pmb.X() = premult(pmb.X(), pmb.Y()); - pmb.Z() = premult(pmb.Z(), pmb.Y()); + pma.x = premult(pma.x, pma.y); + pma.z = premult(pma.z, pma.y); + pmb.x = premult(pmb.x, pmb.y); + pmb.z = premult(pmb.z, pmb.y); 
break; case ROTATEMODE_RGBA_RGAB: - pma.X() = premult(pma.X(), pma.Z()); - pma.Y() = premult(pma.Y(), pma.Z()); - pmb.X() = premult(pmb.X(), pmb.Z()); - pmb.Y() = premult(pmb.Y(), pmb.Z()); + pma.x = premult(pma.x, pma.z); + pma.y = premult(pma.y, pma.z); + pmb.x = premult(pmb.x, pmb.z); + pmb.y = premult(pmb.y, pmb.z); break; - default: assert(0); + default: nvUnreachable(); } - Vec3 err = pma - pmb; + Vector3 err = pma - pmb; // if nonuniform, select weights and weigh away if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) { - double rwt, gwt, bwt; + float rwt, gwt, bwt; if (AVPCL::flag_nonuniform) { - rwt = 0.299; gwt = 0.587; bwt = 0.114; + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; } else if (AVPCL::flag_nonuniform_ati) { - rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; } // weigh the components - err.X() *= rwt; - err.Y() *= gwt; - err.Z() *= bwt; + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; } - return err * err; + return lengthSquared(err); } -double Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode) +float Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode) { float err = premult(rgb0, a0) - premult(rgb1, a1); // if nonuniform, select weights and weigh away if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) { - double rwt, gwt, bwt, awt; + float rwt, gwt, bwt, awt; if (AVPCL::flag_nonuniform) { - rwt = 0.299; gwt = 0.587; bwt = 0.114; + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; } else if (AVPCL::flag_nonuniform_ati) { - rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; } // adjust weights based on rotatemode switch(rotatemode) { - case ROTATEMODE_RGBA_RGBA: awt = 1.0; break; + case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break; case ROTATEMODE_RGBA_AGBR: awt = rwt; break; case ROTATEMODE_RGBA_RABG: awt = gwt; break; case ROTATEMODE_RGBA_RGAB: awt = bwt; break; - default: assert(0); + default: nvUnreachable(); 
} // weigh the components diff --git a/src/nvtt/bc7/utils.h b/src/nvtt/bc7/utils.h index 5c08ffd..4e213a5 100644 --- a/src/nvtt/bc7/utils.h +++ b/src/nvtt/bc7/utils.h @@ -11,49 +11,39 @@ See the License for the specific language governing permissions and limitations */ // utility class holding common routines -#ifndef _UTILS_H -#define _UTILS_H +#ifndef _AVPCL_UTILS_H +#define _AVPCL_UTILS_H -#include "arvo/Vec4.h" +#include "nvmath/Vector.h" -using namespace ArvoMath; +namespace AVPCL { -#ifndef MIN -#define MIN(x,y) ((x)<(y)?(x):(y)) -#endif +inline int SIGN_EXTEND(int x, int nb) { return ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)); } -#ifndef MAX -#define MAX(x,y) ((x)>(y)?(x):(y)) -#endif +static const int INDEXMODE_BITS = 1; // 2 different index modes +static const int NINDEXMODES = (1<<(INDEXMODE_BITS)); +static const int INDEXMODE_ALPHA_IS_3BITS = 0; +static const int INDEXMODE_ALPHA_IS_2BITS = 1; -#define PALETTE_LERP(a, b, i, bias, denom) Utils::lerp(a, b, i, bias, denom) - -#define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)) - -#define INDEXMODE_BITS 1 // 2 different index modes -#define NINDEXMODES (1<<(INDEXMODE_BITS)) -#define INDEXMODE_ALPHA_IS_3BITS 0 -#define INDEXMODE_ALPHA_IS_2BITS 1 - -#define ROTATEMODE_BITS 2 // 4 different rotate modes -#define NROTATEMODES (1<<(ROTATEMODE_BITS)) -#define ROTATEMODE_RGBA_RGBA 0 -#define ROTATEMODE_RGBA_AGBR 1 -#define ROTATEMODE_RGBA_RABG 2 -#define ROTATEMODE_RGBA_RGAB 3 +static const int ROTATEMODE_BITS = 2; // 4 different rotate modes +static const int NROTATEMODES = (1<<(ROTATEMODE_BITS)); +static const int ROTATEMODE_RGBA_RGBA = 0; +static const int ROTATEMODE_RGBA_AGBR = 1; +static const int ROTATEMODE_RGBA_RABG = 2; +static const int ROTATEMODE_RGBA_RGAB = 3; class Utils { public: // error metrics - static double metric4(const Vec4& a, const Vec4& b); - static double metric3(const Vec3& a, const Vec3& b, int rotatemode); - static double metric1(float a, float b, int rotatemode); + static 
float metric4(nv::Vector4::Arg a, nv::Vector4::Arg b); + static float metric3(nv::Vector3::Arg a, nv::Vector3::Arg b, int rotatemode); + static float metric1(float a, float b, int rotatemode); - static double metric4premult(const Vec4& rgba0, const Vec4& rgba1); - static double metric3premult_alphaout(const Vec3& rgb0, float a0, const Vec3& rgb1, float a1); - static double metric3premult_alphain(const Vec3& rgb0, const Vec3& rgb1, int rotatemode); - static double metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode); + static float metric4premult(nv::Vector4::Arg rgba0, nv::Vector4::Arg rgba1); + static float metric3premult_alphaout(nv::Vector3::Arg rgb0, float a0, nv::Vector3::Arg rgb1, float a1); + static float metric3premult_alphain(nv::Vector3::Arg rgb0, nv::Vector3::Arg rgb1, int rotatemode); + static float metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode); static float Utils::premult(float r, float a); @@ -63,7 +53,9 @@ public: // lerping static int lerp(int a, int b, int i, int bias, int denom); - static Vec4 lerp(const Vec4& a, const Vec4 &b, int i, int bias, int denom); + static nv::Vector4 lerp(nv::Vector4::Arg a, nv::Vector4::Arg b, int i, int bias, int denom); }; +} + #endif \ No newline at end of file diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index 6a03538..d2b88e9 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -102,7 +102,7 @@ namespace nvtt Format_CTX1, // Not supported on CPU yet. Format_BC6, - Format_BC7, // Not supported yet. 
+ Format_BC7, Format_DXT1_Luma, }; diff --git a/src/nvtt/tests/testsuite.cpp b/src/nvtt/tests/testsuite.cpp index 5658d46..f1d6e99 100644 --- a/src/nvtt/tests/testsuite.cpp +++ b/src/nvtt/tests/testsuite.cpp @@ -188,6 +188,7 @@ enum Mode { Mode_BC5_Normal_Quartic, //Mode_BC5_Normal_DualParaboloid, Mode_BC6, + Mode_BC7, Mode_Count }; static const char * s_modeNames[] = { @@ -207,6 +208,7 @@ static const char * s_modeNames[] = { "BC5-Normal-Quartic", // Mode_BC5_Normal_Quartic, //"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid, "BC6", // Mode_BC6, + "BC7", // Mode_BC7, }; nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count); @@ -216,14 +218,14 @@ struct Test { Mode modes[6]; }; static Test s_imageTests[] = { - {"Color", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC6, /*Mode_BC3_LUVW*/}}, + {"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}}, {"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}}, //{"Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}}, {"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}}, {"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}}, {"HDR", 2, {Mode_BC3_RGBM, Mode_BC6}}, - //{"BC6", 1, {Mode_BC6}}, // temporary mode for testing - //{"BC7", 1, {Mode_BC7}}, // temporary mode for testing + {"BC6", 1, {Mode_BC6}}, + {"BC7", 1, {Mode_BC7}}, }; const int s_imageTestCount = ARRAY_SIZE(s_imageTests); @@ -616,6 +618,10 @@ int main(int argc, char *argv[]) { format = nvtt::Format_BC6; } + else if (mode == Mode_BC7) + { + format = nvtt::Format_BC7; + } else { nvDebugCheck(false); diff --git a/src/nvtt/tools/compress.cpp b/src/nvtt/tools/compress.cpp index ce67700..a417567 100644 --- a/src/nvtt/tools/compress.cpp +++ b/src/nvtt/tools/compress.cpp @@ -265,11 +265,10 @@ int main(int argc, char *argv[]) { format = nvtt::Format_BC6; } - // !!!UNDONE: add BC7 support - /*else if (strcmp("-bc7", argv[i]) == 
0) + else if (strcmp("-bc7", argv[i]) == 0) { format = nvtt::Format_BC7; - }*/ + } // Undocumented option. Mainly used for testing. else if (strcmp("-ext", argv[i]) == 0) @@ -355,7 +354,7 @@ int main(int argc, char *argv[]) printf(" -bc4 \tBC4 format (ATI1)\n"); printf(" -bc5 \tBC5 format (3Dc/ATI2)\n"); printf(" -bc6 \tBC6 format\n"); - //printf(" -bc7 \tBC7 format\n\n"); + printf(" -bc7 \tBC7 format\n\n"); printf("Output options:\n"); printf(" -silent \tDo not output progress messages\n");