295 Commits
2.0 ... 2.0.6

Author SHA1 Message Date
de8f0153c0 Tag 2.0.6 for release. 2009-03-19 19:06:30 +00:00
820eb374d5 Add single color table generation code. 2009-03-19 10:06:42 +00:00
974cacda5f Update single color compression tables. 2009-03-19 10:00:43 +00:00
953a63d7b5 Add farbrausch images to testsuite. 2009-03-19 10:00:26 +00:00
19477d60c0 Fix errors in the pixel format conversion code. 2009-03-19 08:57:49 +00:00
8a48250bcb Fix win32 errors and errors in the png saving code. 2009-03-19 08:57:28 +00:00
18d95584cc Add farbrausch textures. 2009-03-19 06:28:32 +00:00
9b3075030e Add data to repro bug reports. 2009-03-19 06:18:31 +00:00
dd98ce6eab Add one more exclude rule to pkg builder script. 2009-03-18 06:55:54 +00:00
35ff0e5aa6 Remove verbose error checking. 2009-03-18 05:51:15 +00:00
8529dcf755 Fix memory leaks.
Shutdown CUDA properly when nvtt context is destroyed.
Fixes issue 83.
2009-03-18 05:46:53 +00:00
56543e1a92 Merge changes from 2.0 2009-03-18 04:05:39 +00:00
72543c9307 Add todo message. 2009-03-17 08:16:00 +00:00
872c61e1d1 Add image saving code. 2009-03-17 08:14:28 +00:00
7f3cee4db9 Remove commented out code. 2009-03-17 06:33:31 +00:00
3f036a11a6 Avoid msvc8 warnings. 2009-03-16 21:08:09 +00:00
3df9aff396 rename gnuwin back to gnuwin32 2009-03-16 19:42:25 +00:00
be12367910 fix blend, add setborder and fill methods. 2009-03-16 09:05:32 +00:00
c59a2e0a4b Implement alpha premultiplication and color blending. 2009-03-16 08:54:43 +00:00
0abec17ab4 Implement toGreyScale.
Skip undefined images.
2009-03-16 08:47:20 +00:00
6b67f4a0d7 More progress with imperative api.
Rename Texture to TexImage.
Implement image initialization.
Add stubs for all methods.
2009-03-16 08:37:07 +00:00
6e2565d1a4 Install doc files in doc folder. 2009-03-16 06:05:51 +00:00
3e3c8a4d18 Generate debs on linux only. 2009-03-15 18:35:00 +00:00
8e836591ee Fix warning. 2009-03-15 18:34:46 +00:00
50b8b67185 Hide file-format-specific savers. Add generic image saver.
Misc fixes under OSX.
2009-03-15 10:18:54 +00:00
36850f6241 Include readme in the installer. 2009-03-14 08:02:47 +00:00
ed58bd90ff Update readme. 2009-03-14 08:02:33 +00:00
ab73c790e1 Testsuite cleanups and improvements.
Add ctest support.
Add FileSystem::changeDirectory method.
2009-03-14 07:27:25 +00:00
4a34c673a4 Add testsuite images. 2009-03-14 07:09:26 +00:00
0ce578668f Compile libraries as dynamic libs.
Exclude qtproject folder in source packages.
Add support for package generation using cpack.
2009-03-14 06:04:33 +00:00
53e6c4c911 Update project files with recently added files. 2009-03-14 03:31:12 +00:00
d99cf11e2e Update version checking code. 2009-03-14 03:30:20 +00:00
d9832ed22c Eliminate some warnings with MSVC. 2009-03-14 03:29:43 +00:00
a02649faa9 Fall back to the CPU compressor only on smaller images. 2009-03-14 03:29:14 +00:00
a28ebb4ccf Some more progress in the imperative API. 2009-03-07 07:14:00 +00:00
0f5a5e5d24 Some more progress in the imperative API. 2009-03-05 05:34:28 +00:00
8f0b129a52 Add RefCounted base class back to the repository. 2009-03-05 05:33:53 +00:00
098bc2f905 Fix some endianness errors. 2009-03-04 09:36:40 +00:00
5943e8f42f Fix errors on ibook G4. 2009-03-04 07:04:32 +00:00
ba72ebafcb Delete win64 dlls, not very useful without the corresponding lib files. 2009-03-02 09:50:47 +00:00
a89735994c Update vc9 project files. 2009-03-02 09:45:17 +00:00
e7fd290af6 Update changelog. 2009-03-02 09:28:11 +00:00
35b97e7a13 Add property file. 2009-03-02 09:27:51 +00:00
472c2d691f Update vc8 project files. 2009-03-02 09:27:07 +00:00
e48f56a15e Include project headers first. 2009-03-02 09:21:48 +00:00
25e32c8ff2 Fix msvc warnings. 2009-03-02 09:21:30 +00:00
db63ba7fa4 Fix build in win64. 2009-03-02 09:21:07 +00:00
3df66be089 Do not use CUDA to compress small mipmaps. 2009-03-02 09:09:05 +00:00
3a52923697 Add alpha command line option. 2009-03-02 09:07:07 +00:00
9953883d26 Cleanup cmake files. 2009-03-02 07:32:00 +00:00
5ed9090012 Move poshlib to extern. 2009-03-02 07:30:38 +00:00
93e2fb46a9 Update changelog and buildpkg script. 2009-03-02 07:01:06 +00:00
03c9ec0f62 More cleanup. Remove files that are not strictly required. 2009-03-01 02:38:24 +00:00
88fc5ca18e Merge changes from private tree.
Eliminate files that are not needed for NVTT.
2009-03-01 00:18:47 +00:00
6fb29816a2 Gnome thumbnailer. Fixes issue 82. 2009-02-03 09:32:54 +00:00
c67edca820 Update changelog. 2009-02-03 09:30:54 +00:00
9d5242594b Add gnome thumbnailer by Frank Richter. Fixes issue 82. 2009-02-03 09:29:25 +00:00
69c74d7a5e Add support for comments. 2009-02-03 09:23:58 +00:00
b7ea7a255b Fix const-correctness. 2009-02-03 09:22:30 +00:00
17790a34df Add support for PNG in nvdecompress. Patch by Frank Richter. Fixes issue 80. 2009-02-03 09:08:39 +00:00
7741a99b90 Add support for saving PNG files. Patch by Frank Richter. Fixes issue 79. 2009-02-03 09:06:21 +00:00
36dd7fb76b Merge changes from p4. 2009-02-03 08:22:35 +00:00
8fa870bf0c Fix typo. Fixes issue 82. 2009-02-03 08:02:20 +00:00
1afdf2da8a Fix alpha-weighted filtering. 2009-01-28 12:10:04 +00:00
242aa4254e Use alpha-aware resize when alpha is used for transparency and it's not premultiplied. 2009-01-28 10:58:57 +00:00
4f576d5539 Add support for alpha weighting to float image. 2009-01-28 10:55:23 +00:00
2411f3f5db Fix generation of blended sobel filter. Fixes issue 77. 2009-01-28 00:56:27 +00:00
1c6b65ad52 Do not use custom FindGLUT cmake script. 2009-01-27 23:39:33 +00:00
f92a2191f2 Print message when CUDA acceleration is enabled. 2009-01-20 10:43:56 +00:00
7f9b10329b Add squish.h to project. 2009-01-19 10:43:53 +00:00
49409e9d92 Cleanup color rounding and expansion. 2009-01-19 10:42:31 +00:00
f753cc9702 Implement FileSystem::exists correctly on win32. 2009-01-19 10:41:51 +00:00
07a4daed7b Add FileSystem.{h,cpp} to project. 2009-01-19 10:41:09 +00:00
2ad15489bb Try to optimize color rounding and expansion.
Detect CUDA 2.1 properly.
2009-01-19 09:54:43 +00:00
fa53ddcecd Add NV5x/G8x DXT decompression code.
Clean things up a bit, remove old code.
2009-01-13 08:25:04 +00:00
7a8b3aecc9 Fix name of libpng dll. 2009-01-10 04:40:46 +00:00
94357626f7 Merge fixes from 2.0 branch. 2009-01-10 01:31:02 +00:00
19342d8adf Use timer class instead of clock.
Check that file exists before opening.
2009-01-09 05:46:24 +00:00
2ed4fee447 Fix errors in string builder and path. 2009-01-09 05:45:36 +00:00
f03d702d84 Implement exists with access instead of stat. 2009-01-09 05:45:02 +00:00
10de10b9c2 Implement FileSystem::exists(). 2009-01-09 02:24:32 +00:00
508f9fbdc2 Start implementing experimental interface. 2009-01-05 10:17:06 +00:00
e965b0e4a9 Include correct headers. 2009-01-04 07:29:35 +00:00
1f4d313d0f Merge changes from internal branch. Whitespace changes only. 2008-12-29 11:34:39 +00:00
dc0b78ad14 Do not enable testing. 2008-12-29 11:33:48 +00:00
b8eb12afc1 Merge changes from internal branch.
- Better support for win64.
2008-12-29 11:33:20 +00:00
1975883bed Update after changes in nvcore. 2008-12-29 11:29:45 +00:00
9bda107603 Add pull push filter with bilinear filtering. 2008-12-29 11:28:29 +00:00
b4f17b968a Merge changes from internal branch.
- Add frustum class and bezier evaluation functions.
- Add component accessors to vector.
- Add matrix constructors.
- Fix errors in sparse solvers.
- Better robust orthogonalization.
- Fix montecarlo distribution.
2008-12-29 11:27:13 +00:00
e5ae0c0e20 Merge internal branch.
- Remove old/unused code.
- Remove format string constructors.
- Better win64 support (vsscanf, prefetch, etc).
- Fix radix sort to sort -0 correctly.
- Add misc utilities (constraints, timer, cpuinfo, introsort).
2008-12-29 11:20:06 +00:00
a03411e451 Check version properly. 2008-12-16 20:25:12 +00:00
931580edc5 Include cuda runtime properly. 2008-12-09 11:26:08 +00:00
fd2f5465f8 Do not use kahansum, that was stupid.
Include cuda runtime properly.
2008-12-09 11:25:46 +00:00
127052f404 Use KahanSum to compute RMSE.
Fix typos.
2008-12-07 23:15:06 +00:00
a30490ab9b Preserve cluster location for empty partitions. 2008-11-24 10:35:42 +00:00
1ec115c7ec Cleanup 4 means compressor. 2008-11-24 10:35:07 +00:00
a4f56b65b8 Add support for alpha modes in the CPU compressors. Fixes issue 30. 2008-11-24 10:34:16 +00:00
bb69acec6c Add FileSystem to build. 2008-11-23 22:25:47 +00:00
4bbf5e96f4 Add squish external compressor.
Rename our squish version to nvsquish.
2008-11-23 08:59:56 +00:00
4a85f8e48d Remove executable flag. 2008-11-22 22:12:05 +00:00
f34b7ce84f Merge optimizations from squish. 2008-11-22 22:10:51 +00:00
010905edd3 Fix tabs. 2008-11-22 22:10:11 +00:00
7bb2d55d35 Create output directory. 2008-11-22 22:08:31 +00:00
e3a7cc19dd Add file system helper. 2008-11-22 22:07:07 +00:00
379605d30a Use metric to measure distance to clusters. 2008-11-22 21:32:27 +00:00
c05c4e155b Merge optimizations from official squish release. 2008-11-22 11:36:06 +00:00
fd73484bfc Merge optimizations from official squish release. 2008-11-22 11:35:13 +00:00
f29d7dd938 Try using 4 means clustering. 2008-11-22 11:34:29 +00:00
3a5dc4783a Add support for regressions. 2008-11-22 11:33:31 +00:00
d4a713451e Fix 4 means clustering. 2008-11-22 11:32:51 +00:00
a302475fa6 Add fitting implementation to project. 2008-11-22 11:30:46 +00:00
6e988ea4c8 Add stream to the vc8 project. 2008-11-22 11:28:18 +00:00
7731181900 Update changelog. 2008-11-22 08:37:56 +00:00
41f6e0ba73 Try 4-means. 2008-11-22 08:37:14 +00:00
11073171a1 Rename stress to testsuite. Install target. 2008-11-22 08:36:55 +00:00
0805832b44 Remove squish build and project files. 2008-11-22 08:36:17 +00:00
a4dcd414ca Fix errors. 2008-11-22 08:35:04 +00:00
4ff8a83f90 Add fitting code to build. 2008-11-22 08:30:55 +00:00
48da357385 Add PCA, and 4-means implementation. 2008-11-22 08:30:20 +00:00
e1916d43c8 Do not mix tabs and spaces. 2008-11-22 00:14:05 +00:00
321f320bfb Do not print stupid messages. 2008-11-22 00:13:14 +00:00
df32fedc7c Add command line options.
Reformat output for easier parsing.
2008-11-21 09:09:57 +00:00
a7396b70ba Fix segfault on linux. Merged from 2.0 branch. 2008-11-21 09:08:00 +00:00
d9ca49cc5e Fix bug in dxt decompression.
Output files as TGA.
2008-11-21 08:06:25 +00:00
56849b78ad Output compressed files. 2008-11-21 07:58:49 +00:00
a769831fb5 Add const keyword to const arguments. 2008-11-21 07:57:28 +00:00
7486201a7e Fix bug in testsuite. 2008-11-21 07:45:09 +00:00
1813624992 Modify stress test to compress a list of real images. 2008-11-21 07:38:12 +00:00
5fa27adfcd Add custom error code and message for the case when the container format does not support a certain output format. 2008-11-19 08:10:54 +00:00
6d1891a7e9 Remove gcc-4.3 warning. 2008-11-14 02:19:39 +00:00
8fb1d70d0b Fix bug detected by gcc-4.3. 2008-11-14 02:19:07 +00:00
c26c52d59c Fix gcc-4.3 warnings. 2008-11-14 02:18:35 +00:00
c3329d4675 Fix gcc-4.3 warning. 2008-11-14 02:16:36 +00:00
1cefc366f8 Remove unused function. 2008-11-14 02:15:35 +00:00
7df0885c4f Fix CUDA detection code on linux. 2008-11-14 01:15:36 +00:00
1c5da0e341 Do not use the CUDA API when CUDA is not found.
Fix end of lines.
2008-11-10 21:54:03 +00:00
36ba75b598 Select fastest device. 2008-10-30 04:50:41 +00:00
1628831878 Fix comment. 2008-10-27 08:00:46 +00:00
12e774ea74 Fix cmake file. 2008-10-26 05:56:32 +00:00
66b18f2dbd Fix build under VC7 2008-10-22 03:48:06 +00:00
9ea1934097 Update vc8 projects. 2008-10-17 18:40:16 +00:00
9771e72702 Update vc9 projects. 2008-10-17 18:38:55 +00:00
7776bd5c17 Win32 fixes. 2008-10-17 18:37:17 +00:00
6d8a75462a Build nvtt as a shared library. 2008-10-16 22:22:45 +00:00
cf18077eda Prevent mismatches between incompatible versions of the CUDA runtime and the CUDA driver. 2008-10-16 22:21:21 +00:00
aa37e7a868 Add library loading helpers. 2008-10-16 22:20:31 +00:00
d01a5c1661 Work around bug in CUDA runtime. When using CUDA 2.0, a driver that supports CUDA 2.0 is required. 2008-10-16 08:39:58 +00:00
36ed6bebda Update changelog with branch 2.0 fixes. 2008-10-15 07:17:20 +00:00
5234060618 Integrate branch 2.0 to trunk. 2008-10-15 07:16:57 +00:00
f402f28643 Use unsigned ints for stream sizes and positions. 2008-10-15 07:15:50 +00:00
f047043eb2 Fix compiler errors under gcc-4.3 2008-10-15 07:15:00 +00:00
7eac4195c4 Fix compiler errors under gcc-4.3 2008-10-15 07:14:25 +00:00
0f5692d1ea Compile CUDA files as C++. 2008-10-11 06:43:57 +00:00
b2d6122769 Fix linux build. 2008-10-05 19:20:42 +00:00
cd59058fc2 Fix linux includes. 2008-10-05 19:17:59 +00:00
db14e048e1 Fix errors in Green and Alpha optimal compressors. 2008-10-02 07:33:05 +00:00
0c36fcf626 Update changelog. 2008-10-01 22:30:19 +00:00
68be24bf00 Set correct DXT5n swizzle code.
Select swizzle codes in nvtt instead of nvimage.
2008-10-01 22:28:57 +00:00
b284669993 Try some optimizations. 2008-10-01 22:28:01 +00:00
2f6e885ced Add DXT1 compressor that uses texture to avoid CPU swizzling.
Fix errors under emulation.
Experiment with DXT5 compressor.
2008-10-01 22:24:53 +00:00
1957120c26 Reference gnuwin32 libs and include paths correctly. 2008-09-11 07:56:39 +00:00
d7ddcb9263 Set optimal options for release vc9 projects.
Fixes issue 62.
2008-09-11 07:48:02 +00:00
f5f6e88585 Do not use freeimage yet. 2008-08-20 22:34:08 +00:00
13e2d2e447 Fix float support. 2008-08-20 22:32:54 +00:00
0b13b6d0d9 Update version number. 2008-08-20 22:31:14 +00:00
ad85b0fcbe Include gnuwin32 directory properly. 2008-08-20 22:30:41 +00:00
0515d9a0a0 Add Half.{h,cpp} to project. 2008-07-31 10:00:05 +00:00
16adf94635 Add support for floating point output formats.
Images are currently output in linear space; some color transforms are not applied.
2008-07-31 09:55:22 +00:00
e9002a7d86 Adding support for floating point input/output. Work in progress. 2008-07-31 02:04:44 +00:00
3161fca9d9 Decompress DDS10 files. Only BC# supported so far. 2008-07-30 02:28:09 +00:00
bb5b02df0e Adding support for floating point images as input. 2008-07-29 08:56:40 +00:00
1941e27148 Fix DDS10 header initialization.
Fix depth initialization.
2008-07-29 08:43:42 +00:00
02c3abb394 Fix color transforms. 2008-07-29 06:05:11 +00:00
86ef67bbfa Fix error in input image transformation. 2008-07-29 05:45:35 +00:00
79529f994f Fix compilation error. 2008-07-29 05:44:31 +00:00
c2508d9eeb Add option to use dds10 headers. 2008-07-29 02:31:57 +00:00
b1cd916105 Change parameters in declaration to match definition. 2008-07-29 02:31:09 +00:00
96655b3e7c Work in progress:
- better support for DDS10 file format.
- support for RGBA pixel formats with more than 32 bits.
- support for pixel types other than UNORM.
2008-07-26 10:03:12 +00:00
529c0075e1 do not compile mpeg tests. 2008-07-26 09:01:00 +00:00
c70e5d6121 Reorg header files. 2008-07-05 09:10:45 +00:00
7394644719 Move ui to separate folder. 2008-07-05 09:10:00 +00:00
d5055300e2 Require cmake 2.6.0 2008-07-05 09:09:23 +00:00
b2e7d717c2 Fix compilation error under gcc/linux. 2008-07-05 08:57:03 +00:00
756f12c994 Fix errors in color transforms.
Add support for color offsets.
Add support for special swizzles that select default const values.
2008-06-30 10:59:57 +00:00
206bfcf0f3 reorg included files 2008-06-28 08:40:32 +00:00
15cfd1c06b fix path to gnulibs 2008-06-28 02:57:31 +00:00
8b26ecc865 fix path to gnulibs 2008-06-28 02:56:57 +00:00
488f3c8f42 fix path to gnu libs. 2008-06-28 02:56:27 +00:00
458b8814a7 fix path to gnulibs. 2008-06-28 02:55:35 +00:00
c08acc8a71 Add single color compressor to optimal green compressor.
Improve quality of DXT1 green compressor by increasing search range.
2008-06-28 02:50:09 +00:00
45f7244f20 Check in proposed fix for issue 44. 2008-06-27 18:52:49 +00:00
f412ec8efb Fix assertion. 2008-06-26 07:23:31 +00:00
a1a34f546f Do not compile nvmpegenc. 2008-06-26 07:23:09 +00:00
7ef88c6f7e Fix build on OSX 10.5. Solves issue 44. 2008-06-26 07:22:34 +00:00
3368f9039b Fix embarrassing typo. 2008-06-24 21:47:47 +00:00
870a3fe438 Add references. 2008-06-23 19:24:59 +00:00
82bed4ac9a Eliminate warning. 2008-06-19 10:01:56 +00:00
b8a9395117 Fix end of lines. 2008-06-19 09:53:09 +00:00
65f769160d Fix solution. 2008-06-19 09:52:20 +00:00
524ebbec8c Add PhotoshopExporter template project to solution. 2008-06-19 00:38:15 +00:00
ce85eaff3e Add photoshop exporter template. 2008-06-19 00:37:15 +00:00
6befe3505c Enable Qt ui with win32/msvc. 2008-06-19 00:35:47 +00:00
ff6f7f0506 Add quick and dirty single frame mpeg encoder based on ffmpeg. 2008-06-13 08:12:58 +00:00
55e7d3dec4 Delete outdated comment. 2008-06-03 06:34:18 +00:00
b5e373b734 Compile NVTT under G5/leopard 2008-05-23 22:22:09 +00:00
6ce542b5c0 Fix error in cmake file. 2008-05-23 06:51:02 +00:00
58e5f6534f Print version number in copyright statement. 2008-05-22 21:48:40 +00:00
eda4786ca6 Update version number. 2008-05-22 21:48:19 +00:00
29a720bf82 Do not print text. 2008-05-22 21:32:09 +00:00
1120f83f7d Fix errors. 2008-05-22 21:31:44 +00:00
c38c3dc584 Fix win32 build errors and warnings due to FreeImage. 2008-05-22 21:31:15 +00:00
7d3d0ede9d Update projects to use FreeImage. 2008-05-22 21:30:34 +00:00
bc66a7ad74 Update cmake files. 2008-05-22 19:47:52 +00:00
ca8d17abf5 Reorganize external libs. 2008-05-22 19:47:23 +00:00
10e79dac9d Update changelog. 2008-05-21 19:22:43 +00:00
e068964423 Fix EXR loading code. Issue 45. Fix provided by alastairpatrick. 2008-05-21 19:17:27 +00:00
ea340443d9 Add -mipfilter command line option, per request of Noel Llopis. 2008-05-19 20:10:05 +00:00
fb2b0cb38c Fix bug reported by Noel Llopis.
Make sure FreeImage declarations are not used when FreeImage is not available.
2008-05-19 18:23:42 +00:00
c01566cd2f Add support for FreeImage in nvimage.
Add support for floating point input images in nvtt.
2008-05-15 09:47:55 +00:00
47bdab8e27 Define FREEIMAGE_LIBRARIES. 2008-05-15 09:46:45 +00:00
70267fda15 Add support for input floating point images. Patch provided by Jim Tilander. See issue 27. 2008-05-15 06:18:24 +00:00
aebcea412c Search for freeimage. 2008-05-15 06:17:46 +00:00
bccdcd49da Use standard Gram-Schmidt. 2008-05-15 06:04:17 +00:00
68e9f05794 Add freeimage cmake file. 2008-05-15 05:57:11 +00:00
0f186e688f Remove 'virtual' from non virtual methods. 2008-05-08 21:15:05 +00:00
38e9652d64 Remove executable flag. 2008-05-08 18:20:55 +00:00
f08114c1b5 Whoops, check in fix for vc9. 2008-05-08 18:18:53 +00:00
7b9f891f92 Update vc9 projects. 2008-05-07 21:58:50 +00:00
787c9bb8fb Add vc9 projects. 2008-05-07 21:55:33 +00:00
42220b981e Add vc9 folder 2008-05-07 21:45:48 +00:00
70331a37fd Refactor compression functions, group them into class methods. 2008-05-06 23:55:19 +00:00
2ffc4cd7ad Oops, checked in the file before saving it. 2008-05-06 23:34:46 +00:00
4ba8e87a38 Oops, checked in the file before saving it. 2008-05-06 23:34:13 +00:00
d440d68aa8 Update ChangeLog. 2008-05-06 23:22:04 +00:00
48f61dbfc0 Add support for linear and swizzle transforms. Fixes issue 4. 2008-05-06 23:21:39 +00:00
94c3fa75a8 Add comments indicating where to perform linear color transforms. 2008-05-06 22:01:23 +00:00
c562af6d9b Integrate YCoCg color space conversion by Jim Tilander. 2008-05-06 21:49:10 +00:00
a889f2fda6 Add support for alpha modes in the CUDA compressors. 2008-05-06 20:04:05 +00:00
d855d0461b Add single color checks to CUDA compressors.
Use optimized bitmap table for CTX compressor.
2008-05-06 19:52:27 +00:00
246f2a409a Add cpp file to project so that a library is built. 2008-05-06 19:48:43 +00:00
6a6b3edce1 factorial optimization suggested by pponywong. 2008-05-06 06:37:06 +00:00
52b3bc9437 Update project files. Remove fast compressor, add optimal compressor. 2008-04-29 22:34:09 +00:00
f6ab357b09 Add missing files to project. 2008-04-29 22:33:42 +00:00
ce3a65c03e Fix operator= in String class. 2008-04-29 22:32:12 +00:00
8d9bf5c0b3 Fix after refactoring CPU compressors. Changes were not tested with CUDA enabled. 2008-04-29 22:31:55 +00:00
ab5265e642 Remove declaration of method that was removed. 2008-04-28 08:39:24 +00:00
fd1d5e41c7 Add missing files! 2008-04-28 06:22:26 +00:00
3980d5dc21 Update changelog. 2008-04-26 09:17:16 +00:00
15e7125b4b Check for single color blocks in all compressors. 2008-04-26 09:16:56 +00:00
921ee354c0 Remove legacy compressors.
Add iteration count parameter to iterative alpha compressor.
Add optimal compressors.
2008-04-26 08:02:30 +00:00
e3f7e303e4 Use FLT_MAX instead of INFINITE. The latter not supported in msvc. 2008-04-20 06:01:50 +00:00
1df69495fc Precompute fast cluster fit factors, and store as static const.
nvtt is completely reentrant now. Fixes issue 37.
Clean up interface of CUDA compressors.
2008-04-18 08:49:32 +00:00
91eb30667f Add TLS class wrapper.
Fix AutoPtr operator=.
Fix typo.
2008-04-17 18:39:01 +00:00
6db5cffca6 Fix changelog. 2008-04-17 09:28:38 +00:00
34ae5bcb6f Merge 2.0 branch fixes. 2008-04-17 07:17:46 +00:00
fe130a9906 Add DXT1a single color compressor. 2008-04-17 07:00:51 +00:00
bade8e5e09 Merge private branch. 2008-04-17 07:00:19 +00:00
141a05edf4 Merge private branch. 2008-04-17 06:59:29 +00:00
7d3facd81a Merge private branch. 2008-04-17 06:59:13 +00:00
17a4f765fb Merge private branch. 2008-04-17 06:58:43 +00:00
cb91740591 Merge private branch. 2008-04-17 06:58:18 +00:00
d10295fbf6 Use DXT1a single color compressor. 2008-04-17 06:55:26 +00:00
fa5e1f5a07 Add single color DXT1a compressor. 2008-04-17 06:54:29 +00:00
9d47e100f1 Add better support for the DX10 DDS formats. 2008-04-11 23:58:41 +00:00
db1b30ee4b Update changelog. 2008-04-11 22:04:59 +00:00
4c759f999c Integrate decompressor tool improvements submitted by Amorilia. 2008-04-11 22:03:42 +00:00
299ad176fc Add experimental image based interface. 2008-04-11 08:06:15 +00:00
5070cc98d3 Do not use constructor that initializes POD types. 2008-04-11 06:50:36 +00:00
133ebfb282 Remove unused parameter warnings.
Do not compile tokenizer; it's not being used, and does not work on win64 yet.
2008-04-09 09:06:19 +00:00
ebe8054728 Cache HAVE_* variables so that they can be edited through the cmake gui. 2008-04-06 05:59:13 +00:00
aa14653d96 Do not cache CUDA_FOUND variable. 2008-04-06 05:54:53 +00:00
389adb5368 Update change log, merge 2.0 changes, add attributions. 2008-03-27 04:45:11 +00:00
bd3314f4af Add inputOptions argument to compressors, so that they can access alpha mode. 2008-03-27 04:28:17 +00:00
065c5f0689 Cleanup simple compressors. Move code from FastCompress to QuickCompress. 2008-03-20 01:39:02 +00:00
cc8656f12b Update project files. 2008-03-14 08:42:24 +00:00
d2384cf47f Remove unused methods. 2008-03-14 08:40:48 +00:00
aff59c22b8 remove unused compressors 2008-03-14 08:40:11 +00:00
59be16d40a Remove unused fitting code. 2008-03-14 08:39:03 +00:00
b7a724448b Remove unnecessary dependency. 2008-03-14 07:32:59 +00:00
259e7c58fd Merge Viktor Linder patch into 2.0 and trunk.
Fixes RGB modes with less than 32 bpp.
2008-03-11 21:22:54 +00:00
307c8b99ee Add support for premultiplied alpha. Patch by Charles Nicholson. 2008-03-07 00:41:03 +00:00
6b933c4f62 Fix post-build command. Copy headers to include/nvtt/. 2008-03-06 20:28:43 +00:00
fd1ac3c61f Add quotes around post build command arguments. Reported by Richard Sim. 2008-03-05 23:26:12 +00:00
65aa7e1eaa Add interface for swizzle color xform. 2008-03-05 22:35:16 +00:00
f5ae4c1a9a Fix indexMirror error reported by Chris Lambert. 2008-03-05 19:42:45 +00:00
75c09220c8 Fix Image copy ctor bug reported by Richard Sim. 2008-03-05 19:11:41 +00:00
9f4b4bd532 Update comments about hole filling algorithms. 2008-03-04 00:13:44 +00:00
bce983f39e Add post build command to copy header files. 2008-02-28 22:07:08 +00:00
ff93ad41cb Fix end of lines. 2008-02-28 21:45:46 +00:00
56c7771100 Fix end of lines. 2008-02-28 21:45:26 +00:00
ccced843e3 Use smaller allocations to prevent errors.
Check for allocation errors.
2008-02-28 21:45:04 +00:00
dafe2b8841 Hide copy ctor and operator to prevent compiler warnings.
Wrap pimpl using NVTT_DECLARE_PIMPL macro.
2008-02-28 21:14:40 +00:00
e3e7fcb226 Check cuda errors to find out whether the cuda context initialization succeeded. 2008-02-28 17:52:32 +00:00
970395fba8 Fix osx build. 2008-02-28 17:02:29 +00:00
8a24a93e2f Disable CUDA when memory allocations fail. 2008-02-28 16:06:27 +00:00
86 changed files with 4321 additions and 5652 deletions

View File

@ -1,4 +1,4 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.4.0)
CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0)
PROJECT(NV)
ENABLE_TESTING()
@ -16,6 +16,13 @@ MESSAGE(STATUS "Setting optimal options")
MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}")
MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}")
IF(NVTT_SHARED)
SET(NVCORE_SHARED TRUE)
SET(NVMATH_SHARED TRUE)
SET(NVIMAGE_SHARED TRUE)
ENDIF(NVTT_SHARED)
ADD_SUBDIRECTORY(src)
IF(WIN32)

View File

@ -1,11 +1,51 @@
NVIDIA Texture Tools version 2.1.0
* CTX1 CUDA compressor.
* DXT1n CUDA compressor.
NVIDIA Texture Tools version 2.0.6
* Fix dll version checking.
* Detect CUDA 2.1 and future CUDA versions correctly.
* Print CUDA detection message in nvcompress.
* Select the fastest CUDA device.
* Compile squish with -fPIC. Fixes issue 74.
* Fix warnings under gcc 4.3.2.
* Fix nvzoom option typo by Frank Richter. Fixes issue 81.
* Do not use CUDA to compress small mipmaps. Fixes issue 76.
* Compute mipmaps of semi-transparent images correctly.
* Shutdown CUDA properly. Fixes issue 83.
* Fix pixel format conversions. Fixes issue 87.
* Update single color compression tables. Fixes issue 85.
NVIDIA Texture Tools version 2.0.5
* Fix error in single color compressor. Fixes issue 66.
* Detect mismatch between CUDA runtime and driver, and disable CUDA in that case.
* Fix cmake files when compiling NVTT as a shared library.
* When linking nvtt dynamically on unix, link all libraries dynamically.
* Select fastest CUDA device.
NVIDIA Texture Tools version 2.0.4
* Fix error in RGB format output; reported by jonsoh. See issue 49.
* Added support for RGB format dithering by jonsoh. Fixes issues 50 and 51.
* Prevent infinite loop in indexMirror when width equals 1. Fixes issue 65.
* Implement general scale filter, including upsampling.
NVIDIA Texture Tools version 2.0.3
* More accurate DXT3 compressor. Fixes issue 38.
* Remove legacy compressors. Fix issue 34.
* Check for single color in all compressors. Fixes issue 43.
* Fix error in fast downsample filter, reported by Noel Llopis.
NVIDIA Texture Tools version 2.0.2
* Fix copy ctor error reported by Richard Sim.
* Fix indexMirror error reported by Chris Lambert.
* Fix vc8 post build command, reported by Richard Sim.
* Fix RGBA modes with less than 32 bpp by Viktor Linder.
* Fix alpha decompression by Amorilia. See issue 40.
* Avoid default-initialized constructors for POD types, reported by Jim Tilander.
* Add single color compressor for DXT1a.
* Set swizzle code to ATI2 files. See issue 41.
NVIDIA Texture Tools version 2.0.1
* Fix memory leaks.
* Pre-allocate device memory for CUDA compressor.
* Add single color compressor.
* Add single color compressor. Thanks to Amir Ebrahimi.
* Better CUDA error checking.
NVIDIA Texture Tools version 2.0.0
* Fixed PSNR formula in nvimgdiff.

View File

@ -2,7 +2,7 @@
--------------------------------------------------------------------------------
NVIDIA Texture Tools
README.txt
Version 2.1
Version 2.0
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------

View File

@ -1 +1 @@
2.1.0
2.0.6

View File

@ -46,9 +46,9 @@ FIND_LIBRARY (CUDA_RUNTIME_LIBRARY
DOC "The CUDA runtime library")
IF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY)
SET (CUDA_FOUND 1 CACHE STRING "Set to 1 if CUDA is found, 0 otherwise")
SET (CUDA_FOUND TRUE)
ELSE (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY)
SET (CUDA_FOUND 0 CACHE STRING "Set to 1 if CUDA is found, 0 otherwise")
SET (CUDA_FOUND FALSE)
ENDIF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY)
SET (CUDA_LIBRARIES ${CUDA_RUNTIME_LIBRARY})
@ -57,7 +57,7 @@ MARK_AS_ADVANCED (CUDA_FOUND CUDA_COMPILER CUDA_RUNTIME_LIBRARY)
#SET(CUDA_OPTIONS "-ncfe")
SET(CUDA_OPTIONS "")
SET(CUDA_OPTIONS "--host-compilation=C")
IF (CUDA_EMULATION)
SET (CUDA_OPTIONS "${CUDA_OPTIONS} -deviceemu")

configure vendored
View File

@ -53,7 +53,7 @@ echo "-- Configuring nvidia-texture-tools "`cat VERSION`
mkdir -p ./build
cd ./build
$CMAKE .. -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1
$CMAKE .. -DNVTT_SHARED=1 -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1
cd ..
echo ""

BIN
gnuwin32/lib/libpng.a Normal file

Binary file not shown.

BIN
gnuwin32/lib/libz.a Normal file

Binary file not shown.

View File

@ -278,6 +278,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17"
/>
<Tool

View File

@ -281,6 +281,10 @@
RelativePath="..\..\..\src\nvcore\Debug.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Library.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Memory.cpp"
>
@ -315,6 +319,10 @@
RelativePath="..\..\..\src\nvcore\DefsVcWin32.h"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Library.h"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Memory.h"
>

View File

@ -277,6 +277,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17"
/>
<Tool

View File

@ -355,6 +355,10 @@
RelativePath="..\..\..\src\nvimage\nvimage.h"
>
</File>
<File
RelativePath="..\..\..\src\nvimage\PixelFormat.h"
>
</File>
<File
RelativePath="..\..\..\src\nvimage\PsdFile.h"
>

View File

@ -278,11 +278,7 @@
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvmath\Eigen.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Fitting.cpp"
RelativePath="..\..\..\src\nvmath\Plane.cpp"
>
</File>
</Filter>
@ -299,18 +295,14 @@
RelativePath="..\..\..\src\nvmath\Color.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Eigen.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Fitting.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Matrix.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Plane.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Vector.h"
>

View File

@ -3,18 +3,18 @@ Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}"
@ -34,16 +34,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
ProjectSection(ProjectDependencies) = postProject
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
@ -55,18 +55,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stress", "stress\stress.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}"
ProjectSection(ProjectDependencies) = postProject
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU
@ -319,22 +314,6 @@ Global
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -53,8 +53,8 @@ END
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,1,0,0
PRODUCTVERSION 2,1,0,0
FILEVERSION 2,0,6,0
PRODUCTVERSION 2,0,6,0
FILEFLAGSMASK 0x17L
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 1, 0, 0"
VALUE "FileVersion", "2, 0, 6, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 1, 0, 0"
VALUE "ProductVersion", "2, 0, 6, 0"
END
END
BLOCK "VarFileInfo"

View File

@ -96,6 +96,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -258,6 +260,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -420,6 +424,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -578,6 +584,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -683,7 +691,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -693,7 +701,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -703,7 +711,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -713,7 +721,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -841,10 +849,6 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
>
@ -857,6 +861,10 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
>
@ -903,10 +911,6 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\InputOptions.h"
>
@ -919,6 +923,10 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OutputOptions.h"
>

View File

@ -1,201 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="stress"
ProjectGUID="{317B694E-B5C1-42A6-956F-FC12B69175A6}"
RootNamespace="stress"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvtt\tests\stress.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -50,6 +50,7 @@ ENDIF(CG_FOUND)
# CUDA
INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake)
IF(CUDA_FOUND)
SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise")
MESSAGE(STATUS "Looking for CUDA - found")
ELSE(CUDA_FOUND)
MESSAGE(STATUS "Looking for CUDA - not found")
@ -58,7 +59,7 @@ ENDIF(CUDA_FOUND)
# Maya
INCLUDE(${NV_CMAKE_DIR}/FindMaya.cmake)
IF(MAYA_FOUND)
SET(HAVE_MAYA MAYA_FOUND)
SET(HAVE_MAYA ${MAYA_FOUND} CACHE BOOL "Set to TRUE if Maya is found, FALSE otherwise")
MESSAGE(STATUS "Looking for Maya - found")
ELSE(MAYA_FOUND)
MESSAGE(STATUS "Looking for Maya - not found")
@ -67,7 +68,7 @@ ENDIF(MAYA_FOUND)
# JPEG
INCLUDE(FindJPEG)
IF(JPEG_FOUND)
SET(HAVE_JPEG JPEG_FOUND)
SET(HAVE_JPEG ${JPEG_FOUND} CACHE BOOL "Set to TRUE if JPEG is found, FALSE otherwise")
MESSAGE(STATUS "Looking for JPEG - found")
ELSE(JPEG_FOUND)
MESSAGE(STATUS "Looking for JPEG - not found")
@ -76,7 +77,7 @@ ENDIF(JPEG_FOUND)
# PNG
INCLUDE(FindPNG)
IF(PNG_FOUND)
SET(HAVE_PNG PNG_FOUND)
SET(HAVE_PNG ${PNG_FOUND} CACHE BOOL "Set to TRUE if PNG is found, FALSE otherwise")
MESSAGE(STATUS "Looking for PNG - found")
ELSE(PNG_FOUND)
MESSAGE(STATUS "Looking for PNG - not found")
@ -85,7 +86,7 @@ ENDIF(PNG_FOUND)
# TIFF
INCLUDE(FindTIFF)
IF(TIFF_FOUND)
SET(HAVE_TIFF TIFF_FOUND)
SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise")
MESSAGE(STATUS "Looking for TIFF - found")
ELSE(TIFF_FOUND)
MESSAGE(STATUS "Looking for TIFF - not found")
@ -94,7 +95,7 @@ ENDIF(TIFF_FOUND)
# OpenEXR
INCLUDE(${NV_CMAKE_DIR}/FindOpenEXR.cmake)
IF(OPENEXR_FOUND)
SET(HAVE_OPENEXR OPENEXR_FOUND)
SET(HAVE_OPENEXR ${OPENEXR_FOUND} CACHE BOOL "Set to TRUE if OpenEXR is found, FALSE otherwise")
MESSAGE(STATUS "Looking for OpenEXR - found")
ELSE(OPENEXR_FOUND)
MESSAGE(STATUS "Looking for OpenEXR - not found")

View File

@ -18,16 +18,20 @@ SET(CORE_SRCS
TextReader.cpp
TextWriter.h
TextWriter.cpp
Tokenizer.h
Tokenizer.cpp
Radix.h
Radix.cpp)
Radix.cpp
Library.h
Library.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# targets
ADD_DEFINITIONS(-DNVCORE_EXPORTS)
IF(UNIX)
SET(LIBS ${LIBS} ${CMAKE_DL_LIBS})
ENDIF(UNIX)
IF(NVCORE_SHARED)
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ELSE(NVCORE_SHARED)

View File

@ -446,7 +446,7 @@ namespace nv
// Call default constructors
for( i = old_size; i < new_size; i++ ) {
new(m_buffer+i) T(); // placement new
new(m_buffer+i) T; // placement new
}
}
@ -824,13 +824,13 @@ namespace nv
}
/// Number of entries in the hash.
int size()
int size() const
{
return entry_count;
}
/// Number of entries in the hash.
int count()
int count() const
{
return size();
}
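
The hunk above replaces value-initialization, new(m_buffer+i) T(), with default-initialization, new(m_buffer+i) T, so that POD members are left untouched, matching the "Do not use constructor that initializes POD types" commit listed above. A minimal standalone sketch of the difference, using a hypothetical Pod type (not repository code):

#include <cassert>
#include <cstdio>

struct Pod { int x; };               // hypothetical POD type

int main()
{
    Pod * a = new Pod();             // value-initialization: a->x is zero
    Pod * b = new Pod;               // default-initialization: b->x is indeterminate
    assert(a->x == 0);
    printf("a->x = %d\n", a->x);     // always prints 0
    // printf("%d\n", b->x);         // reading b->x would be undefined behavior
    delete a;
    delete b;
    return 0;
}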

View File

@ -28,7 +28,7 @@
#endif
#if NV_OS_LINUX && defined(HAVE_EXECINFO_H)
# include <execinfo.h>
# include <execinfo.h> // backtrace
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
# include <cxxabi.h>
# endif
@ -39,6 +39,13 @@
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
# include <ucontext.h>
# undef HAVE_EXECINFO_H
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
# include <execinfo.h> // backtrace
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
# include <cxxabi.h>
# endif
# endif
#endif
#include <stdexcept> // std::runtime_error
@ -74,7 +81,9 @@ namespace
// TODO write minidump
static LONG WINAPI nvTopLevelFilter( struct _EXCEPTION_POINTERS *pExceptionInfo ) {
static LONG WINAPI nvTopLevelFilter( struct _EXCEPTION_POINTERS * pExceptionInfo)
{
NV_UNUSED(pExceptionInfo);
/* BOOL (WINAPI * Dump) (HANDLE, DWORD, HANDLE, MINIDUMP_TYPE, PMINIDUMP_EXCEPTION_INFORMATION, PMINIDUMP_USER_STREAM_INFORMATION, PMINIDUMP_CALLBACK_INFORMATION );
AutoString dbghelp_path(512);
@ -126,6 +135,14 @@ namespace
#if defined(HAVE_EXECINFO_H) // NV_OS_LINUX
static bool nvHasStackTrace() {
#if NV_OS_DARWIN
return backtrace != NULL;
#else
return true;
#endif
}
static void nvPrintStackTrace(void * trace[], int size, int start=0) {
char ** string_array = backtrace_symbols(trace, size);
@ -164,24 +181,36 @@ namespace
static void * callerAddress(void * secret)
{
# if NV_OS_DARWIN && NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.srr0;
# elif NV_OS_DARWIN && NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.eip;
# elif NV_CPU_X86_64
// #define REG_RIP REG_INDEX(rip) // seems to be 16
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[REG_RIP];
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
# elif NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext.regs->nip;
# if NV_OS_DARWIN
# if defined(_STRUCT_MCONTEXT)
# if NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->__ss.__srr0;
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->__ss.__eip;
# endif
# else
# if NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.srr0;
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.eip;
# endif
# endif
# else
return NULL;
# if NV_CPU_X86_64
// #define REG_RIP REG_INDEX(rip) // seems to be 16
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[REG_RIP];
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
# elif NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext.regs->nip;
# endif
# endif
// How to obtain the instruction pointers in different platforms, from mlton's source code.
@ -226,17 +255,18 @@ namespace
}
# if defined(HAVE_EXECINFO_H)
if (nvHasStackTrace()) // in case of weak linking
{
void * trace[64];
int size = backtrace(trace, 64);
void * trace[64];
int size = backtrace(trace, 64);
if (pnt != NULL) {
// Overwrite sigaction with caller's address.
trace[1] = pnt;
}
if (pnt != NULL) {
// Overwrite sigaction with caller's address.
trace[1] = pnt;
nvPrintStackTrace(trace, size, 1);
}
nvPrintStackTrace(trace, size, 1);
# endif // defined(HAVE_EXECINFO_H)
exit(0);
@ -371,9 +401,12 @@ namespace
# endif
# if defined(HAVE_EXECINFO_H)
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 3);
if (nvHasStackTrace())
{
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 2);
}
# endif
// Exit cleanly.
@ -420,9 +453,12 @@ void NV_CDECL nvDebug(const char *msg, ...)
void debug::dumpInfo()
{
#if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H)
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 1);
if (nvHasStackTrace())
{
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 1);
}
#endif
}
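
The nvHasStackTrace helper above tests a weakly linked symbol before calling it: on Darwin the backtrace function resolves to NULL at run time on systems that lack it (OS X before 10.5). A standalone sketch of the same guard, assuming an execinfo-capable toolchain (illustrative only, not repository code):

#include <execinfo.h>   // backtrace, backtrace_symbols (weakly linked on older OS X SDKs)
#include <cstdio>
#include <cstdlib>

static void dumpTrace()
{
    // With weak linking the function symbol itself compares equal to NULL
    // when it is missing at run time, so test it before calling.
    if (backtrace != NULL) {
        void * trace[64];
        int count = backtrace(trace, 64);
        char ** symbols = backtrace_symbols(trace, count);
        for (int i = 0; i < count; i++) {
            printf("%s\n", symbols[i]);
        }
        free(symbols);
    }
}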

View File

@ -2,8 +2,7 @@
#error "Do not include this file directly."
#endif
#include <stdlib.h> // uint8_t, int8_t, ...
#include <stdint.h> // uint8_t, int8_t, ...
// Function linkage
#define DLL_IMPORT

View File

@ -19,7 +19,9 @@
// Set standard function names.
#define snprintf _snprintf
#define vsnprintf _vsnprintf
#if _MSC_VER < 1500
# define vsnprintf _vsnprintf
#endif
#define vsscanf _vsscanf
#define chdir _chdir
#define getcwd _getcwd
@ -70,8 +72,6 @@ typedef uint32 uint;
#pragma warning(disable : 4711) // function selected for automatic inlining
#pragma warning(disable : 4725) // Pentium fdiv bug
#pragma warning(disable : 4345) // behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized
#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged.
#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup

src/nvcore/Library.cpp Normal file
View File

@ -0,0 +1,41 @@
#include "Library.h"
#include "Debug.h"
#if NV_OS_WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
#else
#include <dlfcn.h>
#endif
void * nvLoadLibrary(const char * name)
{
#if NV_OS_WIN32
return (void *)LoadLibraryExA( name, NULL, 0 );
#else
return dlopen(name, RTLD_LAZY);
#endif
}
void nvUnloadLibrary(void * handle)
{
nvDebugCheck(handle != NULL);
#if NV_OS_WIN32
FreeLibrary((HMODULE)handle);
#else
dlclose(handle);
#endif
}
void * nvBindSymbol(void * handle, const char * symbol)
{
#if NV_OS_WIN32
return (void *)GetProcAddress((HMODULE)handle, symbol);
#else
return (void *)dlsym(handle, symbol);
#endif
}

src/nvcore/Library.h Normal file
View File

@ -0,0 +1,50 @@
// This code is in the public domain -- castano@gmail.com
#ifndef NV_CORE_LIBRARY_H
#define NV_CORE_LIBRARY_H
#include <nvcore/nvcore.h>
#if NV_OS_WIN32
#define LIBRARY_NAME(name) #name ".dll"
#elif NV_OS_DARWIN
#define NV_LIBRARY_NAME(name) "lib" #name ".dylib"
#else
#define NV_LIBRARY_NAME(name) "lib" #name ".so"
#endif
NVCORE_API void * nvLoadLibrary(const char * name);
NVCORE_API void nvUnloadLibrary(void * lib);
NVCORE_API void * nvBindSymbol(void * lib, const char * symbol);
class NVCORE_CLASS Library
{
public:
Library(const char * name)
{
handle = nvLoadLibrary(name);
}
~Library()
{
if (isValid())
{
nvUnloadLibrary(handle);
}
}
bool isValid() const
{
return handle != NULL;
}
void * bindSymbol(const char * symbol)
{
return nvBindSymbol(handle, symbol);
}
private:
void * handle;
};
#endif // NV_CORE_LIBRARY_H
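
The Library class above is a thin RAII wrapper over the nvLoadLibrary/nvUnloadLibrary/nvBindSymbol helpers added in Library.cpp. A minimal usage sketch; the library and symbol names below are hypothetical, purely for illustration:

#include <nvcore/Library.h>
#include <cstdio>

typedef int (*VersionFunc)(int *);       // hypothetical signature of the bound symbol

int main()
{
    Library lib("libexample.so");        // hypothetical library name
    if (!lib.isValid()) {
        printf("library not found\n");
        return 1;
    }
    VersionFunc getVersion = (VersionFunc)lib.bindSymbol("exampleGetVersion");
    if (getVersion != NULL) {
        int version = 0;
        getVersion(&version);
        printf("version: %d\n", version);
    }
    return 0;                            // ~Library() unloads the handle automatically
}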

View File

@ -18,6 +18,8 @@ void * nv::mem::malloc(size_t size)
void * nv::mem::malloc(size_t size, const char * file, int line)
{
NV_UNUSED(file);
NV_UNUSED(line);
return ::malloc(size);
}

View File

@ -24,7 +24,7 @@ __forceinline void nvPrefetch(const void * mem)
#else // NV_CC_MSVC
// do nothing in other case.
#define piPrefetch(ptr)
#define nvPrefetch(ptr)
#endif // NV_CC_MSVC

View File

@ -43,8 +43,11 @@ public:
/** Delete owned pointer and assign new one. */
void operator=( T * p ) {
delete m_ptr;
m_ptr = p;
if (p != m_ptr)
{
delete m_ptr;
m_ptr = p;
}
}
/** Member access. */
@ -249,14 +252,14 @@ public:
/** -> operator. */
BaseClass * operator -> () const
{
piCheck( m_ptr != NULL );
nvCheck( m_ptr != NULL );
return m_ptr;
}
/** * operator. */
BaseClass & operator*() const
{
piCheck( m_ptr != NULL );
nvCheck( m_ptr != NULL );
return *m_ptr;
}
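
The operator= change above adds a self-assignment guard. A short sketch of the failure it prevents, using a hypothetical Foo type (the namespace assumption is noted in the code; this is not repository code):

#include <nvcore/Ptr.h>

using namespace nv;                  // assuming AutoPtr is declared in the nv namespace

struct Foo { int value; };           // hypothetical payload type

void example()
{
    AutoPtr<Foo> owner(new Foo);
    Foo * raw = new Foo;
    owner = raw;                     // deletes the first Foo, takes ownership of raw
    owner = raw;                     // same pointer again: without the guard this would
                                     // delete raw and then keep a dangling pointer
}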

View File

@ -47,25 +47,25 @@ public:
/** @name Stream implementation. */
//@{
virtual void seek( int pos )
virtual void seek( uint pos )
{
nvDebugCheck(m_fp != NULL);
nvDebugCheck(pos >= 0 && pos < size());
nvDebugCheck(pos < size());
fseek(m_fp, pos, SEEK_SET);
}
virtual int tell() const
virtual uint tell() const
{
nvDebugCheck(m_fp != NULL);
return ftell(m_fp);
}
virtual int size() const
virtual uint size() const
{
nvDebugCheck(m_fp != NULL);
int pos = ftell(m_fp);
uint pos = ftell(m_fp);
fseek(m_fp, 0, SEEK_END);
int end = ftell(m_fp);
uint end = ftell(m_fp);
fseek(m_fp, pos, SEEK_SET);
return end;
}
@ -117,11 +117,11 @@ public:
/** @name Stream implementation. */
//@{
/// Write data.
virtual void serialize( void * data, int len )
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(m_fp != NULL);
fwrite(data, len, 1, m_fp);
return (uint)fwrite(data, 1, len, m_fp);
}
virtual bool isLoading() const
@ -156,11 +156,11 @@ public:
/** @name Stream implementation. */
//@{
/// Read data.
virtual void serialize( void * data, int len )
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(m_fp != NULL);
fread(data, len, 1, m_fp);
return (uint)fread(data, 1, len, m_fp);
}
virtual bool isLoading() const
@ -184,33 +184,40 @@ class NVCORE_CLASS MemoryInputStream : public Stream
public:
/// Ctor.
MemoryInputStream( const uint8 * mem, int size ) :
MemoryInputStream( const uint8 * mem, uint size ) :
m_mem(mem), m_ptr(mem), m_size(size) { }
/** @name Stream implementation. */
//@{
/// Read data.
virtual void serialize( void * data, int len )
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(!isError());
uint left = m_size - tell();
if (len > left) len = left;
memcpy( data, m_ptr, len );
m_ptr += len;
return len;
}
virtual void seek( int pos )
virtual void seek( uint pos )
{
nvDebugCheck(!isError());
m_ptr = m_mem + pos;
nvDebugCheck(!isError());
}
virtual int tell() const
virtual uint tell() const
{
return int(m_ptr - m_mem);
nvDebugCheck(m_ptr >= m_mem);
return uint(m_ptr - m_mem);
}
virtual int size() const
virtual uint size() const
{
return m_size;
}
@ -252,7 +259,7 @@ private:
const uint8 * m_mem;
const uint8 * m_ptr;
int m_size;
uint m_size;
};
@ -286,17 +293,19 @@ public:
/** @name Stream implementation. */
//@{
/// Read data.
virtual void serialize( void * data, int len )
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
m_s->serialize( data, len );
len = m_s->serialize( data, len );
if( m_s->isError() ) {
throw std::exception();
}
return len;
}
virtual void seek( int pos )
virtual void seek( uint pos )
{
m_s->seek( pos );
@ -305,12 +314,12 @@ public:
}
}
virtual int tell() const
virtual uint tell() const
{
return m_s->tell();
}
virtual int size() const
virtual uint size() const
{
return m_s->size();
}
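Because serialize() now reports the number of bytes actually transferred, callers can detect short reads without extra bookkeeping, and MemoryInputStream clamps the request to what is left in the buffer. A small sketch (buffer contents are made up; the header name and nv namespace are assumed):

#include <nvcore/StdStream.h> // assumed header for the stream classes above

void shortReadExample()
{
    const uint8 data[4] = { 0x11, 0x22, 0x33, 0x44 };
    nv::MemoryInputStream stream(data, 4);

    uint8 buffer[8];
    uint read = stream.serialize(buffer, 8); // asks for 8 bytes, only 4 remain

    // read == 4: the request was clamped instead of running past the end of the buffer.
    (void)read;
}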

View File

@ -209,48 +209,11 @@ StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
}
/** Copy string. */
StringBuilder::StringBuilder( const char * s )
StringBuilder::StringBuilder( const char * s ) : m_size(0), m_str(NULL)
{
copy(s);
}
/** Allocate and copy string. */
StringBuilder::StringBuilder( int size_hint, const StringBuilder & s) : m_size(size_hint), m_str(NULL)
{
nvDebugCheck(m_size > 0);
m_str = strAlloc(m_size);
copy(s);
}
/** Allocate and format string. */
StringBuilder::StringBuilder( const char * fmt, ... ) : m_size(0), m_str(NULL)
{
nvDebugCheck(fmt != NULL);
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/** Allocate and format string. */
StringBuilder::StringBuilder( int size_hint, const char * fmt, ... ) : m_size(size_hint), m_str(NULL)
{
nvDebugCheck(m_size > 0);
nvDebugCheck(fmt != NULL);
m_str = strAlloc(m_size);
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/** Delete the string. */
StringBuilder::~StringBuilder()
{
@ -278,8 +241,7 @@ StringBuilder & StringBuilder::format( const char * fmt, ... )
/** Format a string safely. */
StringBuilder & StringBuilder::format( const char * fmt, va_list arg )
{
nvCheck(fmt != NULL);
nvCheck(m_size >= 0);
nvDebugCheck(fmt != NULL);
if( m_size == 0 ) {
m_size = 64;
@ -327,8 +289,7 @@ StringBuilder & StringBuilder::format( const char * fmt, va_list arg )
/** Append a string. */
StringBuilder & StringBuilder::append( const char * s )
{
nvCheck(s != NULL);
nvCheck(m_size >= 0);
nvDebugCheck(s != NULL);
const uint slen = uint(strlen( s ));
@ -475,31 +436,6 @@ void StringBuilder::reset()
}
Path::Path(const char * fmt, ...)
{
nvDebugCheck( fmt != NULL );
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
Path::Path(int size_hint, const char * fmt, ...) : StringBuilder(size_hint)
{
nvDebugCheck( fmt != NULL );
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/// Get the file name from a path.
const char * Path::fileName() const
{

View File

@ -14,7 +14,7 @@ namespace nv
uint strHash(const char * str, uint h) NV_PURE;
/// String hash vased on Bernstein's hash.
/// String hash based on Bernstein's hash.
inline uint strHash(const char * data, uint h = 5381)
{
uint i;
@ -47,9 +47,6 @@ namespace nv
explicit StringBuilder( int size_hint );
StringBuilder( const char * str );
StringBuilder( const StringBuilder & );
StringBuilder( int size_hint, const StringBuilder & );
StringBuilder( const char * format, ... ) __attribute__((format (printf, 2, 3)));
StringBuilder( int size_hint, const char * format, ... ) __attribute__((format (printf, 3, 4)));
~StringBuilder();
@ -122,16 +119,14 @@ namespace nv
};
/// Path string.
/// Path string. @@ This should be called PathBuilder.
class NVCORE_CLASS Path : public StringBuilder
{
public:
Path() : StringBuilder() {}
explicit Path(int size_hint) : StringBuilder(size_hint) {}
Path(const StringBuilder & str) : StringBuilder(str) {}
Path(int size_hint, const StringBuilder & str) : StringBuilder(size_hint, str) {}
Path(const char * format, ...) __attribute__((format (printf, 2, 3)));
Path(int size_hint, const char * format, ...) __attribute__((format (printf, 3, 4)));
Path(const char * str) : StringBuilder(str) {}
Path(const Path & path) : StringBuilder(path) {}
const char * fileName() const;
const char * extension() const;
@ -213,9 +208,12 @@ namespace nv
/// Implement value semantics.
String & operator=( const String & str )
{
release();
data = str.data;
addRef();
if (str.data != data)
{
release();
data = str.data;
addRef();
}
return *this;
}
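With the printf-style constructors removed from StringBuilder and Path, call sites construct first and then format or append. A brief usage sketch; the nvcore/StrLib.h header name is assumed, and the results shown in comments assume the usual semantics of fileName() and extension() (pointers into the stored string, extension including the dot):

#include <nvcore/StrLib.h>

void stringBuilderExample()
{
    nv::StringBuilder builder;
    builder.format("mip_%d", 3);   // "mip_3"
    builder.append(".dds");        // "mip_3.dds"

    nv::Path path;
    path.format("textures/diffuse_%02d.tga", 7); // "textures/diffuse_07.tga"
    // path.fileName()  -> "diffuse_07.tga"
    // path.extension() -> ".tga"
}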

View File

@ -41,17 +41,17 @@ public:
ByteOrder byteOrder() const { return m_byteOrder; }
/// Serialize the given data. @@ Should return bytes serialized?
virtual void serialize( void * data, int len ) = 0;
/// Serialize the given data.
virtual uint serialize( void * data, uint len ) = 0;
/// Move to the given position in the archive.
virtual void seek( int pos ) = 0;
virtual void seek( uint pos ) = 0;
/// Return the current position in the archive.
virtual int tell() const = 0;
virtual uint tell() const = 0;
/// Return the current size of the archive.
virtual int size() const = 0;
virtual uint size() const = 0;
/// Determine if there has been any error.
virtual bool isError() const = 0;
@ -136,13 +136,13 @@ public:
protected:
/// Serialize in the stream byte order.
Stream & byteOrderSerialize( void * v, int len ) {
Stream & byteOrderSerialize( void * v, uint len ) {
if( m_byteOrder == getSystemByteOrder() ) {
serialize( v, len );
}
else {
for( int i=len-1; i>=0; i-- ) {
serialize( (uint8 *)v + i, 1 );
for( uint i = len; i > 0; i-- ) {
serialize( (uint8 *)v + i - 1, 1 );
}
}
return *this;
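The swap loop had to change shape because len is now unsigned: counting down with i >= 0 on an unsigned index would never terminate, so the loop runs from len down to 1 and addresses byte i - 1. The bytes are still emitted highest address first, which is what performs the swap. A standalone sketch of the same pattern:

#include <stdio.h>

typedef unsigned int  uint;
typedef unsigned char uint8;

// Prints the bytes of 'v' highest address first, one at a time, using the same
// loop shape as byteOrderSerialize above.
static void writeSwapped(const void * v, uint len)
{
    for (uint i = len; i > 0; i--)
    {
        printf("%02X ", ((const uint8 *)v)[i - 1]);
    }
    printf("\n");
}

int main()
{
    uint value = 0x11223344;
    writeSwapped(&value, (uint)sizeof(value)); // on a little-endian host: 11 22 33 44
    return 0;
}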

View File

@ -307,15 +307,6 @@ void ColorBlock::boundsRangeAlpha(Color32 * start, Color32 * end) const
}
void ColorBlock::bestFitRange(Color32 * start, Color32 * end) const
{
nvDebugCheck(start != NULL);
nvDebugCheck(end != NULL);
Vector3 axis = bestFitLine().direction();
computeRange(axis, start, end);
}
/// Sort colors by absolute value in their 16 bit representation.
void ColorBlock::sortColorsByAbsoluteValue()
{
@ -393,19 +384,6 @@ void ColorBlock::sortColors(const Vector3 & axis)
}
/// Get least squares line that best approximates the points of the color block.
Line3 ColorBlock::bestFitLine() const
{
Array<Vector3> pointArray(16);
for(int i = 0; i < 16; i++) {
pointArray.append(Vector3(m_color[i].r, m_color[i].g, m_color[i].b));
}
return Fit::bestLine(pointArray);
}
/// Get the volume of the color block.
float ColorBlock::volume() const
{

View File

@ -4,7 +4,6 @@
#define NV_IMAGE_COLORBLOCK_H
#include <nvmath/Color.h>
#include <nvmath/Fitting.h> // Line3
namespace nv
{
@ -33,16 +32,13 @@ namespace nv
void luminanceRange(Color32 * start, Color32 * end) const;
void boundsRange(Color32 * start, Color32 * end) const;
void boundsRangeAlpha(Color32 * start, Color32 * end) const;
void bestFitRange(Color32 * start, Color32 * end) const;
void sortColorsByAbsoluteValue();
void computeRange(const Vector3 & axis, Color32 * start, Color32 * end) const;
void sortColors(const Vector3 & axis);
Line3 bestFitLine() const;
float volume() const;
Line3 diameterLine() const;
// Accessors
const Color32 * colors() const;

View File

@ -54,6 +54,10 @@ namespace
static const uint FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1');
static const uint FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2');
static const uint FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y');
static const uint FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0');
// 32 bit RGB formats.
static const uint D3DFMT_R8G8B8 = 20;
static const uint D3DFMT_A8R8G8B8 = 21;
@ -253,6 +257,144 @@ namespace
D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4,
};
const char * getDxgiFormatString(DXGI_FORMAT dxgiFormat)
{
#define CASE(format) case DXGI_FORMAT_##format: return #format
switch(dxgiFormat)
{
CASE(UNKNOWN);
CASE(R32G32B32A32_TYPELESS);
CASE(R32G32B32A32_FLOAT);
CASE(R32G32B32A32_UINT);
CASE(R32G32B32A32_SINT);
CASE(R32G32B32_TYPELESS);
CASE(R32G32B32_FLOAT);
CASE(R32G32B32_UINT);
CASE(R32G32B32_SINT);
CASE(R16G16B16A16_TYPELESS);
CASE(R16G16B16A16_FLOAT);
CASE(R16G16B16A16_UNORM);
CASE(R16G16B16A16_UINT);
CASE(R16G16B16A16_SNORM);
CASE(R16G16B16A16_SINT);
CASE(R32G32_TYPELESS);
CASE(R32G32_FLOAT);
CASE(R32G32_UINT);
CASE(R32G32_SINT);
CASE(R32G8X24_TYPELESS);
CASE(D32_FLOAT_S8X24_UINT);
CASE(R32_FLOAT_X8X24_TYPELESS);
CASE(X32_TYPELESS_G8X24_UINT);
CASE(R10G10B10A2_TYPELESS);
CASE(R10G10B10A2_UNORM);
CASE(R10G10B10A2_UINT);
CASE(R11G11B10_FLOAT);
CASE(R8G8B8A8_TYPELESS);
CASE(R8G8B8A8_UNORM);
CASE(R8G8B8A8_UNORM_SRGB);
CASE(R8G8B8A8_UINT);
CASE(R8G8B8A8_SNORM);
CASE(R8G8B8A8_SINT);
CASE(R16G16_TYPELESS);
CASE(R16G16_FLOAT);
CASE(R16G16_UNORM);
CASE(R16G16_UINT);
CASE(R16G16_SNORM);
CASE(R16G16_SINT);
CASE(R32_TYPELESS);
CASE(D32_FLOAT);
CASE(R32_FLOAT);
CASE(R32_UINT);
CASE(R32_SINT);
CASE(R24G8_TYPELESS);
CASE(D24_UNORM_S8_UINT);
CASE(R24_UNORM_X8_TYPELESS);
CASE(X24_TYPELESS_G8_UINT);
CASE(R8G8_TYPELESS);
CASE(R8G8_UNORM);
CASE(R8G8_UINT);
CASE(R8G8_SNORM);
CASE(R8G8_SINT);
CASE(R16_TYPELESS);
CASE(R16_FLOAT);
CASE(D16_UNORM);
CASE(R16_UNORM);
CASE(R16_UINT);
CASE(R16_SNORM);
CASE(R16_SINT);
CASE(R8_TYPELESS);
CASE(R8_UNORM);
CASE(R8_UINT);
CASE(R8_SNORM);
CASE(R8_SINT);
CASE(A8_UNORM);
CASE(R1_UNORM);
CASE(R9G9B9E5_SHAREDEXP);
CASE(R8G8_B8G8_UNORM);
CASE(G8R8_G8B8_UNORM);
CASE(BC1_TYPELESS);
CASE(BC1_UNORM);
CASE(BC1_UNORM_SRGB);
CASE(BC2_TYPELESS);
CASE(BC2_UNORM);
CASE(BC2_UNORM_SRGB);
CASE(BC3_TYPELESS);
CASE(BC3_UNORM);
CASE(BC3_UNORM_SRGB);
CASE(BC4_TYPELESS);
CASE(BC4_UNORM);
CASE(BC4_SNORM);
CASE(BC5_TYPELESS);
CASE(BC5_UNORM);
CASE(BC5_SNORM);
CASE(B5G6R5_UNORM);
CASE(B5G5R5A1_UNORM);
CASE(B8G8R8A8_UNORM);
CASE(B8G8R8X8_UNORM);
default:
return "UNKNOWN";
}
#undef CASE
}
const char * getD3d10ResourceDimensionString(D3D10_RESOURCE_DIMENSION resourceDimension)
{
switch(resourceDimension)
{
default:
case D3D10_RESOURCE_DIMENSION_UNKNOWN: return "UNKNOWN";
case D3D10_RESOURCE_DIMENSION_BUFFER: return "BUFFER";
case D3D10_RESOURCE_DIMENSION_TEXTURE1D: return "TEXTURE1D";
case D3D10_RESOURCE_DIMENSION_TEXTURE2D: return "TEXTURE2D";
case D3D10_RESOURCE_DIMENSION_TEXTURE3D: return "TEXTURE3D";
}
}
} // namespace
namespace nv
@ -390,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (0 << 16) | (9 << 8) | (5); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (6); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;
@ -494,7 +636,16 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
// set fourcc pixel format.
this->pf.flags = DDPF_FOURCC;
this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = 0;
if (this->pf.fourcc == FOURCC_ATI2)
{
this->pf.bitcount = FOURCC_A2XY;
}
else
{
this->pf.bitcount = 0;
}
this->pf.rmask = 0;
this->pf.gmask = 0;
this->pf.bmask = 0;
@ -530,9 +681,9 @@ void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask
nvCheck(bitcount > 0 && bitcount <= 32);
// Align to 8.
if (bitcount < 8) bitcount = 8;
else if (bitcount < 16) bitcount = 16;
else if (bitcount < 24) bitcount = 24;
if (bitcount <= 8) bitcount = 8;
else if (bitcount <= 16) bitcount = 16;
else if (bitcount <= 24) bitcount = 24;
else bitcount = 32;
this->pf.fourcc = 0; //findD3D9Format(bitcount, rmask, gmask, bmask, amask);
@ -545,7 +696,8 @@ void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask
void DDSHeader::setDX10Format(uint format)
{
this->pf.flags = 0;
//this->pf.flags = 0;
this->pf.fourcc = FOURCC_DX10;
this->header10.dxgiFormat = format;
}
@ -593,7 +745,8 @@ void DDSHeader::swapBytes()
bool DDSHeader::hasDX10Header() const
{
return this->pf.flags == 0;
return this->pf.fourcc == FOURCC_DX10; // @@ This is according to AMD
//return this->pf.flags == 0; // @@ This is according to MS
}
@ -623,7 +776,7 @@ bool DirectDrawSurface::isValid() const
return false;
}
const uint required = (DDSD_WIDTH|DDSD_HEIGHT|DDSD_CAPS|DDSD_PIXELFORMAT);
const uint required = (DDSD_WIDTH|DDSD_HEIGHT/*|DDSD_CAPS|DDSD_PIXELFORMAT*/);
if( (header.flags & required) != required ) {
return false;
}
@ -643,40 +796,46 @@ bool DirectDrawSurface::isSupported() const
{
nvDebugCheck(isValid());
if (header.pf.flags & DDPF_FOURCC)
if (header.hasDX10Header())
{
if (header.pf.fourcc != FOURCC_DXT1 &&
header.pf.fourcc != FOURCC_DXT2 &&
header.pf.fourcc != FOURCC_DXT3 &&
header.pf.fourcc != FOURCC_DXT4 &&
header.pf.fourcc != FOURCC_DXT5 &&
header.pf.fourcc != FOURCC_RXGB &&
header.pf.fourcc != FOURCC_ATI1 &&
header.pf.fourcc != FOURCC_ATI2)
{
// Unknown fourcc code.
return false;
}
}
else if (header.pf.flags & DDPF_RGB)
{
// All RGB formats are supported now.
}
else
{
return false;
}
if (header.pf.flags & DDPF_FOURCC)
{
if (header.pf.fourcc != FOURCC_DXT1 &&
header.pf.fourcc != FOURCC_DXT2 &&
header.pf.fourcc != FOURCC_DXT3 &&
header.pf.fourcc != FOURCC_DXT4 &&
header.pf.fourcc != FOURCC_DXT5 &&
header.pf.fourcc != FOURCC_RXGB &&
header.pf.fourcc != FOURCC_ATI1 &&
header.pf.fourcc != FOURCC_ATI2)
{
// Unknown fourcc code.
return false;
}
}
else if (header.pf.flags & DDPF_RGB)
{
// All RGB formats are supported now.
}
else
{
return false;
}
if (isTextureCube() && (header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES)
{
// Cubemaps must contain all faces.
return false;
}
if (isTextureCube() && (header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES)
{
// Cubemaps must contain all faces.
return false;
}
if (isTexture3D())
{
// @@ 3D textures not supported yet.
return false;
if (isTexture3D())
{
// @@ 3D textures not supported yet.
return false;
}
}
return true;
@ -712,16 +871,40 @@ uint DirectDrawSurface::depth() const
else return 1;
}
bool DirectDrawSurface::isTexture1D() const
{
nvDebugCheck(isValid());
if (header.hasDX10Header())
{
return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE1D;
}
return false;
}
bool DirectDrawSurface::isTexture2D() const
{
nvDebugCheck(isValid());
return !isTexture3D() && !isTextureCube();
if (header.hasDX10Header())
{
return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE2D;
}
else
{
return !isTexture3D() && !isTextureCube();
}
}
bool DirectDrawSurface::isTexture3D() const
{
nvDebugCheck(isValid());
return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0;
if (header.hasDX10Header())
{
return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE3D;
}
else
{
return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0;
}
}
bool DirectDrawSurface::isTextureCube() const
@ -730,6 +913,12 @@ bool DirectDrawSurface::isTextureCube() const
return (header.caps.caps2 & DDSCAPS2_CUBEMAP) != 0;
}
void DirectDrawSurface::setNormalFlag(bool b)
{
nvDebugCheck(isValid());
header.setNormalFlag(b);
}
void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap)
{
nvDebugCheck(isValid());
@ -780,7 +969,13 @@ void DirectDrawSurface::readLinearImage(Image * img)
uint byteCount = (header.pf.bitcount + 7) / 8;
if (header.pf.amask != 0)
// set image format: RGB or ARGB
// alpha channel exists if and only if the alpha mask is non-zero
if (header.pf.amask == 0)
{
img->setFormat(Image::Format_RGB);
}
else
{
img->setFormat(Image::Format_ARGB);
}
@ -794,10 +989,10 @@ void DirectDrawSurface::readLinearImage(Image * img)
stream->serialize(&c, byteCount);
Color32 pixel(0, 0, 0, 0xFF);
pixel.r = PixelFormat::convert(c >> rshift, rsize, 8);
pixel.g = PixelFormat::convert(c >> gshift, gsize, 8);
pixel.b = PixelFormat::convert(c >> bshift, bsize, 8);
pixel.a = PixelFormat::convert(c >> ashift, asize, 8);
pixel.r = PixelFormat::convert((c & header.pf.rmask) >> rshift, rsize, 8);
pixel.g = PixelFormat::convert((c & header.pf.gmask) >> gshift, gsize, 8);
pixel.b = PixelFormat::convert((c & header.pf.bmask) >> bshift, bsize, 8);
pixel.a = PixelFormat::convert((c & header.pf.amask) >> ashift, asize, 8);
img->pixel(x, y) = pixel;
}
@ -809,6 +1004,19 @@ void DirectDrawSurface::readBlockImage(Image * img)
nvDebugCheck(stream != NULL);
nvDebugCheck(img != NULL);
// set image format: RGB or ARGB
if (header.pf.fourcc == FOURCC_RXGB ||
header.pf.fourcc == FOURCC_ATI1 ||
header.pf.fourcc == FOURCC_ATI2 ||
header.pf.flags & DDPF_NORMAL)
{
img->setFormat(Image::Format_RGB);
}
else
{
img->setFormat(Image::Format_ARGB);
}
const uint w = img->width();
const uint h = img->height();
@ -1044,8 +1252,23 @@ void DirectDrawSurface::printInfo() const
if (header.pf.flags & DDPF_ALPHAPREMULT) printf("\t\tDDPF_ALPHAPREMULT\n");
if (header.pf.flags & DDPF_NORMAL) printf("\t\tDDPF_NORMAL\n");
printf("\tFourCC: '%c%c%c%c'\n", ((header.pf.fourcc >> 0) & 0xFF), ((header.pf.fourcc >> 8) & 0xFF), ((header.pf.fourcc >> 16) & 0xFF), ((header.pf.fourcc >> 24) & 0xFF));
printf("\tBit count: %d\n", header.pf.bitcount);
printf("\tFourCC: '%c%c%c%c'\n",
((header.pf.fourcc >> 0) & 0xFF),
((header.pf.fourcc >> 8) & 0xFF),
((header.pf.fourcc >> 16) & 0xFF),
((header.pf.fourcc >> 24) & 0xFF));
if ((header.pf.fourcc & DDPF_FOURCC) && (header.pf.bitcount != 0))
{
printf("\tSwizzle: '%c%c%c%c'\n",
(header.pf.bitcount >> 0) & 0xFF,
(header.pf.bitcount >> 8) & 0xFF,
(header.pf.bitcount >> 16) & 0xFF,
(header.pf.bitcount >> 24) & 0xFF);
}
else
{
printf("\tBit count: %d\n", header.pf.bitcount);
}
printf("\tRed mask: 0x%.8X\n", header.pf.rmask);
printf("\tGreen mask: 0x%.8X\n", header.pf.gmask);
printf("\tBlue mask: 0x%.8X\n", header.pf.bmask);
@ -1076,11 +1299,11 @@ void DirectDrawSurface::printInfo() const
printf("\tCaps 3: 0x%.8X\n", header.caps.caps3);
printf("\tCaps 4: 0x%.8X\n", header.caps.caps4);
if (header.pf.flags == 0)
if (header.hasDX10Header())
{
printf("DX10 Header:\n");
printf("\tDXGI Format: %u\n", header.header10.dxgiFormat);
printf("\tResource dimension: %u\n", header.header10.resourceDimension);
printf("\tDXGI Format: %u (%s)\n", header.header10.dxgiFormat, getDxgiFormatString((DXGI_FORMAT)header.header10.dxgiFormat));
printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((D3D10_RESOURCE_DIMENSION)header.header10.resourceDimension));
printf("\tMisc flag: %u\n", header.header10.miscFlag);
printf("\tArray size: %u\n", header.header10.arraySize);
}
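The readLinearImage() change above masks each channel before shifting; without the mask, bits from the higher channels leak into the value handed to PixelFormat::convert(). A worked example with a 16-bit R5G6B5 pixel:

#include <stdio.h>

int main()
{
    unsigned int c      = 0xFFFF;  // R5G6B5 white
    unsigned int bmask  = 0x001F;  // blue occupies bits 0-4
    unsigned int bshift = 0;

    printf("unmasked: 0x%X\n", c >> bshift);           // 0xFFFF - red and green bits leak in
    printf("masked:   0x%X\n", (c & bmask) >> bshift); // 0x1F   - the 5 blue bits convert() expects
    return 0;
}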

View File

@ -119,10 +119,13 @@ namespace nv
uint width() const;
uint height() const;
uint depth() const;
bool isTexture1D() const;
bool isTexture2D() const;
bool isTexture3D() const;
bool isTextureCube() const;
void setNormalFlag(bool b);
void mipmap(Image * img, uint f, uint m);
// void mipmap(FloatImage * img, uint f, uint m);

View File

@ -33,11 +33,10 @@
* http://www.dspguide.com/ch16.htm
*/
#include "Filter.h"
#include <nvcore/Containers.h> // swap
#include <nvmath/nvmath.h> // fabs
#include <nvmath/Vector.h> // Vector4
#include <nvimage/Filter.h>
#include <nvcore/Containers.h> // swap
using namespace nv;
@ -244,7 +243,7 @@ SincFilter::SincFilter(float w) : Filter(w) {}
float SincFilter::evaluate(float x) const
{
return 0.0f;
return sincf(PI * x);
}
@ -504,7 +503,7 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
for (int i = 0; i < 7; i++) {
for (int e = 0; e < 7; e++) {
m_data[i * 9 + e + 1] += elements[i * 7 + e] * scale.z();
m_data[(i + 1) * 9 + e + 1] += elements[i * 7 + e] * scale.z();
}
}
}
@ -519,7 +518,7 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
for (int i = 0; i < 5; i++) {
for (int e = 0; e < 5; e++) {
m_data[i * 9 + e + 2] += elements[i * 5 + e] * scale.y();
m_data[(i + 2) * 9 + e + 2] += elements[i * 5 + e] * scale.y();
}
}
}
@ -532,7 +531,7 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
for (int i = 0; i < 3; i++) {
for (int e = 0; e < 3; e++) {
m_data[i * 9 + e + 3] += elements[i * 3 + e] * scale.x();
m_data[(i + 3) * 9 + e + 3] += elements[i * 3 + e] * scale.x();
}
}
}
@ -541,12 +540,17 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples/*= 32*/)
{
nvCheck(srcLength >= dstLength); // @@ Upsampling not implemented!
nvDebugCheck(samples > 0);
const float scale = float(dstLength) / float(srcLength);
float scale = float(dstLength) / float(srcLength);
const float iscale = 1.0f / scale;
if (scale > 1) {
// Upsampling.
samples = 1;
scale = 1;
}
m_length = dstLength;
m_width = f.width() * iscale;
m_windowSize = (int)ceilf(m_width * 2) + 1;
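Two of the fixes above are easy to check in isolation: SincFilter::evaluate() now returns sinc(PI * x) instead of a constant 0, and PolyphaseKernel falls back to a single sample with scale 1 when asked to upsample. A standalone sketch of the normalized sinc (the real sincf() lives in nvmath and presumably guards x near zero in a similar way):

#include <math.h>
#include <stdio.h>

static float sinc(float x)
{
    if (fabsf(x) < 1e-4f)
    {
        return 1.0f; // limit of sin(x)/x as x approaches 0
    }
    return sinf(x) / x;
}

int main()
{
    const float pi = 3.14159265f;
    // SincFilter::evaluate(x) now returns sinc(pi * x):
    printf("%f %f %f\n", sinc(pi * 0.0f), sinc(pi * 0.5f), sinc(pi * 1.0f));
    // prints approximately: 1.000000 0.636620 0.000000
    return 0;
}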

View File

@ -11,16 +11,16 @@ namespace nv
class Vector4;
/// Base filter class.
class Filter
class NVIMAGE_CLASS Filter
{
public:
NVIMAGE_API Filter(float width);
NVIMAGE_API virtual ~Filter();
Filter(float width);
virtual ~Filter();
NVIMAGE_API float width() const { return m_width; }
NVIMAGE_API float sampleDelta(float x, float scale) const;
NVIMAGE_API float sampleBox(float x, float scale, int samples) const;
NVIMAGE_API float sampleTriangle(float x, float scale, int samples) const;
float width() const { return m_width; }
float sampleDelta(float x, float scale) const;
float sampleBox(float x, float scale, int samples) const;
float sampleTriangle(float x, float scale, int samples) const;
virtual float evaluate(float x) const = 0;
@ -29,56 +29,56 @@ namespace nv
};
// Box filter.
class BoxFilter : public Filter
class NVIMAGE_CLASS BoxFilter : public Filter
{
public:
NVIMAGE_API BoxFilter();
NVIMAGE_API BoxFilter(float width);
NVIMAGE_API virtual float evaluate(float x) const;
BoxFilter();
BoxFilter(float width);
virtual float evaluate(float x) const;
};
// Triangle (bilinear/tent) filter.
class TriangleFilter : public Filter
class NVIMAGE_CLASS TriangleFilter : public Filter
{
public:
NVIMAGE_API TriangleFilter();
NVIMAGE_API TriangleFilter(float width);
NVIMAGE_API virtual float evaluate(float x) const;
TriangleFilter();
TriangleFilter(float width);
virtual float evaluate(float x) const;
};
// Quadratic (bell) filter.
class QuadraticFilter : public Filter
class NVIMAGE_CLASS QuadraticFilter : public Filter
{
public:
NVIMAGE_API QuadraticFilter();
NVIMAGE_API virtual float evaluate(float x) const;
QuadraticFilter();
virtual float evaluate(float x) const;
};
// Cubic filter from Thatcher Ulrich.
class CubicFilter : public Filter
class NVIMAGE_CLASS CubicFilter : public Filter
{
public:
NVIMAGE_API CubicFilter();
NVIMAGE_API virtual float evaluate(float x) const;
CubicFilter();
virtual float evaluate(float x) const;
};
// Cubic b-spline filter from Paul Heckbert.
class BSplineFilter : public Filter
class NVIMAGE_CLASS BSplineFilter : public Filter
{
public:
NVIMAGE_API BSplineFilter();
NVIMAGE_API virtual float evaluate(float x) const;
BSplineFilter();
virtual float evaluate(float x) const;
};
/// Mitchell & Netravali's two-param cubic
/// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88
class MitchellFilter : public Filter
class NVIMAGE_CLASS MitchellFilter : public Filter
{
public:
NVIMAGE_API MitchellFilter();
NVIMAGE_API virtual float evaluate(float x) const;
MitchellFilter();
virtual float evaluate(float x) const;
NVIMAGE_API void setParameters(float a, float b);
void setParameters(float b, float c);
private:
float p0, p2, p3;
@ -86,29 +86,29 @@ namespace nv
};
// Lanczos3 filter.
class LanczosFilter : public Filter
class NVIMAGE_CLASS LanczosFilter : public Filter
{
public:
NVIMAGE_API LanczosFilter();
NVIMAGE_API virtual float evaluate(float x) const;
LanczosFilter();
virtual float evaluate(float x) const;
};
// Sinc filter.
class SincFilter : public Filter
class NVIMAGE_CLASS SincFilter : public Filter
{
public:
NVIMAGE_API SincFilter(float w);
NVIMAGE_API virtual float evaluate(float x) const;
SincFilter(float w);
virtual float evaluate(float x) const;
};
// Kaiser filter.
class KaiserFilter : public Filter
class NVIMAGE_CLASS KaiserFilter : public Filter
{
public:
NVIMAGE_API KaiserFilter(float w);
NVIMAGE_API virtual float evaluate(float x) const;
KaiserFilter(float w);
virtual float evaluate(float x) const;
NVIMAGE_API void setParameters(float a, float stretch);
void setParameters(float a, float stretch);
private:
float alpha;
@ -118,12 +118,12 @@ namespace nv
/// A 1D kernel. Used to precompute filter weights.
class Kernel1
class NVIMAGE_CLASS Kernel1
{
NV_FORBID_COPY(Kernel1);
public:
NVIMAGE_API Kernel1(const Filter & f, int iscale, int samples = 32);
NVIMAGE_API ~Kernel1();
Kernel1(const Filter & f, int iscale, int samples = 32);
~Kernel1();
float valueAt(uint x) const {
nvDebugCheck(x < (uint)m_windowSize);
@ -138,7 +138,7 @@ namespace nv
return m_width;
}
NVIMAGE_API void debugPrint();
void debugPrint();
private:
int m_windowSize;
@ -148,15 +148,15 @@ namespace nv
/// A 2D kernel.
class Kernel2
class NVIMAGE_CLASS Kernel2
{
public:
NVIMAGE_API Kernel2(uint width);
NVIMAGE_API Kernel2(const Kernel2 & k);
NVIMAGE_API ~Kernel2();
Kernel2(uint width);
Kernel2(const Kernel2 & k);
~Kernel2();
NVIMAGE_API void normalize();
NVIMAGE_API void transpose();
void normalize();
void transpose();
float valueAt(uint x, uint y) const {
return m_data[y * m_windowSize + x];
@ -166,12 +166,12 @@ namespace nv
return m_windowSize;
}
NVIMAGE_API void initLaplacian();
NVIMAGE_API void initEdgeDetection();
NVIMAGE_API void initSobel();
NVIMAGE_API void initPrewitt();
void initLaplacian();
void initEdgeDetection();
void initSobel();
void initPrewitt();
NVIMAGE_API void initBlendedSobel(const Vector4 & scale);
void initBlendedSobel(const Vector4 & scale);
private:
const uint m_windowSize;
@ -180,12 +180,12 @@ namespace nv
/// A 1D polyphase kernel
class PolyphaseKernel
class NVIMAGE_CLASS PolyphaseKernel
{
NV_FORBID_COPY(PolyphaseKernel);
public:
NVIMAGE_API PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32);
NVIMAGE_API ~PolyphaseKernel();
PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32);
~PolyphaseKernel();
int windowSize() const {
return m_windowSize;
@ -205,7 +205,7 @@ namespace nv
return m_data[column * m_windowSize + x];
}
NVIMAGE_API void debugPrint() const;
void debugPrint() const;
private:
int m_windowSize;

View File

@ -1,16 +1,18 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Containers.h>
#include <nvcore/Ptr.h>
#include <nvmath/Color.h>
#include "FloatImage.h"
#include "Filter.h"
#include "Image.h"
#include <nvmath/Color.h>
#include <nvmath/Matrix.h>
#include <nvcore/Containers.h>
#include <nvcore/Ptr.h>
#include <math.h>
using namespace nv;
namespace
@ -140,7 +142,8 @@ Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const
/// Allocate a 2d float image of the given format and the given extents.
void FloatImage::allocate(uint c, uint w, uint h)
{
nvCheck(m_mem == NULL);
free();
m_width = w;
m_height = h;
m_componentNum = c;
@ -151,7 +154,6 @@ void FloatImage::allocate(uint c, uint w, uint h)
/// Free the image, but don't clear the members.
void FloatImage::free()
{
nvCheck(m_mem != NULL);
nv::mem::free( reinterpret_cast<void *>(m_mem) );
m_mem = NULL;
}
@ -376,7 +378,7 @@ FloatImage * FloatImage::fastDownSample() const
{
const uint n = w * h;
if (n & 1)
if ((m_width * m_height) & 1)
{
const float scale = 1.0f / (2 * n + 1);
@ -540,73 +542,27 @@ FloatImage * FloatImage::fastDownSample() const
return dst_image.release();
}
/*
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, WrapMode wm) const
{
const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2);
return downSample(kernel, w, h, wm);
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, uint w, uint h, WrapMode wm) const
{
nvCheck(!(kernel.windowSize() & 1)); // Make sure that kernel m_width is even.
AutoPtr<FloatImage> tmp_image( new FloatImage() );
tmp_image->allocate(m_componentNum, w, m_height);
AutoPtr<FloatImage> dst_image( new FloatImage() );
dst_image->allocate(m_componentNum, w, h);
const float xscale = float(m_width) / float(w);
const float yscale = float(m_height) / float(h);
for(uint c = 0; c < m_componentNum; c++) {
float * tmp_channel = tmp_image->channel(c);
for(uint y = 0; y < m_height; y++) {
for(uint x = 0; x < w; x++) {
float sum = this->applyKernelHorizontal(&kernel, uint(x*xscale), y, c, wm);
const uint tmp_index = tmp_image->index(x, y);
tmp_channel[tmp_index] = sum;
}
}
float * dst_channel = dst_image->channel(c);
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
float sum = tmp_image->applyKernelVertical(&kernel, uint(x*xscale), uint(y*yscale), c, wm);
const uint dst_index = dst_image->index(x, y);
dst_channel[dst_index] = sum;
}
}
}
return dst_image.release();
}
*/
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const
{
const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2);
return downSample(filter, w, h, wm);
return resize(filter, w, h, wm);
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm, uint alpha) const
{
const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2);
return resize(filter, w, h, wm, alpha);
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapMode wm) const
FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm) const
{
// @@ Use monophase filters when frac(m_width / w) == 0
@ -675,10 +631,56 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
return dst_image.release();
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const
{
nvCheck(alpha < m_componentNum);
AutoPtr<FloatImage> tmp_image( new FloatImage() );
AutoPtr<FloatImage> dst_image( new FloatImage() );
PolyphaseKernel xkernel(filter, m_width, w, 32);
PolyphaseKernel ykernel(filter, m_height, h, 32);
{
tmp_image->allocate(m_componentNum, w, m_height);
dst_image->allocate(m_componentNum, w, h);
Array<float> tmp_column(h);
tmp_column.resize(h);
for (uint c = 0; c < m_componentNum; c++)
{
float * tmp_channel = tmp_image->channel(c);
for (uint y = 0; y < m_height; y++) {
this->applyKernelHorizontal(xkernel, y, c, alpha, wm, tmp_channel + y * w);
}
}
// Process all channels before applying vertical kernel to make sure alpha has been computed.
for (uint c = 0; c < m_componentNum; c++)
{
float * dst_channel = dst_image->channel(c);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, alpha, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) {
dst_channel[y * w + x] = tmp_column[y];
}
}
}
}
return dst_image.release();
}
/// Apply 2D kernel at the given coordinates and return result.
float FloatImage::applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const
float FloatImage::applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const
{
nvDebugCheck(k != NULL);
@ -707,7 +709,7 @@ float FloatImage::applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode w
/// Apply 1D vertical kernel at the given coordinates and return result.
float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const
float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const
{
nvDebugCheck(k != NULL);
@ -729,7 +731,7 @@ float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, int c, Wr
}
/// Apply 1D horizontal kernel at the given coordinates and return result.
float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const
float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const
{
nvDebugCheck(k != NULL);
@ -752,7 +754,7 @@ float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, int c,
/// Apply 1D vertical kernel at the given coordinates and return result.
void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, int c, WrapMode wm, float * output) const
void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * __restrict output) const
{
const uint length = k.length();
const float scale = float(length) / float(m_height);
@ -784,7 +786,7 @@ void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, int c, Wr
}
/// Apply 1D horizontal kernel at the given coordinates and return result.
void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, int c, WrapMode wm, float * output) const
void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * __restrict output) const
{
const uint length = k.length();
const float scale = float(length) / float(m_width);
@ -815,3 +817,93 @@ void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, int c,
}
}
/// Apply 1D vertical kernel at the given coordinates and return result.
void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * __restrict output) const
{
const uint length = k.length();
const float scale = float(length) / float(m_height);
const float iscale = 1.0f / scale;
const float width = k.width();
const int windowSize = k.windowSize();
const float * channel = this->channel(c);
const float * alpha = this->channel(a);
for (uint i = 0; i < length; i++)
{
const float center = (0.5f + i) * iscale;
const int left = (int)floorf(center - width);
const int right = (int)ceilf(center + width);
nvCheck(right - left <= windowSize);
float norm = 0;
float sum = 0;
for (int j = 0; j < windowSize; ++j)
{
const int idx = this->index(x, j+left, wm);
float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f));
norm += w;
sum += w * channel[idx];
}
output[i] = sum / norm;
}
}
/// Apply 1D horizontal kernel at the given coordinates and return result.
void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * __restrict output) const
{
const uint length = k.length();
const float scale = float(length) / float(m_width);
const float iscale = 1.0f / scale;
const float width = k.width();
const int windowSize = k.windowSize();
const float * channel = this->channel(c);
const float * alpha = this->channel(a);
for (uint i = 0; i < length; i++)
{
const float center = (0.5f + i) * iscale;
const int left = (int)floorf(center - width);
const int right = (int)ceilf(center + width);
nvDebugCheck(right - left <= windowSize);
float norm = 0.0f;
float sum = 0;
for (int j = 0; j < windowSize; ++j)
{
const int idx = this->index(left + j, y, wm);
float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f));
norm += w;
sum += w * channel[idx];
}
output[i] = sum / norm;
}
}
FloatImage* FloatImage::clone() const
{
FloatImage* copy = new FloatImage();
copy->m_width = m_width;
copy->m_height = m_height;
copy->m_componentNum = m_componentNum;
copy->m_count = m_count;
if(m_mem)
{
copy->allocate(m_componentNum, m_width, m_height);
memcpy(copy->m_mem, m_mem, m_count * sizeof(float));
}
return copy;
}
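The new resize() overload that takes an alpha channel index weights every filter tap by (alpha + 1/256), so nearly transparent texels contribute almost nothing to the filtered color while the small bias keeps the normalization from dividing by zero; downSample(filter, wm, alpha) simply halves the image through it. A usage sketch (the exact WrapMode enumerator name is assumed from the wrap helpers visible in the header):

#include <nvimage/FloatImage.h>
#include <nvimage/Filter.h>

nv::FloatImage * halveWithAlpha(const nv::FloatImage * src)
{
    // Assume an RGBA float image: channels 0-2 are color, channel 3 is alpha.
    nv::BoxFilter filter;

    // Alpha-weighted downsample to half size; channel 3 drives the weighting.
    return src->downSample(filter, nv::FloatImage::WrapMode_Clamp, /*alpha=*/ 3);
}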

View File

@ -3,12 +3,20 @@
#ifndef NV_IMAGE_FLOATIMAGE_H
#define NV_IMAGE_FLOATIMAGE_H
#include <nvimage/nvimage.h>
#include <nvmath/Vector.h>
#include <nvcore/Debug.h>
#include <nvcore/Containers.h> // clamp
#include <nvimage/nvimage.h>
#include <stdlib.h> // abs
namespace nv
{
class Vector4;
class Matrix;
class Image;
class Filter;
class Kernel1;
@ -63,17 +71,19 @@ public:
NVIMAGE_API FloatImage * fastDownSample() const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, uint w, uint h, WrapMode wm) const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const;
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const;
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const;
//@}
NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const;
NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const;
NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const;
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, int c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, int c, WrapMode wm, float * output) const;
NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const;
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * output) const;
uint width() const { return m_width; }
@ -109,6 +119,9 @@ public:
float sampleLinearMirror(float x, float y, int c) const;
//@}
FloatImage* clone() const;
public:
uint index(uint x, uint y) const;
@ -226,14 +239,18 @@ inline uint FloatImage::indexRepeat(int x, int y) const
inline uint FloatImage::indexMirror(int x, int y) const
{
if (m_width == 1) x = 0;
x = abs(x);
while (x >= m_width) {
x = m_width + m_width - x - 2;
x = abs(m_width + m_width - x - 2);
}
if (m_height == 1) y = 0;
y = abs(y);
while (y >= m_height) {
y = m_height + m_height - y - 2;
y = abs(m_height + m_height - y - 2);
}
return index(x, y);

View File

@ -296,7 +296,7 @@ static bool downsample(const FloatImage * src, const BitMap * srcMask, const Flo
return true;
}
// This is the filter used in the Lumigraph paper. The Unreal engine uses something similar.
// This is the filter used in the Lumigraph paper.
void nv::fillPullPush(FloatImage * img, const BitMap * bmap)
{
nvCheck(img != NULL);
@ -644,8 +644,8 @@ struct LocalPixels
// This is a cubic extrapolation filter from Charles Bloom (DoPixelSeamFix).
void nv::fillCubicExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/)
// This is a quadratic extrapolation filter from Charles Bloom (DoPixelSeamFix). Used with his permission.
void nv::fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/)
{
nvCheck(passCount > 0);
nvCheck(img != NULL);

View File

@ -89,7 +89,7 @@ namespace nv
NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap * bmap);
NVIMAGE_API void fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap);
NVIMAGE_API void fillCubicExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1);
NVIMAGE_API void fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1);
} // nv namespace

View File

@ -15,7 +15,7 @@ Image::Image() : m_width(0), m_height(0), m_format(Format_RGB), m_data(NULL)
{
}
Image::Image(const Image & img)
Image::Image(const Image & img) : m_data(NULL)
{
allocate(img.m_width, img.m_height);
m_format = img.m_format;

View File

@ -21,15 +21,16 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Ptr.h>
#include <nvmath/Color.h>
#include <nvimage/NormalMap.h>
#include <nvimage/Filter.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Image.h>
#include <nvmath/Color.h>
#include <nvcore/Ptr.h>
using namespace nv;
// Create normal map using the given kernels.

View File

@ -39,7 +39,7 @@ namespace nv
bool isSupported() const
{
if (version != 1) {
printf("*** bad version number %u\n", version);
nvDebug("*** bad version number %u\n", version);
return false;
}
if (channel_count > 4) {

View File

@ -12,10 +12,14 @@ http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
@@ This code needs to be reviewed, I'm not sure it's correct.
*/
#include <nvimage/Quantize.h>
#include <nvimage/Image.h>
#include <nvimage/PixelFormat.h>
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/Quantize.h>
#include <nvcore/Containers.h> // swap
using namespace nv;
@ -47,94 +51,20 @@ void nv::Quantize::BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ )
// Simple quantization.
void nv::Quantize::RGB16( Image * image )
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel32 = image->pixel(x, y);
// Convert to 16 bit and back to 32 using regular bit expansion.
Color32 pixel16 = toColor32( toColor16(pixel32) );
// Store color.
image->pixel(x, y) = pixel16;
}
}
Truncate(image, 5, 6, 5, 8);
}
// Alpha quantization.
void nv::Quantize::Alpha4( Image * image )
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y);
// Convert to 4 bit using regular bit expansion.
pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
// Store color.
image->pixel(x, y) = pixel;
}
}
Truncate(image, 8, 8, 8, 4);
}
// Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg_RGB16( Image * image )
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
// @@ Use fixed point?
Vector3 * row0 = new Vector3[w+2];
Vector3 * row1 = new Vector3[w+2];
memset(row0, 0, sizeof(Vector3)*(w+2));
memset(row1, 0, sizeof(Vector3)*(w+2));
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel32 = image->pixel(x, y);
// Add error. // @@ We shouldn't clamp here!
pixel32.r = clamp(int(pixel32.r) + int(row0[1+x].x()), 0, 255);
pixel32.g = clamp(int(pixel32.g) + int(row0[1+x].y()), 0, 255);
pixel32.b = clamp(int(pixel32.b) + int(row0[1+x].z()), 0, 255);
// Convert to 16 bit. @@ Use regular clamp?
Color32 pixel16 = toColor32( toColor16(pixel32) );
// Store color.
image->pixel(x, y) = pixel16;
// Compute new error.
Vector3 diff(float(pixel32.r - pixel16.r), float(pixel32.g - pixel16.g), float(pixel32.b - pixel16.b));
// Propagate new error.
row0[1+x+1] += 7.0f / 16.0f * diff;
row1[1+x-1] += 3.0f / 16.0f * diff;
row1[1+x+0] += 5.0f / 16.0f * diff;
row1[1+x+1] += 1.0f / 16.0f * diff;
}
swap(row0, row1);
memset(row1, 0, sizeof(Vector3)*(w+2));
}
delete [] row0;
delete [] row1;
FloydSteinberg(image, 5, 6, 5, 8);
}
@ -188,34 +118,90 @@ void nv::Quantize::FloydSteinberg_BinaryAlpha( Image * image, int alpha_threshol
// Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg_Alpha4( Image * image )
{
FloydSteinberg(image, 8, 8, 8, 4);
}
void nv::Quantize::Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
// @@ Use fixed point?
float * row0 = new float[(w+2)];
float * row1 = new float[(w+2)];
memset(row0, 0, sizeof(float)*(w+2));
memset(row1, 0, sizeof(float)*(w+2));
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y);
// Add error.
int alpha = int(pixel.a) + int(row0[1+x]);
// Convert to our desired size, and reconstruct.
pixel.r = PixelFormat::convert(pixel.r, 8, rsize);
pixel.r = PixelFormat::convert(pixel.r, rsize, 8);
// Convert to 4 bit using regular bit expansion.
pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
pixel.g = PixelFormat::convert(pixel.g, 8, gsize);
pixel.g = PixelFormat::convert(pixel.g, gsize, 8);
pixel.b = PixelFormat::convert(pixel.b, 8, bsize);
pixel.b = PixelFormat::convert(pixel.b, bsize, 8);
pixel.a = PixelFormat::convert(pixel.a, 8, asize);
pixel.a = PixelFormat::convert(pixel.a, asize, 8);
// Store color.
image->pixel(x, y) = pixel;
}
}
}
// Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
Vector4 * row0 = new Vector4[w+2];
Vector4 * row1 = new Vector4[w+2];
memset(row0, 0, sizeof(Vector4)*(w+2));
memset(row1, 0, sizeof(Vector4)*(w+2));
for (uint y = 0; y < h; y++) {
for (uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y);
// Add error.
pixel.r = clamp(int(pixel.r) + int(row0[1+x].x()), 0, 255);
pixel.g = clamp(int(pixel.g) + int(row0[1+x].y()), 0, 255);
pixel.b = clamp(int(pixel.b) + int(row0[1+x].z()), 0, 255);
pixel.a = clamp(int(pixel.a) + int(row0[1+x].w()), 0, 255);
int r = pixel.r;
int g = pixel.g;
int b = pixel.b;
int a = pixel.a;
// Convert to our desired size, and reconstruct.
r = PixelFormat::convert(r, 8, rsize);
r = PixelFormat::convert(r, rsize, 8);
g = PixelFormat::convert(g, 8, gsize);
g = PixelFormat::convert(g, gsize, 8);
b = PixelFormat::convert(b, 8, bsize);
b = PixelFormat::convert(b, bsize, 8);
a = PixelFormat::convert(a, 8, asize);
a = PixelFormat::convert(a, asize, 8);
// Store color.
image->pixel(x, y) = Color32(r, g, b, a);
// Compute new error.
float diff = float(alpha - pixel.a);
Vector4 diff(float(int(pixel.r) - r), float(int(pixel.g) - g), float(int(pixel.b) - b), float(int(pixel.a) - a));
// Propagate new error.
row0[1+x+1] += 7.0f / 16.0f * diff;
@ -225,10 +211,9 @@ void nv::Quantize::FloydSteinberg_Alpha4( Image * image )
}
swap(row0, row1);
memset(row1, 0, sizeof(float)*(w+2));
memset(row1, 0, sizeof(Vector4)*(w+2));
}
delete [] row0;
delete [] row1;
}
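RGB16(), Alpha4() and their dithered variants are now thin wrappers over the generic entry points, which take the per-channel bit sizes directly. A usage sketch:

#include <nvimage/Quantize.h>
#include <nvimage/Image.h>

void quantizeExample(nv::Image * image)
{
    // Equivalent to the old RGB16(): truncate to R5G6B5 and expand back to 8 bits.
    nv::Quantize::Truncate(image, 5, 6, 5, 8);

    // Alternatively, the same target format with Floyd-Steinberg error diffusion.
    nv::Quantize::FloydSteinberg(image, 5, 6, 5, 8);
}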

View File

@ -3,6 +3,9 @@
#ifndef NV_IMAGE_QUANTIZE_H
#define NV_IMAGE_QUANTIZE_H
#include <nvimage/nvimage.h>
namespace nv
{
class Image;
@ -17,6 +20,9 @@ namespace nv
void FloydSteinberg_BinaryAlpha(Image * img, int alpha_threshold = 127);
void FloydSteinberg_Alpha4(Image * img);
void Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
void FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
// @@ Add palette quantization algorithms!
}
}

View File

@ -108,7 +108,7 @@ public:
float area() const
{
const Vector3 d = extents();
return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
}
/// Get the volume of the box.
@ -118,6 +118,14 @@ public:
return 8.0f * (d.x() * d.y() * d.z());
}
/// Return true if the box contains the given point.
bool contains(Vector3::Arg p) const
{
return
m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
}
private:
Vector3 m_mins;
@ -125,15 +133,6 @@ private:
};
/*
/// Point inside box test.
inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
{
return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
}
*/
} // nv namespace
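The change from 4.0f to 8.0f in area() follows from extents() returning half-extents: volume() below is 8 * dx * dy * dz, i.e. (2dx)(2dy)(2dz), so the full box measures 2dx by 2dy by 2dz and its surface area is 2[(2dx)(2dy) + (2dx)(2dz) + (2dy)(2dz)] = 8(dx*dy + dx*dz + dy*dz). A quick sanity check under that assumption:

// Unit cube, half-extents d = (0.5, 0.5, 0.5):
//   area()   = 8 * (0.25 + 0.25 + 0.25) = 6.0   (six unit faces)
//   volume() = 8 * (0.5 * 0.5 * 0.5)    = 1.0
// The old factor of 4 would have reported an area of 3 for the same box.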

View File

@ -7,8 +7,6 @@ SET(MATH_SRCS
Quaternion.h
Box.h
Color.h
Eigen.h Eigen.cpp
Fitting.h Fitting.cpp
Montecarlo.h Montecarlo.cpp
Random.h Random.cpp
SphericalHarmonic.h SphericalHarmonic.cpp

View File

@ -1,533 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "Eigen.h"
using namespace nv;
static const float EPS = 0.00001f;
static const int MAX_ITER = 100;
static void semi_definite_symmetric_eigen(const float *mat, int n, float *eigen_vec, float *eigen_val);
// Use power method to find the first eigenvector.
// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html
Vector3 nv::firstEigenVector(float matrix[6])
{
// Number of iterations. @@ Use a variable number of iterations.
const int NUM = 8;
Vector3 v(1, 1, 1);
for(int i = 0; i < NUM; i++) {
float x = v.x() * matrix[0] + v.y() * matrix[1] + v.z() * matrix[2];
float y = v.x() * matrix[1] + v.y() * matrix[3] + v.z() * matrix[4];
float z = v.x() * matrix[2] + v.y() * matrix[4] + v.z() * matrix[5];
float norm = max(max(x, y), z);
float iv = 1.0f / norm;
if (norm == 0.0f) {
return Vector3(zero);
}
v.set(x*iv, y*iv, z*iv);
}
return v;
}
/// Solve eigen system.
void Eigen::solve() {
semi_definite_symmetric_eigen(matrix, N, eigen_vec, eigen_val);
}
/// Solve eigen system.
void Eigen3::solve() {
// @@ Use lengyel code that seems to be more optimized.
#if 1
float v[3*3];
semi_definite_symmetric_eigen(matrix, 3, v, eigen_val);
eigen_vec[0].set(v[0], v[1], v[2]);
eigen_vec[1].set(v[3], v[4], v[5]);
eigen_vec[2].set(v[6], v[7], v[8]);
#else
const int maxSweeps = 32;
const float epsilon = 1.0e-10f;
float m11 = matrix[0]; // m(0,0);
float m12 = matrix[1]; // m(0,1);
float m13 = matrix[2]; // m(0,2);
float m22 = matrix[3]; // m(1,1);
float m23 = matrix[4]; // m(1,2);
float m33 = matrix[5]; // m(2,2);
//r.SetIdentity();
eigen_vec[0].set(1, 0, 0);
eigen_vec[1].set(0, 1, 0);
eigen_vec[2].set(0, 0, 1);
for (int a = 0; a < maxSweeps; a++)
{
// Exit if off-diagonal entries small enough
if ((fabs(m12) < epsilon) && (fabs(m13) < epsilon) && (fabs(m23) < epsilon))
{
break;
}
// Annihilate (1,2) entry
if (m12 != 0.0f)
{
float u = (m22 - m11) * 0.5f / m12;
float u2 = u * u;
float u2p1 = u2 + 1.0f;
float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u;
float c = 1.0f / sqrt(t * t + 1.0f);
float s = c * t;
m11 -= t * m12;
m22 += t * m12;
m12 = 0.0f;
float temp = c * m13 - s * m23;
m23 = s * m13 + c * m23;
m13 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * eigen_vec[i].x - s * eigen_vec[i].y;
eigen_vec[i].y = s * eigen_vec[i].x + c * eigen_vec[i].y;
eigen_vec[i].x = temp;
}
}
// Annihilate (1,3) entry
if (m13 != 0.0f)
{
float u = (m33 - m11) * 0.5f / m13;
float u2 = u * u;
float u2p1 = u2 + 1.0f;
float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u;
float c = 1.0f / sqrt(t * t + 1.0f);
float s = c * t;
m11 -= t * m13;
m33 += t * m13;
m13 = 0.0f;
float temp = c * m12 - s * m23;
m23 = s * m12 + c * m23;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * eigen_vec[i].x - s * eigen_vec[i].z;
eigen_vec[i].z = s * eigen_vec[i].x + c * eigen_vec[i].z;
eigen_vec[i].x = temp;
}
}
// Annihilate (2,3) entry
if (m23 != 0.0f)
{
float u = (m33 - m22) * 0.5f / m23;
float u2 = u * u;
float u2p1 = u2 + 1.0f;
float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u;
float c = 1.0f / sqrt(t * t + 1.0f);
float s = c * t;
m22 -= t * m23;
m33 += t * m23;
m23 = 0.0f;
float temp = c * m12 - s * m13;
m13 = s * m12 + c * m13;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * eigen_vec[i].y - s * eigen_vec[i].z;
eigen_vec[i].z = s * eigen_vec[i].y + c * eigen_vec[i].z;
eigen_vec[i].y = temp;
}
}
}
eigen_val[0] = m11;
eigen_val[1] = m22;
eigen_val[2] = m33;
#endif
}
/*---------------------------------------------------------------------------
Functions
---------------------------------------------------------------------------*/
/** @@ I don't remember where I got this function.
* computes the eigen values and eigen vectors
* of a semi definite symmetric matrix
*
* - matrix is stored in column symmetric storage, i.e.
* matrix = { m11, m12, m22, m13, m23, m33, m14, m24, m34, m44 ... }
* size = n(n+1)/2
*
* - eigen_vectors (return) = { v1, v2, v3, ..., vn } where vk = vk0, vk1, ..., vkn
* size = n^2, must be allocated by caller
*
* - eigen_values (return) are in decreasing order
* size = n, must be allocated by caller
*/
void semi_definite_symmetric_eigen(
const float *mat, int n, float *eigen_vec, float *eigen_val
) {
float *a,*v;
float a_norm,a_normEPS,thr,thr_nn;
int nb_iter = 0;
int jj;
int i,j,k,ij,ik,l,m,lm,mq,lq,ll,mm,imv,im,iq,ilv,il,nn;
int *index;
float a_ij,a_lm,a_ll,a_mm,a_im,a_il;
float a_lm_2;
float v_ilv,v_imv;
float x;
float sinx,sinx_2,cosx,cosx_2,sincos;
float delta;
// Number of entries in mat
nn = (n*(n+1))/2;
// Step 1: Copy mat to a
a = new float[nn];
for( ij=0; ij<nn; ij++ ) {
a[ij] = mat[ij];
}
// Ugly Fortran-porting trick: indices for a are between 1 and n
a--;
// Step 2 : Init diagonalization matrix as the unit matrix
v = new float[n*n];
ij = 0;
for( i=0; i<n; i++ ) {
for( j=0; j<n; j++ ) {
if( i==j ) {
v[ij++] = 1.0;
} else {
v[ij++] = 0.0;
}
}
}
// Ugly Fortran-porting trick: indices for v are between 1 and n
v--;
// Step 3 : compute the weight of the non diagonal terms
ij = 1 ;
a_norm = 0.0;
for( i=1; i<=n; i++ ) {
for( j=1; j<=i; j++ ) {
if( i!=j ) {
a_ij = a[ij];
a_norm += a_ij*a_ij;
}
ij++;
}
}
if( a_norm != 0.0 ) {
a_normEPS = a_norm*EPS;
thr = a_norm ;
// Step 4 : rotations
while( thr > a_normEPS && nb_iter < MAX_ITER ) {
nb_iter++;
thr_nn = thr / nn;
for( l=1 ; l< n; l++ ) {
for( m=l+1; m<=n; m++ ) {
// compute sinx and cosx
lq = (l*l-l)/2;
mq = (m*m-m)/2;
lm = l+mq;
a_lm = a[lm];
a_lm_2 = a_lm*a_lm;
if( a_lm_2 < thr_nn ) {
continue ;
}
ll = l+lq;
mm = m+mq;
a_ll = a[ll];
a_mm = a[mm];
delta = a_ll - a_mm;
if( delta == 0.0f ) {
x = - PI/4 ;
} else {
x = - atanf( (a_lm+a_lm) / delta ) / 2.0f ;
}
sinx = sinf(x);
cosx = cosf(x);
sinx_2 = sinx*sinx;
cosx_2 = cosx*cosx;
sincos = sinx*cosx;
// rotate L and M columns
ilv = n*(l-1);
imv = n*(m-1);
for( i=1; i<=n;i++ ) {
if( (i!=l) && (i!=m) ) {
iq = (i*i-i)/2;
if( i<m ) {
im = i + mq;
} else {
im = m + iq;
}
a_im = a[im];
if( i<l ) {
il = i + lq;
} else {
il = l + iq;
}
a_il = a[il];
a[il] = a_il*cosx - a_im*sinx;
a[im] = a_il*sinx + a_im*cosx;
}
ilv++;
imv++;
v_ilv = v[ilv];
v_imv = v[imv];
v[ilv] = cosx*v_ilv - sinx*v_imv;
v[imv] = sinx*v_ilv + cosx*v_imv;
}
x = a_lm*sincos; x+=x;
a[ll] = a_ll*cosx_2 + a_mm*sinx_2 - x;
a[mm] = a_ll*sinx_2 + a_mm*cosx_2 + x;
a[lm] = 0.0;
thr = fabs( thr - a_lm_2 );
}
}
}
}
// Step 5: index conversion and copy eigen values
// back from Fortran to C++
a++;
for( i=0; i<n; i++ ) {
k = i + (i*(i+1))/2;
eigen_val[i] = a[k];
}
delete[] a;
// Step 6: sort the eigen values and eigen vectors
index = new int[n];
for( i=0; i<n; i++ ) {
index[i] = i;
}
for( i=0; i<(n-1); i++ ) {
x = eigen_val[i];
k = i;
for( j=i+1; j<n; j++ ) {
if( x < eigen_val[j] ) {
k = j;
x = eigen_val[j];
}
}
eigen_val[k] = eigen_val[i];
eigen_val[i] = x;
jj = index[k];
index[k] = index[i];
index[i] = jj;
}
// Step 7: save the eigen vectors
v++; // back from Fortran to C++
ij = 0;
for( k=0; k<n; k++ ) {
ik = index[k]*n;
for( i=0; i<n; i++ ) {
eigen_vec[ij++] = v[ik++];
}
}
delete[] v ;
delete[] index;
return;
}
//_________________________________________________________
// Eric Lengyel code:
// http://www.terathon.com/code/linear.html
#if 0
const float epsilon = 1.0e-10F;
const int maxSweeps = 32;
struct Matrix3D
{
float n[3][3];
float& operator()(int i, int j)
{
return (n[j][i]);
}
const float& operator()(int i, int j) const
{
return (n[j][i]);
}
void SetIdentity(void)
{
n[0][0] = n[1][1] = n[2][2] = 1.0F;
n[0][1] = n[0][2] = n[1][0] = n[1][2] = n[2][0] = n[2][1] = 0.0F;
}
};
void CalculateEigensystem(const Matrix3D& m, float *lambda, Matrix3D& r)
{
float m11 = m(0,0);
float m12 = m(0,1);
float m13 = m(0,2);
float m22 = m(1,1);
float m23 = m(1,2);
float m33 = m(2,2);
r.SetIdentity();
for (int a = 0; a < maxSweeps; a++)
{
// Exit if off-diagonal entries small enough
if ((Fabs(m12) < epsilon) && (Fabs(m13) < epsilon) &&
(Fabs(m23) < epsilon)) break;
// Annihilate (1,2) entry
if (m12 != 0.0F)
{
float u = (m22 - m11) * 0.5F / m12;
float u2 = u * u;
float u2p1 = u2 + 1.0F;
float t = (u2p1 != u2) ?
((u < 0.0F) ? -1.0F : 1.0F) * (sqrt(u2p1) - fabs(u)) : 0.5F / u;
float c = 1.0F / sqrt(t * t + 1.0F);
float s = c * t;
m11 -= t * m12;
m22 += t * m12;
m12 = 0.0F;
float temp = c * m13 - s * m23;
m23 = s * m13 + c * m23;
m13 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * r(i,0) - s * r(i,1);
r(i,1) = s * r(i,0) + c * r(i,1);
r(i,0) = temp;
}
}
// Annihilate (1,3) entry
if (m13 != 0.0F)
{
float u = (m33 - m11) * 0.5F / m13;
float u2 = u * u;
float u2p1 = u2 + 1.0F;
float t = (u2p1 != u2) ?
((u < 0.0F) ? -1.0F : 1.0F) * (sqrt(u2p1) - fabs(u)) : 0.5F / u;
float c = 1.0F / sqrt(t * t + 1.0F);
float s = c * t;
m11 -= t * m13;
m33 += t * m13;
m13 = 0.0F;
float temp = c * m12 - s * m23;
m23 = s * m12 + c * m23;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * r(i,0) - s * r(i,2);
r(i,2) = s * r(i,0) + c * r(i,2);
r(i,0) = temp;
}
}
// Annihilate (2,3) entry
if (m23 != 0.0F)
{
float u = (m33 - m22) * 0.5F / m23;
float u2 = u * u;
float u2p1 = u2 + 1.0F;
float t = (u2p1 != u2) ?
((u < 0.0F) ? -1.0F : 1.0F) * (sqrt(u2p1) - fabs(u)) : 0.5F / u;
float c = 1.0F / sqrt(t * t + 1.0F);
float s = c * t;
m22 -= t * m23;
m33 += t * m23;
m23 = 0.0F;
float temp = c * m12 - s * m13;
m13 = s * m12 + c * m13;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * r(i,1) - s * r(i,2);
r(i,2) = s * r(i,1) + c * r(i,2);
r(i,1) = temp;
}
}
}
lambda[0] = m11;
lambda[1] = m22;
lambda[2] = m33;
}
#endif


@ -1,140 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_EIGEN_H
#define NV_MATH_EIGEN_H
#include <nvcore/Containers.h> // swap
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
namespace nv
{
// Compute first eigen vector using the power method.
Vector3 firstEigenVector(float matrix[6]);
/// Generic eigen-solver.
class Eigen
{
public:
/// Ctor.
Eigen(uint n) : N(n)
{
uint size = n * (n + 1) / 2;
matrix = new float[size];
eigen_vec = new float[N*N];
eigen_val = new float[N];
}
/// Dtor.
~Eigen()
{
delete [] matrix;
delete [] eigen_vec;
delete [] eigen_val;
}
NVMATH_API void solve();
/// Matrix accessor.
float & operator()(uint x, uint y)
{
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
/// Matrix const accessor.
float operator()(uint x, uint y) const
{
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
Vector3 eigenVector3(uint i) const
{
nvCheck(3 == N);
nvCheck(i < N);
return Vector3(eigen_vec[i*N+0], eigen_vec[i*N+1], eigen_vec[i*N+2]);
}
Vector4 eigenVector4(uint i) const
{
nvCheck(4 == N);
nvCheck(i < N);
return Vector4(eigen_vec[i*N+0], eigen_vec[i*N+1], eigen_vec[i*N+2], eigen_vec[i*N+3]);
}
float eigenValue(uint i) const
{
nvCheck(i < N);
return eigen_val[i];
}
private:
const uint N;
float * matrix;
float * eigen_vec;
float * eigen_val;
};
/// 3x3 eigen-solver.
/// Based on Eric Lengyel's code:
/// http://www.terathon.com/code/linear.html
class Eigen3
{
public:
/** Ctor. */
Eigen3() {}
NVMATH_API void solve();
/// Matrix accessor.
float & operator()(uint x, uint y)
{
nvDebugCheck( x < 3 && y < 3 );
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
/// Matrix const accessor.
float operator()(uint x, uint y) const
{
nvDebugCheck( x < 3 && y < 3 );
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
/// Get ith eigen vector.
Vector3 eigenVector(uint i) const
{
nvCheck(i < 3);
return eigen_vec[i];
}
/** Get ith eigen value. */
float eigenValue(uint i) const
{
nvCheck(i < 3);
return eigen_val[i];
}
private:
float matrix[3+2+1];
Vector3 eigen_vec[3];
float eigen_val[3];
};
} // nv namespace
#endif // NV_MATH_EIGEN_H
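A minimal usage sketch of the generic solver declared above, relying only on the nv types shown in this header (an illustration, not part of the original file):

#include <nvmath/Eigen.h>

static void exampleEigen3x3()
{
    nv::Eigen solver(3);                        // symmetric 3x3, upper-triangle storage
    solver(0, 0) = 2.0f;  solver(0, 1) = 0.5f;  solver(0, 2) = 0.0f;
    solver(1, 1) = 1.0f;  solver(1, 2) = 0.25f;
    solver(2, 2) = 3.0f;
    solver.solve();                             // Jacobi iteration from Eigen.cpp above
    nv::Vector3 axis = solver.eigenVector3(0);  // eigen values come back sorted, largest first
    float lambda0 = solver.eigenValue(0);
    (void)axis; (void)lambda0;
}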


@ -1,134 +0,0 @@
// License: Wild Magic License Version 3
// http://geometrictools.com/License/WildMagic3License.pdf
#include "Fitting.h"
#include "Eigen.h"
using namespace nv;
/** Fit a 3d line to the given set of points.
*
* Based on code from:
* http://geometrictools.com/
*/
Line3 Fit::bestLine(const Array<Vector3> & pointArray)
{
nvDebugCheck(pointArray.count() > 0);
Line3 line;
const uint pointCount = pointArray.count();
const float inv_num = 1.0f / pointCount;
// compute the mean of the points
Vector3 center(zero);
for(uint i = 0; i < pointCount; i++) {
center += pointArray[i];
}
line.setOrigin(center * inv_num);
// compute the covariance matrix of the points
float covariance[6] = {0, 0, 0, 0, 0, 0};
for(uint i = 0; i < pointCount; i++) {
Vector3 diff = pointArray[i] - line.origin();
covariance[0] += diff.x() * diff.x();
covariance[1] += diff.x() * diff.y();
covariance[2] += diff.x() * diff.z();
covariance[3] += diff.y() * diff.y();
covariance[4] += diff.y() * diff.z();
covariance[5] += diff.z() * diff.z();
}
line.setDirection(normalizeSafe(firstEigenVector(covariance), Vector3(zero), 0.0f));
// @@ This variant is from David Eberly... I'm not sure how that works.
/*sum_xx *= inv_num;
sum_xy *= inv_num;
sum_xz *= inv_num;
sum_yy *= inv_num;
sum_yz *= inv_num;
sum_zz *= inv_num;
// set up the eigensolver
Eigen3 ES;
ES(0,0) = sum_yy + sum_zz;
ES(0,1) = -sum_xy;
ES(0,2) = -sum_xz;
ES(1,1) = sum_xx + sum_zz;
ES(1,2) = -sum_yz;
ES(2,2) = sum_xx + sum_yy;
// compute eigenstuff, smallest eigenvalue is in last position
ES.solve();
line.setDirection(ES.eigenVector(2));
nvCheck( isNormalized(line.direction()) );
*/
return line;
}
/** Fit a 3d plane to the given set of points.
*
* Based on code from:
* http://geometrictools.com/
*/
Vector4 Fit::bestPlane(const Array<Vector3> & pointArray)
{
Vector3 center(zero);
const uint pointCount = pointArray.count();
const float inv_num = 1.0f / pointCount;
// compute the mean of the points
for(uint i = 0; i < pointCount; i++) {
center += pointArray[i];
}
center *= inv_num;
// compute the covariance matrix of the points
float sum_xx = 0.0f;
float sum_xy = 0.0f;
float sum_xz = 0.0f;
float sum_yy = 0.0f;
float sum_yz = 0.0f;
float sum_zz = 0.0f;
for(uint i = 0; i < pointCount; i++) {
Vector3 diff = pointArray[i] - center;
sum_xx += diff.x() * diff.x();
sum_xy += diff.x() * diff.y();
sum_xz += diff.x() * diff.z();
sum_yy += diff.y() * diff.y();
sum_yz += diff.y() * diff.z();
sum_zz += diff.z() * diff.z();
}
sum_xx *= inv_num;
sum_xy *= inv_num;
sum_xz *= inv_num;
sum_yy *= inv_num;
sum_yz *= inv_num;
sum_zz *= inv_num;
// set up the eigensolver
Eigen3 ES;
ES(0,0) = sum_yy + sum_zz;
ES(0,1) = -sum_xy;
ES(0,2) = -sum_xz;
ES(1,1) = sum_xx + sum_zz;
ES(1,2) = -sum_yz;
ES(2,2) = sum_xx + sum_yy;
// compute eigenstuff, greatest eigenvalue is in first position
ES.solve();
Vector3 normal = ES.eigenVector(0);
nvCheck(isNormalized(normal));
float offset = dot(normal, center);
return Vector4(normal, offset);
}
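Why bestPlane feeds those particular sums to the solver and then reads eigenVector(0): writing C for the covariance matrix accumulated above, the matrix assembled from the sums is

$$M = \operatorname{tr}(C)\,I - C, \qquad C\,v = \lambda v \;\Rightarrow\; M\,v = \bigl(\operatorname{tr}(C) - \lambda\bigr)\,v,$$

so M shares the eigenvectors of C with the eigenvalue order reversed. The largest eigenvalue of M therefore selects the direction of least variance, i.e. the plane normal, while the smallest one (used in the commented-out bestLine variant above) selects the direction of greatest variance.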


@ -1,78 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_FITTING_H
#define NV_MATH_FITTING_H
#include <nvmath/Vector.h>
namespace nv
{
/// 3D Line.
struct Line3
{
/// Ctor.
Line3() : m_origin(zero), m_direction(zero)
{
}
/// Copy ctor.
Line3(const Line3 & l) : m_origin(l.m_origin), m_direction(l.m_direction)
{
}
/// Ctor.
Line3(Vector3::Arg o, Vector3::Arg d) : m_origin(o), m_direction(d)
{
}
/// Normalize the line.
void normalize()
{
m_direction = nv::normalize(m_direction);
}
/// Project a point onto the line.
Vector3 projectPoint(Vector3::Arg point) const
{
nvDebugCheck(isNormalized(m_direction));
Vector3 v = point - m_origin;
return m_origin + m_direction * dot(m_direction, v);
}
/// Compute distance to line.
float distanceToPoint(Vector3::Arg point) const
{
nvDebugCheck(isNormalized(m_direction));
Vector3 v = point - m_origin;
Vector3 l = v - m_direction * dot(m_direction, v);
return length(l);
}
const Vector3 & origin() const { return m_origin; }
void setOrigin(Vector3::Arg value) { m_origin = value; }
const Vector3 & direction() const { return m_direction; }
void setDirection(Vector3::Arg value) { m_direction = value; }
private:
Vector3 m_origin;
Vector3 m_direction;
};
namespace Fit
{
NVMATH_API Line3 bestLine(const Array<Vector3> & pointArray);
NVMATH_API Vector4 bestPlane(const Array<Vector3> & pointArray);
} // Fit namespace
} // nv namespace
#endif // NV_MATH_FITTING_H

src/nvmath/Plane.cpp (new file, 17 lines)

@ -0,0 +1,17 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "Plane.h"
#include "Matrix.h"
namespace nv
{
Plane transformPlane(const Matrix& m, Plane::Arg p)
{
Vector3 newVec = transformVector(m, p.vector());
Vector3 ptInPlane = p.offset() * p.vector();
ptInPlane = transformPoint(m, ptInPlane);
return Plane(newVec, ptInPlane);
}
}

src/nvmath/Plane.h (new file, 77 lines)

@ -0,0 +1,77 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_PLANE_H
#define NV_MATH_PLANE_H
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
namespace nv
{
class Matrix;
class NVMATH_CLASS Plane
{
public:
typedef Plane const & Arg;
Plane();
Plane(float x, float y, float z, float w);
Plane(Vector4::Arg v);
Plane(Vector3::Arg v, float d);
Plane(Vector3::Arg normal, Vector3::Arg point);
const Plane & operator=(Plane::Arg v);
Vector3 vector() const;
scalar offset() const;
const Vector4 & asVector() const;
Vector4 & asVector();
void operator*=(scalar s);
private:
Vector4 p;
};
inline Plane::Plane() {}
inline Plane::Plane(float x, float y, float z, float w) : p(x, y, z, w) {}
inline Plane::Plane(Vector4::Arg v) : p(v) {}
inline Plane::Plane(Vector3::Arg v, float d) : p(v, d) {}
inline Plane::Plane(Vector3::Arg normal, Vector3::Arg point) : p(normal, dot(normal, point)) {}
inline const Plane & Plane::operator=(Plane::Arg v) { p = v.p; return *this; }
inline Vector3 Plane::vector() const { return p.xyz(); }
inline scalar Plane::offset() const { return p.w(); }
inline const Vector4 & Plane::asVector() const { return p; }
inline Vector4 & Plane::asVector() { return p; }
// Normalize plane.
inline Plane normalize(Plane::Arg plane, float epsilon = NV_EPSILON)
{
const float len = length(plane.vector());
nvDebugCheck(!isZero(len, epsilon));
const float inv = 1.0f / len;
return Plane(plane.asVector() * inv);
}
// Get the distance from the given point to this plane.
inline float distance(Plane::Arg plane, Vector3::Arg point)
{
return dot(plane.vector(), point) - plane.offset();
}
inline void Plane::operator*=(scalar s)
{
scale(p, s);
}
Plane transformPlane(const Matrix&, Plane::Arg);
} // nv namespace
#endif // NV_MATH_PLANE_H
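A short usage sketch of the new class, assuming the three-float Vector3 constructor from nvmath (illustrative only):

#include <nvmath/Plane.h>
#include <nvmath/Vector.h>

static void examplePlane()
{
    nv::Vector3 n(0.0f, 1.0f, 0.0f);                                // plane normal
    nv::Vector3 p(0.0f, 2.0f, 0.0f);                                // a point on the plane
    nv::Plane ground(n, p);                                         // stores (n, dot(n, p)) = (n, 2)
    float d = nv::distance(ground, nv::Vector3(3.0f, 5.0f, -1.0f)); // 5 - 2 = 3
    (void)d;
}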


@ -48,19 +48,37 @@
#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
*/
inline float sqrt_assert(const float f)
inline double sqrt_assert(const double f)
{
nvDebugCheck(f >= 0.0f);
return sqrt(f);
}
inline float sqrtf_assert(const float f)
{
nvDebugCheck(f >= 0.0f);
return sqrtf(f);
}
inline float acos_assert(const float f)
inline double acos_assert(const double f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return acos(f);
}
inline float acosf_assert(const float f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return acosf(f);
}
inline float asin_assert(const float f)
inline double asin_assert(const double f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return asin(f);
}
inline float asinf_assert(const float f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return asinf(f);
@ -68,11 +86,11 @@ inline float asin_assert(const float f)
// Replace default functions with asserting ones.
#define sqrt sqrt_assert
#define sqrtf sqrt_assert
#define sqrtf sqrtf_assert
#define acos acos_assert
#define acosf acos_assert
#define acosf acosf_assert
#define asin asin_assert
#define asinf asin_assert
#define asinf asinf_assert
#if NV_OS_WIN32
#include <float.h>
@ -136,6 +154,11 @@ inline float lerp(float f0, float f1, float t)
return f0 * s + f1 * t;
}
inline float square(float f)
{
return f * f;
}
} // nv
#endif // NV_MATH_H


@ -13,10 +13,10 @@ SET(NVTT_SRCS
CompressDXT.cpp
CompressRGB.h
CompressRGB.cpp
FastCompressDXT.h
FastCompressDXT.cpp
QuickCompressDXT.h
QuickCompressDXT.cpp
OptimalCompressDXT.h
OptimalCompressDXT.cpp
SingleColorLookup.h
CompressionOptions.h
CompressionOptions.cpp
@ -44,7 +44,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVTT_EXPORTS)
IF(NVTT_SHARED)
ADD_LIBRARY(nvtt SHARED ${DXT_SRCS})
ADD_DEFINITIONS(-DNVTT_SHARED=1)
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED)
@ -79,9 +80,6 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)


@ -29,8 +29,8 @@
#include "nvtt.h"
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
@ -57,26 +57,33 @@ using namespace nv;
using namespace nvtt;
void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
{
const uint w = image->width();
const uint h = image->height();
}
nv::FastCompressor::~FastCompressor()
{
}
void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1(rgba, &block);
}
QuickCompress::compressDXT1(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -86,17 +93,17 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
}
void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & outputOptions)
void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
QuickCompress::compressDXT1a(rgba, &block);
@ -108,18 +115,19 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & o
}
void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT3 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
compressBlock_BoundsRange(rgba, &block);
rgba.init(m_image, x, y);
QuickCompress::compressDXT3(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -129,18 +137,19 @@ void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Privat
}
void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT5 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
compressBlock_BoundsRange(rgba, &block);
rgba.init(m_image, x, y);
QuickCompress::compressDXT5(rgba, &block, 0);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -150,22 +159,21 @@ void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Privat
}
void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT5 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// copy X coordinate to alpha channel and Y coordinate to green channel.
rgba.swizzleDXT5n();
compressBlock_BoundsRange(rgba, &block);
QuickCompress::compressDXT5(rgba, &block, 0);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -175,42 +183,28 @@ void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Priva
}
void nv::fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
{
// @@ TODO
// compress red channel (X)
}
void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
nv::SlowCompressor::~SlowCompressor()
{
// @@ TODO
// compress red, green channels (X,Y)
}
void nv::doPrecomputation()
void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
static bool done = false; // @@ Stop using statics for reentrancy.
if (!done)
{
done = true;
squish::FastClusterFit::DoPrecomputation();
}
m_image = image;
m_alphaMode = alphaMode;
}
void nv::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT1 block;
doPrecomputation();
//squish::WeightedClusterFit fit;
//squish::ClusterFit fit;
squish::FastClusterFit fit;
@ -219,11 +213,11 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1(rgba.color(0), &block);
OptimalCompress::compressDXT1(rgba.color(0), &block);
}
else
{
@ -240,10 +234,10 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
}
void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT1 block;
@ -254,12 +248,27 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// Compress color.
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
bool anyAlpha = false;
bool allAlpha = true;
for (uint i = 0; i < 16; i++)
{
if (rgba.color(i).a < 128) anyAlpha = true;
else allAlpha = false;
}
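// A fully opaque single-color block or a fully transparent block has an exact optimal encoding;
// anything else falls through to the cluster fit below.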
if ((!anyAlpha && rgba.isSingleColor()) || allAlpha)
{
OptimalCompress::compressDXT1a(rgba.color(0), &block);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -269,29 +278,37 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
}
void nv::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT3 block;
squish::WeightedClusterFit fit;
//squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// Compress explicit alpha.
compressBlock(rgba, &block.alpha);
OptimalCompress::compressDXT3A(rgba, &block.alpha);
// Compress color.
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -300,10 +317,10 @@ void nv::compressDXT3(const Image * image, const OutputOptions::Private & output
}
}
void nv::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT5 block;
@ -314,23 +331,29 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// Compress alpha.
uint error;
if (compressionOptions.quality == Quality_Highest)
{
error = compressBlock_BruteForce(rgba, &block.alpha);
OptimalCompress::compressDXT5A(rgba, &block.alpha);
}
else
{
error = compressBlock_Iterative(rgba, &block.alpha);
QuickCompress::compressDXT5A(rgba, &block.alpha);
}
// Compress color.
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -340,33 +363,33 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
}
void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
BlockDXT5 block;
doPrecomputation();
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
rgba.init(m_image, x, y);
// copy X coordinate to alpha channel and Y coordinate to green channel.
rgba.swizzleDXT5n();
// Compress X.
uint error = compressBlock_Iterative(rgba, &block.alpha);
if (compressionOptions.quality == Quality_Highest)
{
error = compressBlock_BruteForce(rgba, &block.alpha);
OptimalCompress::compressDXT5A(rgba, &block.alpha);
}
else
{
QuickCompress::compressDXT5A(rgba, &block.alpha);
}
// Compress Y.
QuickCompress::compressDXT1G(rgba, &block.color);
OptimalCompress::compressDXT1G(rgba, &block.color);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -376,31 +399,27 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
}
void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba;
AlphaBlockDXT5 block;
uint totalError = 0;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
//error = compressBlock_BoundsRange(rgba, &block);
uint error = compressBlock_Iterative(rgba, &block);
rgba.init(m_image, x, y);
if (compressionOptions.quality == Quality_Highest)
{
// Try brute force algorithm.
error = compressBlock_BruteForce(rgba, &block);
OptimalCompress::compressDXT5A(rgba, &block);
}
else
{
QuickCompress::compressDXT5A(rgba, &block);
}
totalError += error;
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -410,10 +429,10 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
}
void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock xcolor;
ColorBlock ycolor;
@ -423,24 +442,21 @@ void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & o
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
xcolor.init(image, x, y);
xcolor.init(m_image, x, y);
xcolor.splatX();
ycolor.init(image, x, y);
ycolor.init(m_image, x, y);
ycolor.splatY();
// @@ Compute normal error, instead of separate xy errors.
uint xerror, yerror;
if (compressionOptions.quality == Quality_Highest)
{
xerror = compressBlock_BruteForce(xcolor, &block.x);
yerror = compressBlock_BruteForce(ycolor, &block.y);
OptimalCompress::compressDXT5A(xcolor, &block.x);
OptimalCompress::compressDXT5A(ycolor, &block.y);
}
else
{
xerror = compressBlock_Iterative(xcolor, &block.x);
yerror = compressBlock_Iterative(ycolor, &block.y);
QuickCompress::compressDXT5A(xcolor, &block.x);
QuickCompress::compressDXT5A(ycolor, &block.y);
}
if (outputOptions.outputHandler != NULL) {


@ -32,25 +32,45 @@ namespace nv
class Image;
class FloatImage;
void doPrecomputation();
class FastCompressor
{
public:
FastCompressor();
~FastCompressor();
// Fast compressors.
void fastCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
// Normal compressors.
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
class SlowCompressor
{
public:
SlowCompressor();
~SlowCompressor();
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
// External compressors.
#if defined(HAVE_S3QUANT)


@ -115,12 +115,18 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
c |= PixelFormat::convert(src[x].b, 8, bsize) << bshift;
c |= PixelFormat::convert(src[x].a, 8, asize) << ashift;
// Output one byte at a time. @@ Not tested... Does this work on LE and BE?
// Output one byte at a time.
for (uint i = 0; i < byteCount; i++)
{
*(dst + x * byteCount) = (c >> (i * 8)) & 0xFF;
*(dst + x * byteCount + i) = (c >> (i * 8)) & 0xFF;
}
}
// Zero padding.
for (uint x = w * byteCount; x < pitch; x++)
{
*(dst + x) = 0;
}
}
if (outputOptions.outputHandler != NULL)
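Aside on the fix in the hunk above: the per-byte store now indexes with + i, so each byte of the packed pixel is emitted least significant first, independent of host endianness. The same idea as a standalone sketch (names here are illustrative, not nvtt API):

#include <cstdint>
#include <cstddef>

// Write a packed pixel as byteCount little-endian bytes, least significant byte first.
static void writePixelLE(uint8_t * dst, uint32_t c, size_t byteCount)
{
    for (size_t i = 0; i < byteCount; i++)
    {
        dst[i] = static_cast<uint8_t>((c >> (i * 8)) & 0xFF);
    }
}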


@ -34,6 +34,7 @@
#include <nvimage/Filter.h>
#include <nvimage/Quantize.h>
#include <nvimage/NormalMap.h>
#include <nvimage/PixelFormat.h>
#include "Compressor.h"
#include "InputOptions.h"
@ -41,7 +42,6 @@
#include "OutputOptions.h"
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "CompressRGB.h"
#include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h"
@ -56,7 +56,7 @@ namespace
static int blockSize(Format format)
{
if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) {
if (format == Format_DXT1 || format == Format_DXT1a) {
return 8;
}
else if (format == Format_DXT3) {
@ -71,9 +71,6 @@ namespace
else if (format == Format_BC5) {
return 16;
}
else if (format == Format_CTX1) {
return 8;
}
return 0;
}
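// For reference: a 256x256 DXT1 mipmap is (256/4) * (256/4) blocks * 8 bytes = 32 KB.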
@ -203,7 +200,7 @@ namespace nvtt
AutoPtr<FloatImage> m_floatImage;
};
}
} // nvtt namespace
Compressor::Compressor() : m(*new Compressor::Private())
@ -214,13 +211,24 @@ Compressor::Compressor() : m(*new Compressor::Private())
if (m.cudaEnabled)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
}
Compressor::~Compressor()
{
delete &m;
cuda::exit();
}
@ -234,7 +242,17 @@ void Compressor::enableCudaAcceleration(bool enable)
if (m.cudaEnabled && m.cuda == NULL)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
}
@ -329,14 +347,14 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
if (compressionOptions.format == Format_RGBA)
{
header.setPitch(4 * inputOptions.targetWidth);
header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
}
else
{
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
@ -357,10 +375,6 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setFourCC('A', 'T', 'I', '2');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
else if (compressionOptions.format == Format_CTX1) {
header.setFourCC('C', 'T', 'X', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
}
// Swap bytes if necessary.
@ -417,7 +431,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
quantizeMipmap(mipmap, compressionOptions);
compressMipmap(mipmap, compressionOptions, outputOptions);
compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
// Compute extents of next mipmap:
w = max(1U, w / 2);
@ -566,7 +580,7 @@ void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Priva
// Resize image.
BoxFilter boxFilter;
mipmap.setImage(mipmap.asFloatImage()->downSample(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
}
@ -613,13 +627,6 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
{
nvDebugCheck(mipmap.asFixedImage() != NULL);
if (compressionOptions.enableColorDithering)
{
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
{
Quantize::FloydSteinberg_RGB16(mipmap.asMutableFixedImage());
}
}
if (compressionOptions.binaryAlpha)
{
if (compressionOptions.enableAlphaDithering)
@ -631,30 +638,68 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
}
}
else
if (compressionOptions.enableColorDithering || compressionOptions.enableAlphaDithering)
{
uint rsize = 8;
uint gsize = 8;
uint bsize = 8;
uint asize = 8;
if (compressionOptions.enableColorDithering)
{
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
{
rsize = 5;
gsize = 6;
bsize = 5;
}
else if (compressionOptions.format == Format_RGB)
{
uint rshift, gshift, bshift;
PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize);
PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize);
PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize);
}
}
if (compressionOptions.enableAlphaDithering)
{
if (compressionOptions.format == Format_DXT3)
{
Quantize::Alpha4(mipmap.asMutableFixedImage());
asize = 4;
}
else if (compressionOptions.format == Format_DXT1a)
else if (compressionOptions.format == Format_RGB)
{
Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
uint ashift;
PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize);
}
}
if (compressionOptions.binaryAlpha)
{
asize = 8; // Already quantized.
}
Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
}
}
// Compress the given mipmap.
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
const Image * image = mipmap.asFixedImage();
nvDebugCheck(image != NULL);
FastCompressor fast;
fast.setImage(image, inputOptions.alphaMode);
SlowCompressor slow;
slow.setImage(image, inputOptions.alphaMode);
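// Heuristic: small mipmaps are cheaper on the CPU, so only take the CUDA path when the image has at least 512 pixels.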
const bool useCuda = cudaEnabled && image->width() * image->height() >= 512;
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{
compressRGB(image, outputOptions, compressionOptions);
@ -678,18 +723,19 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT1(image, outputOptions);
fast.compressDXT1(outputOptions);
}
else
{
if (cudaEnabled)
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1(image, outputOptions, compressionOptions);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
}
else
{
compressDXT1(image, outputOptions, compressionOptions);
slow.compressDXT1(compressionOptions, outputOptions);
}
}
}
@ -697,49 +743,38 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT1a(image, outputOptions);
fast.compressDXT1a(outputOptions);
}
else
{
if (cudaEnabled)
if (useCuda)
{
nvDebugCheck(cudaSupported);
/*cuda*/compressDXT1a(image, outputOptions, compressionOptions);
/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
}
else
{
compressDXT1a(image, outputOptions, compressionOptions);
slow.compressDXT1a(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT1n)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1n(image, outputOptions, compressionOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT3(image, outputOptions);
fast.compressDXT3(outputOptions);
}
else
{
if (cudaEnabled)
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT3(image, outputOptions, compressionOptions);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT3(compressionOptions, outputOptions);
}
else
{
compressDXT3(image, outputOptions, compressionOptions);
slow.compressDXT3(compressionOptions, outputOptions);
}
}
}
@ -747,18 +782,19 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT5(image, outputOptions);
fast.compressDXT5(outputOptions);
}
else
{
if (cudaEnabled)
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT5(image, outputOptions, compressionOptions);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT5(compressionOptions, outputOptions);
}
else
{
compressDXT5(image, outputOptions, compressionOptions);
slow.compressDXT5(compressionOptions, outputOptions);
}
}
}
@ -766,32 +802,20 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT5n(image, outputOptions);
fast.compressDXT5n(outputOptions);
}
else
{
compressDXT5n(image, outputOptions, compressionOptions);
slow.compressDXT5n(compressionOptions, outputOptions);
}
}
else if (compressionOptions.format == Format_BC4)
{
compressBC4(image, outputOptions, compressionOptions);
slow.compressBC4(compressionOptions, outputOptions);
}
else if (compressionOptions.format == Format_BC5)
{
compressBC5(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_CTX1)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressCTX1(image, outputOptions, compressionOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
slow.compressBC5(compressionOptions, outputOptions);
}
return true;


@ -60,7 +60,7 @@ namespace nvtt
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
bool compressMipmap(const Mipmap & mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:

File diff suppressed because it is too large.


@ -1,87 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_FASTCOMPRESSDXT_H
#define NV_TT_FASTCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
// Color compression:
// Compressor that uses the two colors that are farthest apart (the diameter axis).
void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the extremes of the luminance axis.
void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box and takes alpha into account.
void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the best fit axis.
void compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Simple, but slow compressor that tests all color pairs.
void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force 6d search along the best fit axis.
void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Spatial greedy search.
void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force compressor for DXT5n
void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// Minimize error of the endpoints.
void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// Alpha compression:
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block);
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
} // nv namespace
#endif // NV_TT_FASTCOMPRESSDXT_H


@ -0,0 +1,368 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
using namespace nv;
using namespace OptimalCompress;
namespace
{
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
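// Branchless nearest-entry selection: the comparisons below emit, for each texel,
// the 2-bit DXT1 index of the closest of the four palette greens.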
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
// Choose the 4-bit quantized value that produces the least error. Used by the DXT3 alpha compressor.
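// Example: a = 200 expands the three candidates to 187 (0xBB), 204 (0xCC) and 221 (0xDD);
// 204 is closest, so the function returns 12.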
inline static uint quantize4(uint8 a)
{
int q0 = (a >> 4) - 1;
int q1 = (a >> 4);
int q2 = (a >> 4) + 1;
q0 = (q0 << 4) | q0;
q1 = (q1 << 4) | q1;
q2 = (q2 << 4) | q2;
int d0 = abs(q0 - a);
int d1 = abs(q1 - a);
int d2 = abs(q2 - a);
if (d0 < d1 && d0 < d2) return q0 >> 4;
if (d1 < d2) return q1 >> 4;
return q2 >> 4;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best;
for (uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += besterror;
}
return totalError;
}
static void computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
block->setIndex(i, best);
}
}
} // namespace
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
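// All 16 texels use palette index 2, i.e. (2*col0 + col1)/3, which is exactly the interpolant the tables optimize for.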
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
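The OMatch5/OMatch6 tables consulted above map an 8-bit channel value to the endpoint pair whose index-2 interpolant reproduces it best. A hypothetical brute-force builder for the 5-bit case, shown only to make explicit what the lookup encodes (this is not the repository's generator; a 6-bit table would use (g << 2) | (g >> 4) expansion instead):

#include <cstdlib>   // abs
#include <climits>   // INT_MAX

typedef unsigned char uint8;  // nvcore defines uint8; repeated here only to keep the sketch self-contained

// Illustrative only: for every 8-bit value c, search all 5-bit endpoint pairs (a, b)
// and keep the pair whose palette entry 2, (2*a8 + b8) / 3 after 5->8 bit expansion,
// is closest to c.
static void buildSingleColorTable5(uint8 table[256][2])
{
    for (int c = 0; c < 256; c++)
    {
        int bestError = INT_MAX;
        for (int a = 0; a < 32; a++)
        {
            for (int b = 0; b < 32; b++)
            {
                const int a8 = (a << 3) | (a >> 2);     // expand 5 bits to 8
                const int b8 = (b << 3) | (b >> 2);
                const int interpolant = (2 * a8 + b8) / 3;
                const int error = abs(interpolant - c);
                if (error < bestError)
                {
                    bestError = error;
                    table[c][0] = (uint8)a;
                    table[c][1] = (uint8)b;
                }
            }
        }
    }
}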
void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
if (rgba.a < 128)
{
dxtBlock->col0.u = 0;
dxtBlock->col1.u = 0;
dxtBlock->indices = 0xFFFFFFFF;
}
else
{
compressDXT1(rgba, dxtBlock);
}
}
// Brute force green channel compressor
void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
void OptimalCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
dxtBlock->alpha0 = quantize4(rgba.color(0).a);
dxtBlock->alpha1 = quantize4(rgba.color(1).a);
dxtBlock->alpha2 = quantize4(rgba.color(2).a);
dxtBlock->alpha3 = quantize4(rgba.color(3).a);
dxtBlock->alpha4 = quantize4(rgba.color(4).a);
dxtBlock->alpha5 = quantize4(rgba.color(5).a);
dxtBlock->alpha6 = quantize4(rgba.color(6).a);
dxtBlock->alpha7 = quantize4(rgba.color(7).a);
dxtBlock->alpha8 = quantize4(rgba.color(8).a);
dxtBlock->alpha9 = quantize4(rgba.color(9).a);
dxtBlock->alphaA = quantize4(rgba.color(10).a);
dxtBlock->alphaB = quantize4(rgba.color(11).a);
dxtBlock->alphaC = quantize4(rgba.color(12).a);
dxtBlock->alphaD = quantize4(rgba.color(13).a);
dxtBlock->alphaE = quantize4(rgba.color(14).a);
dxtBlock->alphaF = quantize4(rgba.color(15).a);
}
void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
{
uint8 mina = 255;
uint8 maxa = 0;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
mina = min(mina, alpha);
maxa = max(maxa, alpha);
}
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
int besta0 = maxa;
int besta1 = mina;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
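// (maxa - a0) + (a1 - mina) is a cheap lower bound on the error added by pulling the
// endpoints in to [a1, a0], so this pair can be rejected without evaluating the whole palette.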
if ((maxa-a0) + (a1-mina) > besterror)
continue;
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock);
if (error < besterror)
{
besterror = error;
besta0 = a0;
besta1 = a1;
}
}
}
dxtBlock->alpha0 = besta0;
dxtBlock->alpha1 = besta1;
}
computeAlphaIndices(rgba, dxtBlock);
}


@ -0,0 +1,49 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_OPTIMALCOMPRESSDXT_H
#define NV_TT_OPTIMALCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
namespace OptimalCompress
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
}
} // nv namespace
#endif // NV_TT_OPTIMALCOMPRESSDXT_H


@ -27,7 +27,7 @@
#include <nvimage/BlockDXT.h>
#include "QuickCompressDXT.h"
#include "SingleColorLookup.h"
#include "OptimalCompressDXT.h"
using namespace nv;
@ -288,122 +288,213 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
dxtBlock->indices = computeIndices3(block, a, b);
}*/
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
namespace
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint idx = block->index(i);
float alpha;
if (idx < 2) alpha = 1.0f - idx;
else alpha = (8.0f - idx) / 7.0f;
uint8 alphas[8];
block->evaluatePalette(alphas);
float beta = 1 - alpha;
uint totalError = 0;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * rgba.color(i).a;
betax_sum += beta * rgba.color(i).a;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
totalError += besterror;
block->setIndex(i, best);
}
return totalError;
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 < alpha1)
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
swap(alpha0, alpha1);
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
// Flip indices:
for (int i = 0; i < 16; i++)
{
uint idx = block->index(i);
if (idx < 2) block->setIndex(i, 1 - idx);
else block->setIndex(i, 9 - idx);
float alpha;
if (idx < 2) alpha = 1.0f - idx;
else alpha = (8.0f - idx) / 7.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * rgba.color(i).a;
betax_sum += beta * rgba.color(i).a;
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 < alpha1)
{
swap(alpha0, alpha1);
// Flip indices:
for (int i = 0; i < 16; i++)
{
uint idx = block->index(i);
if (idx < 2) block->setIndex(i, 1 - idx);
else block->setIndex(i, 9 - idx);
}
}
else if (alpha0 == alpha1)
{
for (int i = 0; i < 16; i++)
{
block->setIndex(i, 0);
}
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
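// optimizeAlpha8 refits the two endpoints by least squares for the current index
// assignment: each texel contributes weights alpha/beta derived from the 8-alpha
// palette (index 0 -> alpha0, index 1 -> alpha1, 2..7 -> the interpolants), and
// minimizing sum_i (a*alpha_i + b*beta_i - x_i)^2 gives the normal-equation
// solution a = (Sax*Sb2 - Sbx*Sab)/D, b = (Sbx*Sa2 - Sax*Sab)/D with
// D = Sa2*Sb2 - Sab^2, which is what the factor/a/b lines above compute.
// The endpoints are then swapped (and the indices flipped) so that alpha0 > alpha1,
// keeping the block in 8-alpha mode.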
else if (alpha0 == alpha1)
/*
static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
{
block->setIndex(i, 0);
uint8 x = rgba.color(i).a;
if (x == 0 || x == 255) continue;
uint bits = block->index(i);
if (bits == 6 || bits == 7) continue;
float alpha;
if (bits == 0) alpha = 1.0f;
else if (bits == 1) alpha = 0.0f;
else alpha = (6.0f - block->index(i)) / 5.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * x;
betax_sum += beta * x;
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 > alpha1)
{
swap(alpha0, alpha1);
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
*/
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
const uint64 mask = ~uint64(0xFFFF);
return (block0.u | mask) == (block1.u | mask);
}
}
} // namespace
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
// read block
Vector3 block[16];
extractColorBlockRGB(rgba, block);
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
selectDiagonal(block, 16, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);
if (color0 < color1)
if (rgba.isSingleColor())
{
swap(maxColor, minColor);
swap(color0, color1);
OptimalCompress::compressDXT1(rgba.color(0), dxtBlock);
}
else
{
// read block
Vector3 block[16];
extractColorBlockRGB(rgba, block);
dxtBlock->col0 = Color16(color0);
dxtBlock->col1 = Color16(color1);
dxtBlock->indices = computeIndices4(block, maxColor, minColor);
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
optimizeEndPoints4(block, dxtBlock);
selectDiagonal(block, 16, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);
if (color0 < color1)
{
swap(maxColor, minColor);
swap(color0, color1);
}
dxtBlock->col0 = Color16(color0);
dxtBlock->col1 = Color16(color1);
dxtBlock->indices = computeIndices4(block, maxColor, minColor);
optimizeEndPoints4(block, dxtBlock);
}
}
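// With the isSingleColor early-out, uniform blocks go through the table-driven
// OptimalCompress::compressDXT1 path; all other blocks keep the inset bounding-box
// fit followed by the optimizeEndPoints4 refinement.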
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
if (!rgba.hasAlpha())
bool hasAlpha = false;
for (uint i = 0; i < 16; i++)
{
if (rgba.color(i).a < 128) {
hasAlpha = true;
break;
}
}
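// Only take the transparent (3-color) path below when some texel's alpha falls
// under 128 (presumably the binary-alpha cutoff) instead of relying on
// ColorBlock::hasAlpha().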
if (!hasAlpha)
{
compressDXT1(rgba, dxtBlock);
}
// @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode.
//else if (rgba.isSingleColorNoAlpha()) { ... }
else
{
// read block
@ -436,160 +527,59 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
}
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
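// computeGreenError: expand the two 6-bit green endpoints back to 8 bits, rebuild
// the 4-entry DXT1 palette (endpoints plus 1/3 and 2/3 interpolants) and sum the
// per-texel absolute green error; this is the metric the brute-force search in
// compressDXT1G minimizes.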
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
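// computeGreenIndices: branchless nearest-entry selection. The comparison bits
// b0..b4 between the four palette distances combine into the 2-bit DXT1 index for
// each texel, so the loop has no data-dependent branches.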
// Brute force green channel compressor
void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
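// compressDXT1G pins red to 31 and blue to 0 so only the 6-bit green endpoints
// vary, then searches endpoint pairs exhaustively, skipping pairs for which
// (maxg-g0) + (g1-ming) already exceeds the best error found; the search only
// runs when the block's green range is wider than 4.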
void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
dxtBlock->alpha0 = rgba.color(0).a >> 4;
dxtBlock->alpha1 = rgba.color(1).a >> 4;
dxtBlock->alpha2 = rgba.color(2).a >> 4;
dxtBlock->alpha3 = rgba.color(3).a >> 4;
dxtBlock->alpha4 = rgba.color(4).a >> 4;
dxtBlock->alpha5 = rgba.color(5).a >> 4;
dxtBlock->alpha6 = rgba.color(6).a >> 4;
dxtBlock->alpha7 = rgba.color(7).a >> 4;
dxtBlock->alpha8 = rgba.color(8).a >> 4;
dxtBlock->alpha9 = rgba.color(9).a >> 4;
dxtBlock->alphaA = rgba.color(10).a >> 4;
dxtBlock->alphaB = rgba.color(11).a >> 4;
dxtBlock->alphaC = rgba.color(12).a >> 4;
dxtBlock->alphaD = rgba.color(13).a >> 4;
dxtBlock->alphaE = rgba.color(14).a >> 4;
dxtBlock->alphaF = rgba.color(15).a >> 4;
}
void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
{
compressDXT1(rgba, &dxtBlock->color);
compressDXT3A(rgba, &dxtBlock->alpha);
OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha);
}
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/)
{
// @@ TODO
uint8 alpha0 = 0;
uint8 alpha1 = 255;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
alpha0 = max(alpha0, alpha);
alpha1 = min(alpha1, alpha);
}
AlphaBlockDXT5 block;
block.alpha0 = alpha0 - (alpha0 - alpha1) / 34;
block.alpha1 = alpha1 + (alpha0 - alpha1) / 34;
uint besterror = computeAlphaIndices(rgba, &block);
AlphaBlockDXT5 bestblock = block;
for (int i = 0; i < iterationCount; i++)
{
optimizeAlpha8(rgba, &block);
uint error = computeAlphaIndices(rgba, &block);
if (error >= besterror)
{
// No improvement, stop.
break;
}
if (sameIndices(block, bestblock))
{
bestblock = block;
break;
}
besterror = error;
bestblock = block;
};
// Copy best block to result;
*dxtBlock = bestblock;
}
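// compressDXT5A seeds the endpoints slightly inside the block's min/max alpha,
// then alternates computeAlphaIndices (reassign indices) and optimizeAlpha8
// (least-squares endpoints) for up to iterationCount rounds, stopping early when
// the error stops improving or the indices stop changing; sameIndices masks off
// the two endpoint bytes and compares only the index bits.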
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock)
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
{
compressDXT1(rgba, &dxtBlock->color);
compressDXT5A(rgba, &dxtBlock->alpha);
compressDXT5A(rgba, &dxtBlock->alpha, iterationCount);
}


@ -37,16 +37,13 @@ namespace nv
namespace QuickCompress
{
void compressDXT1(const Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8);
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8);
}
} // nv namespace

File diff suppressed because it is too large.


@ -159,7 +159,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
}
}
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16])
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
@ -189,6 +189,11 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
*sameColor = (axis == make_float3(0, 0, 0));
// Single color compressor needs unweighted colors.
if (*sameColor) colors[idx] = rawColors[idx];
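// A zero best-fit axis means the color covariance is degenerate, which in practice
// happens when all 16 texels share one color; the flag lets the kernels skip the
// permutation search and emit an exact block via saveSingleColorBlockDXT1.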
dps[idx] = dot(rawColors[idx], axis);
#if __DEVICE_EMULATION__
@ -205,43 +210,6 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
}
}
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16])
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
__shared__ float dps[16];
if (idx < 16)
{
// Read color and copy to shared mem.
uint c = image[(bid) * 16 + idx];
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
// No need to synchronize, 16 < warp size.
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
colorSums(colors, sums);
float2 axis = bestFitLine(colors, sums[0]);
dps[idx] = dot(colors[idx], axis);
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, xrefs);
float2 tmp = colors[idx];
colors[xrefs[idx]] = tmp;
}
}
////////////////////////////////////////////////////////////////////////////////
// Round color to RGB565 and expand
@ -258,26 +226,6 @@ inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
return v;
}
inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 31.0f);
v.y = rintf(__saturatef(v.y) * 63.0f);
*w = ((ushort)v.x << 11) | ((ushort)v.y << 5);
v.x *= 0.03227752766457f; // approximate integer bit expansion.
v.y *= 0.01583151765563f;
return v;
}
inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 255.0f);
v.y = rintf(__saturatef(v.y) * 255.0f);
*w = ((ushort)v.x << 8) | ((ushort)v.y);
v.x *= 1.0f / 255.0f;
v.y *= 1.0f / 255.0f;
return v;
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate permutations
@ -521,114 +469,6 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights,
}
*/
__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable3[bits & 3] * colors[i];
akku += prods3[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 4.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 4.0f) * (e.x + e.y);
}
__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 8-8 color and expand...
a = roundAndExpand88(a, start);
b = roundAndExpand88(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate all permutations
@ -757,14 +597,12 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
}
*/
__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
__shared__ uint s_permutations[160];
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
@ -772,7 +610,6 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons
ushort start, end;
uint permutation = permutations[pidx];
if (pidx < 160) s_permutations[pidx] = permutation;
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
@ -791,30 +628,6 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons
bestPermutation ^= 0x55555555; // Flip indices.
}
for(int i = 0; i < 3; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 160) break;
ushort start, end;
uint permutation = s_permutations[pidx];
float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
if (bestStart > bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
}
}
}
errors[idx] = bestError;
}
@ -852,40 +665,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
errors[idx] = bestError;
}
__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
float error = evalPermutationCTX(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
errors[idx] = bestError;
}
////////////////////////////////////////////////////////////////////////////////
// Find index with minimum error
@ -996,11 +775,6 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr
result[bid].y = indices;
}
__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result)
{
saveBlockDXT1(start, end, permutation, xrefs, result);
}
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
{
const int bid = blockIdx.x;
@ -1062,6 +836,39 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
}
}
__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float3 colors[16];
__shared__ float3 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs, &sameColor);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalLevel4Permutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
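// compressLevel4DXT1 appears to restrict the search to the 4-color permutations
// (evalLevel4Permutations, no 3-color mode); the host code selects it when the
// alpha mode is not AlphaMode_Transparency and uses the weighted kernel otherwise.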
__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result)
{
@ -1069,11 +876,18 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
__shared__ float3 sums[16];
__shared__ float weights[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, weights, xrefs);
loadColorBlock(image, colors, sums, weights, xrefs, &sameColor);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
@ -1092,61 +906,6 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
}
__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutationsCTX(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockCTX1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
/*
__device__ float computeError(const float weights[16], uchar a0, uchar a1)
{
@ -1352,17 +1111,12 @@ extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result
compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressLevel4DXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressNormalDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressCTX1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}


@ -24,20 +24,20 @@
#include <nvcore/Debug.h>
#include <nvcore/Containers.h>
#include <nvmath/Color.h>
#include <nvmath/Fitting.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvtt/CompressionOptions.h>
#include <nvtt/OutputOptions.h>
#include <nvtt/FastCompressDXT.h>
#include <nvtt/QuickCompressDXT.h>
#include <nvtt/OptimalCompressDXT.h>
#include "CudaCompressDXT.h"
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#endif
#include <time.h>
@ -48,16 +48,13 @@ using namespace nvtt;
#if defined HAVE_CUDA
//#define MAX_BLOCKS 32768U // 49152, 65535
#define MAX_BLOCKS 8192U // 49152, 65535
#define MAX_BLOCKS 8192U // 32768, 65535
extern "C" void setupCompressKernel(const float weights[3]);
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
#include "Bitmaps.h" // @@ Rename to BitmapTable.h
@ -84,12 +81,15 @@ static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
#endif
CudaCompressor::CudaCompressor()
CudaCompressor::CudaCompressor() : m_bitmapTable(NULL), m_data(NULL), m_result(NULL)
{
#if defined HAVE_CUDA
// Allocate and upload bitmaps.
cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint));
cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
if (m_bitmapTable != NULL)
{
cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
}
// Allocate scratch buffers.
cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U);
@ -107,24 +107,38 @@ CudaCompressor::~CudaCompressor()
#endif
}
bool CudaCompressor::isValid() const
{
#if defined HAVE_CUDA
if (cudaGetLastError() != cudaSuccess)
{
return false;
}
#endif
return m_data != NULL && m_result != NULL && m_bitmapTable != NULL;
}
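// isValid reports whether the constructor's cudaMalloc calls succeeded and no CUDA
// error is pending, presumably so callers can fall back to the CPU compressor
// instead of launching kernels with null device buffers.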
// @@ This code is very repetitive and needs to be cleaned up.
void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
/// Compress image using CUDA.
void CudaCompressor::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
@ -169,7 +183,7 @@ void CudaCompressor::compressDXT1(const Image * image, const OutputOptions::Priv
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
@ -183,18 +197,18 @@ void CudaCompressor::compressDXT1(const Image * image, const OutputOptions::Priv
/// Compress image using CUDA.
void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage);
convertToBlockLinear(m_image, blockLinearImage);
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
@ -214,13 +228,20 @@ void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Priv
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++)
{
ColorBlock rgba(blockLinearImage + (bn + i) * 16);
compressBlock(rgba, alphaBlocks + i);
OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
}
// Check for errors.
@ -252,7 +273,7 @@ void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Priv
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks);
free(blockLinearImage);
@ -267,18 +288,18 @@ void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Priv
/// Compress image using CUDA.
void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage);
convertToBlockLinear(m_image, blockLinearImage);
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
@ -298,13 +319,20 @@ void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Priv
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++)
{
ColorBlock rgba(blockLinearImage + (bn + i) * 16);
compressBlock_Iterative(rgba, alphaBlocks + i);
QuickCompress::compressDXT5A(rgba, alphaBlocks + i);
}
// Check for errors.
@ -336,7 +364,7 @@ void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Priv
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks);
free(blockLinearImage);
@ -350,323 +378,3 @@ void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Priv
}
void CudaCompressor::compressDXT1n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
setupCompressKernel(compressionOptions.colorWeight.ptr());
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressNormalKernelDXT1(count, m_data, m_result, m_bitmapTable);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
void CudaCompressor::compressCTX1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
setupCompressKernel(compressionOptions.colorWeight.ptr());
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressKernelCTX1(count, m_data, m_result, m_bitmapTable);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
#if 0
class Task
{
public:
explicit Task(uint numBlocks) : blockMaxCount(numBlocks), blockCount(0)
{
// System memory allocations.
blockLinearImage = new uint[blockMaxCount * 16];
xrefs = new uint[blockMaxCount * 16];
// Device memory allocations.
cudaMalloc((void**) &d_blockLinearImage, blockMaxCount * 16 * sizeof(uint));
cudaMalloc((void**) &d_compressedImage, blockMaxCount * 8U);
// @@ Check for allocation errors.
}
~Task()
{
delete [] blockLinearImage;
delete [] xrefs;
cudaFree(d_blockLinearImage);
cudaFree(d_compressedImage);
}
void addColorBlock(const ColorBlock & rgba)
{
nvDebugCheck(!isFull());
// @@ Count unique colors?
/*
// Convert colors to vectors.
Array<Vector3> pointArray(16);
for(int i = 0; i < 16; i++) {
const Color32 color = rgba.color(i);
pointArray.append(Vector3(color.r, color.g, color.b));
}
// Find best fit line.
const Vector3 axis = Fit::bestLine(pointArray).direction();
// Project points to axis.
float dps[16];
uint * order = &xrefs[blockCount * 16];
for (uint i = 0; i < 16; ++i)
{
dps[i] = dot(pointArray[i], axis);
order[i] = i;
}
// Sort them.
for (uint i = 0; i < 16; ++i)
{
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
{
swap(dps[j], dps[j - 1]);
swap(order[j], order[j - 1]);
}
}
*/
// Write sorted colors to blockLinearImage.
for(uint i = 0; i < 16; ++i)
{
// blockLinearImage[blockCount * 16 + i] = rgba.color(order[i]);
blockLinearImage[blockCount * 16 + i] = rgba.color(i);
}
++blockCount;
}
bool isFull()
{
nvDebugCheck(blockCount <= blockMaxCount);
return blockCount == blockMaxCount;
}
void flush(const OutputOptions::Private & outputOptions)
{
if (blockCount == 0)
{
// Nothing to do.
return;
}
// Copy input color blocks.
cudaMemcpy(d_blockLinearImage, blockLinearImage, blockCount * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressKernelDXT1(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
uint * compressedImage = blockLinearImage;
cudaMemcpy(compressedImage, d_compressedImage, blockCount * 8, cudaMemcpyDeviceToHost);
// @@ Sort block indices.
// Output result.
if (outputOptions.outputHandler != NULL)
{
// outputOptions.outputHandler->writeData(compressedImage, blockCount * 8);
}
blockCount = 0;
}
private:
const uint blockMaxCount;
uint blockCount;
uint * blockLinearImage;
uint * xrefs;
uint * d_blockLinearImage;
uint * d_compressedImage;
};
void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
#if defined HAVE_CUDA
const uint w = image->width();
const uint h = image->height();
const uint blockNum = ((w + 3) / 4) * ((h + 3) / 4);
const uint blockMax = 32768; // 49152, 65535
setupCompressKernelDXT1(compressionOptions.colorWeight.ptr());
ColorBlock rgba;
Task task(min(blockNum, blockMax));
clock_t start = clock();
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
task.addColorBlock(rgba);
if (task.isFull())
{
task.flush(outputOptions);
}
}
}
task.flush(outputOptions);
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
#endif // 0


@ -37,17 +37,22 @@ namespace nv
CudaCompressor();
~CudaCompressor();
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressCTX1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
bool isValid() const;
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
uint * m_bitmapTable;
uint * m_data;
uint * m_result;
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
} // nv namespace


@ -87,64 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
return a.x == b.x && a.y == b.y && a.z == b.z;
}
// float2 operators
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
return make_float2(a.x*b.x, a.y*b.y);
}
inline __device__ __host__ float2 operator *(float f, float2 v)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator *(float2 v, float f)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator +(float2 a, float2 b)
{
return make_float2(a.x+b.x, a.y+b.y);
}
inline __device__ __host__ void operator +=(float2 & b, float2 a)
{
b.x += a.x;
b.y += a.y;
}
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
return make_float2(a.x-b.x, a.y-b.y);
}
inline __device__ __host__ void operator -=(float2 & b, float2 a)
{
b.x -= a.x;
b.y -= a.y;
}
inline __device__ __host__ float2 operator /(float2 v, float f)
{
float inv = 1.0f / f;
return v * inv;
}
inline __device__ __host__ void operator /=(float2 & b, float f)
{
float inv = 1.0f / f;
b.x *= inv;
b.y *= inv;
}
inline __device__ __host__ float dot(float2 a, float2 b)
{
return a.x * b.x + a.y * b.y;
}
inline __device__ __host__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
@ -206,7 +148,7 @@ inline __device__ bool singleColor(const float3 * colors)
bool sameColor = true;
for (int i = 0; i < 16; i++)
{
sameColor &= (colors[idx] == colors[0]);
sameColor &= (colors[i] == colors[0]);
}
return sameColor;
#else
@ -301,89 +243,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
return firstEigenVector(covariance);
}
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
{
// @@ 8 iterations is probably more than enough.
float2 v = make_float2(1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1];
float y = v.x * matrix[1] + v.y * matrix[2];
float m = max(x, y);
float iv = 1.0f / m;
if (m == 0.0f) iv = 0.0f;
v = make_float2(x*iv, y*iv);
}
return v;
}
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
float2 color_sum = make_float2(0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[3] = {0, 0, 0};
for (int i = 0; i < 16; i++)
{
float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[2] += a.y * a.y;
}
#else
const int idx = threadIdx.x;
float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
__shared__ float covariance[16*3];
covariance[3 * idx + 0] = diff.x * diff.x;
covariance[3 * idx + 1] = diff.x * diff.y;
covariance[3 * idx + 2] = diff.y * diff.y;
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
}
}
#endif
// Compute first eigen vector.
return firstEigenVector2D(covariance);
}
#endif // CUDAMATH_H


@ -22,15 +22,18 @@
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda_runtime.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
@ -52,31 +55,93 @@ static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return true;
}
}
return !bIsWow64;
return !bIsWow64;
}
#endif
*/
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
Library nvcuda("nvcuda.dll");
#else
Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif
if (!nvcuda.isValid())
{
nvDebug("*** CUDA driver not found.\n");
return false;
}
if (version >= 2000)
{
void * address = nvcuda.bindSymbol("cuStreamCreate");
if (address == NULL) {
nvDebug("*** CUDA driver version < 2.0.\n");
return false;
}
}
if (version >= 2010)
{
void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
if (address == NULL) {
nvDebug("*** CUDA driver version < 2.1.\n");
return false;
}
}
if (version >= 2020)
{
typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
if (driverGetVersion == NULL) {
nvDebug("*** CUDA driver version < 2.2.\n");
return false;
}
int driverVersion;
CUresult err = driverGetVersion(&driverVersion);
if (err != CUDA_SUCCESS) {
nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err));
return false;
}
return driverVersion >= version;
}
#endif // HAVE_CUDA
return true;
}
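// isCudaDriverAvailable probes the driver library (nvcuda.dll on Windows, libcuda
// elsewhere) for entry points introduced by successive releases: cuStreamCreate
// for 2.0, cuModuleLoadDataEx for 2.1 and cuDriverGetVersion for 2.2, so a runtime
// built against a newer CUDA than the installed driver is rejected up front.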
/// Determine if CUDA is available.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
if (isWindowsVista()) return false;
//if (isWindowsVista() || !isWow32()) return false;
#endif
// Make sure that CUDA driver matches CUDA runtime.
if (!isCudaDriverAvailable(CUDART_VERSION))
{
nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
return false;
}
int count = deviceCount();
if (count == 1)
{
@ -89,10 +154,10 @@ bool nv::cuda::isHardwarePresent()
{
return false;
}
// @@ Make sure that warp size == 32
}
// @@ Make sure that warp size == 32
return count > 0;
#else
return false;
@ -115,14 +180,60 @@ int nv::cuda::deviceCount()
return 0;
}
int nv::cuda::getFastestDevice()
{
int max_gflops_device = 0;
#if defined HAVE_CUDA
int max_gflops = 0;
const int device_count = deviceCount();
int current_device = 0;
while (current_device < device_count)
{
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, current_device);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (device_properties.major != -1 && device_properties.minor != -1)
{
if( gflops > max_gflops )
{
max_gflops = gflops;
max_gflops_device = current_device;
}
}
current_device++;
}
#endif
return max_gflops_device;
}
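// getFastestDevice ranks devices by multiProcessorCount * clockRate as a rough
// throughput proxy, skipping devices that report no compute capability
// (major/minor of -1), and returns device 0 when CUDA is unavailable.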
/// Activate the given device.
bool nv::cuda::setDevice(int i)
{
nvCheck(i < deviceCount());
#if defined HAVE_CUDA
cudaError_t result = cudaSetDevice(i);
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
}
return result == cudaSuccess;
#else
return false;
#endif
}
void nv::cuda::exit()
{
#if defined HAVE_CUDA
cudaError_t result = cudaThreadExit();
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
}
#endif
}


@ -31,7 +31,9 @@ namespace nv
{
bool isHardwarePresent();
int deviceCount();
int getFastestDevice();
bool setDevice(int i);
void exit();
};
} // nv namespace


@ -49,6 +49,14 @@
#define NVTT_VERSION 200
#define NVTT_DECLARE_PIMPL(Class) \
private: \
Class(const Class &); \
void operator=(const Class &); \
public: \
struct Private; \
Private & m
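// NVTT_DECLARE_PIMPL makes each public struct non-copyable (private, unimplemented
// copy constructor and assignment operator) and declares the pimpl reference
// (struct Private; Private & m), replacing the per-class boilerplate removed in
// the hunks below.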
// Public interface.
namespace nvtt
@ -75,9 +83,6 @@ namespace nvtt
Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2
Format_DXT1n,
Format_CTX1,
};
/// Quality modes.
@ -92,6 +97,8 @@ namespace nvtt
/// Compression options. This class describes the desired compression format and other compression settings.
struct CompressionOptions
{
NVTT_DECLARE_PIMPL(CompressionOptions);
NVTT_API CompressionOptions();
NVTT_API ~CompressionOptions();
@ -107,10 +114,6 @@ namespace nvtt
NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
//private:
struct Private;
Private & m;
};
@ -173,6 +176,8 @@ namespace nvtt
/// Input options. Specify format and layout of the input texture.
struct InputOptions
{
NVTT_DECLARE_PIMPL(InputOptions);
NVTT_API InputOptions();
NVTT_API ~InputOptions();
@ -217,10 +222,6 @@ namespace nvtt
// Set resizing options.
NVTT_API void setMaxExtents(int d);
NVTT_API void setRoundMode(RoundMode mode);
//private:
struct Private;
Private & m;
};
@ -261,6 +262,8 @@ namespace nvtt
/// the compressor to the user.
struct OutputOptions
{
NVTT_DECLARE_PIMPL(OutputOptions);
NVTT_API OutputOptions();
NVTT_API ~OutputOptions();
@ -272,16 +275,14 @@ namespace nvtt
NVTT_API void setOutputHandler(OutputHandler * outputHandler);
NVTT_API void setErrorHandler(ErrorHandler * errorHandler);
NVTT_API void setOutputHeader(bool outputHeader);
//private:
struct Private;
Private & m;
};
/// Texture compressor.
struct Compressor
{
NVTT_DECLARE_PIMPL(Compressor);
NVTT_API Compressor();
NVTT_API ~Compressor();
@ -293,10 +294,6 @@ namespace nvtt
// Estimate the size of compressing the input with the given options.
NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const;
//private:
struct Private;
Private & m;
};


@ -207,7 +207,6 @@ NVTT_API void nvttDestroyCompressionOptions(NvttCompressionOptions * compression
NVTT_API void nvttSetCompressionOptionsFormat(NvttCompressionOptions * compressionOptions, NvttFormat format);
NVTT_API void nvttSetCompressionOptionsQuality(NvttCompressionOptions * compressionOptions, NvttQuality quality);
NVTT_API void nvttSetCompressionOptionsColorWeights(NvttCompressionOptions * compressionOptions, float red, float green, float blue, float alpha);
NVTT_API void nvttEnableCompressionOptionsCudaCompression(NvttCompressionOptions * compressionOptions, NvttBoolean enable);
NVTT_API void nvttSetCompressionOptionsPixelFormat(NvttCompressionOptions * compressionOptions, unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
NVTT_API void nvttSetCompressionOptionsQuantization(NvttCompressionOptions * compressionOptions, NvttBoolean colorDithering, NvttBoolean alphaDithering, NvttBoolean binaryAlpha, int alphaThreshold);


@ -1,13 +1,8 @@
PROJECT(squish)
ENABLE_TESTING()
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
SET(SQUISH_SRCS
# alpha.cpp
# alpha.h
# clusterfit.cpp
# clusterfit.h
fastclusterfit.cpp
fastclusterfit.h
weightedclusterfit.cpp
@ -21,32 +16,13 @@ SET(SQUISH_SRCS
config.h
maths.cpp
maths.h
# rangefit.cpp
# rangefit.h
# singlecolourfit.cpp
# singlecolourfit.h
# singlecolourlookup.inl
# squish.cpp
# squish.h
simd.h
simd_sse.h
simd_ve.h)
ADD_LIBRARY(squish STATIC ${SQUISH_SRCS})
# libpng
#FIND_PACKAGE(PNG)
#IF(PNG_FOUND)
# INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
# ADD_EXECUTABLE(squishpng extra/squishpng.cpp)
# TARGET_LINK_LIBRARIES(squishpng squish ${PNG_LIBRARY})
#ENDIF(PNG_FOUND)
##ADD_EXECUTABLE(squishgen extra/squishgen.cpp)
#ADD_EXECUTABLE(squishtest extra/squishtest.cpp)
#TARGET_LINK_LIBRARIES(squishtest squish)
#ADD_TEST(SQUISHTEST squishtest)
IF(CMAKE_COMPILER_IS_GNUCXX)
SET_TARGET_PROPERTIES(squish PROPERTIES COMPILE_FLAGS -fPIC)
ENDIF(CMAKE_COMPILER_IS_GNUCXX)


@ -29,6 +29,8 @@
#include "colourblock.h"
#include <cfloat>
#include "fastclusterlookup.inl"
namespace squish {
FastClusterFit::FastClusterFit()
@ -97,91 +99,6 @@ void FastClusterFit::SetColourSet( ColourSet const* colours, int flags )
}
struct Precomp {
float alpha2_sum;
float beta2_sum;
float alphabeta_sum;
float factor;
};
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
void FastClusterFit::DoPrecomputation()
{
int i = 0;
// Three element clusters:
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
int c2 = 16 - c0 - c1;
/*if (c2 == 16) {
// a = b = x2 / 16
s_threeElement[i].alpha2_sum = 0;
s_threeElement[i].beta2_sum = 16;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_threeElement[i].alpha2_sum = 16;
s_threeElement[i].beta2_sum = 0;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
}
i++;
}
}
//printf("%d three cluster elements\n", i);
// Four element clusters:
i = 0;
for( int c0 = 0; c0 <= 16; c0++)
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
int c3 = 16 - c0 - c1 - c2;
/*if (c3 == 16) {
// a = b = x3 / 16
s_fourElement[i].alpha2_sum = 16.0f;
s_fourElement[i].beta2_sum = 0.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_fourElement[i].alpha2_sum = 0.0f;
s_fourElement[i].beta2_sum = 16.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
}
i++;
}
}
}
//printf("%d four cluster elements\n", i);
}
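// DoPrecomputation filled the 153 three-cluster and 969 four-cluster entries
// (alpha2_sum, beta2_sum, alphabeta_sum, factor) at startup; those tables are
// presumably what the generated fastclusterlookup.inl included above now provides.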
void FastClusterFit::SetMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD


@ -44,8 +44,6 @@ public:
void SetMetric(float r, float g, float b);
float GetBestError() const;
static void DoPrecomputation();
// Make them public
virtual void Compress3( void* block );
virtual void Compress4( void* block );

File diff suppressed because it is too large.


@ -50,6 +50,16 @@ public:
return *this;
}
Vec4( const float * v )
{
union { vector float v; float c[4]; } u;
u.c[0] = v[0];
u.c[1] = v[1];
u.c[2] = v[2];
u.c[3] = v[3];
m_v = u.v;
}
Vec4( float x, float y, float z, float w )
{
union { vector float v; float c[4]; } u;


@ -1,221 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <stdio.h> // printf
#include <stdlib.h> // rand
#include <time.h> // clock
#include <string.h> // memcpy, memcmp
#include <assert.h>
#define FRAME_COUNT 1000
#define WIDTH 2048
#define HEIGHT 2048
#define INPUT_SIZE (WIDTH*HEIGHT)
#define OUTPUT_SIZE (WIDTH*HEIGHT/16*2)
static int s_input[INPUT_SIZE];
static int s_reference[OUTPUT_SIZE];
static int s_output[OUTPUT_SIZE];
static int s_frame = 0;
struct MyOutputHandler : public nvtt::OutputHandler
{
MyOutputHandler() : m_ptr(NULL) {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
assert(size == sizeof(int) * OUTPUT_SIZE);
assert(width == WIDTH);
assert(height == HEIGHT);
assert(depth == 1);
assert(face == 0);
assert(miplevel == 0);
m_ptr = (unsigned char *)s_output;
if (s_frame == 1)
{
// Save first result as reference.
memcpy(s_reference, s_output, sizeof(int) * OUTPUT_SIZE);
}
else if (s_frame > 1)
{
// Compare against reference.
if (memcmp(s_output, s_reference, sizeof(int) * OUTPUT_SIZE) != 0)
{
printf("Compressed image different to original.\n");
exit(EXIT_FAILURE);
}
}
}
virtual bool writeData(const void * data, int size)
{
memcpy(m_ptr, data, size);
m_ptr += size;
return true;
}
unsigned char * m_ptr;
};
void precomp()
{
unsigned int bitmaps[1024];
int num = 0;
printf("{\n");
printf("\t%8X,\n", 0);
bitmaps[0] = 0;
num = 1;
for (int a = 1; a <= 15; a++)
{
for (int b = a; b <= 15; b++)
{
for (int c = b; c <= 15; c++)
{
int indices[16];
int i = 0;
for(; i < a; i++) {
indices[i] = 0;
}
for(; i < a+b; i++) {
indices[i] = 2;
}
for(; i < a+b+c; i++) {
indices[i] = 3;
}
for(; i < 16; i++) {
indices[i] = 1;
}
unsigned int bm = 0;
for(i = 0; i < 16; i++) {
bm |= indices[i] << (i * 2);
}
printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c);
bitmaps[num] = bm;
num++;
}
}
}
printf("}\n");
printf("// num = %d\n", num);
/*
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < 16; ++m )
{
indices[m] = 3;
}
int kmax = ( j == 0 ) ? 15 : 16;
for( int k = kmax; k >= j; --k )
{
// last cluster [k,n) is at the end
if( k < 16 )
{
indices[k] = 1;
}
uint bitmap = 0;
bool hasThree = false;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
}
bitmaps[num] = bitmap;
num++;
}
}
}
*/
}
int main(int argc, char *argv[])
{
//precomp();
nvtt::InputOptions inputOptions;
inputOptions.setTextureLayout(nvtt::TextureType_2D, WIDTH, HEIGHT);
for (int i = 0; i < INPUT_SIZE; i++)
{
s_input[i] = rand();
}
inputOptions.setMipmapData(s_input, WIDTH, HEIGHT);
inputOptions.setMipmapGeneration(false);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_DXT1);
// compressionOptions.setFormat(nvtt::Format_DXT1n);
// compressionOptions.setFormat(nvtt::Format_CTX1);
nvtt::OutputOptions outputOptions;
outputOptions.setOutputHeader(false);
MyOutputHandler outputHandler;
outputOptions.setOutputHandler(&outputHandler);
nvtt::Compressor compressor;
for (s_frame = 0; s_frame < FRAME_COUNT; s_frame++)
{
clock_t start = clock();
printf("compressing frame %d:\n", s_frame);
compressor.process(inputOptions, compressionOptions, outputOptions);
clock_t end = clock();
printf("time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
}
return EXIT_SUCCESS;
}
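In the deleted stress test above, `precomp()` packs sixteen 2-bit palette indices into one 32-bit word with `bm |= indices[i] << (i * 2)`. A small standalone sketch of that packing plus the matching unpacking; the pack/unpack names are mine, not part of the tool:

#include <stdio.h>

// Pack sixteen 2-bit DXT-style indices (values 0..3) into one 32-bit word.
static unsigned int pack(const int indices[16])
{
    unsigned int bm = 0;
    for (int i = 0; i < 16; i++) {
        bm |= (unsigned int)(indices[i] & 3) << (i * 2);
    }
    return bm;
}

// Recover index i (0..15) from a packed bitmap.
static int unpack(unsigned int bm, int i)
{
    return (bm >> (i * 2)) & 3;
}

int main()
{
    int indices[16] = { 0,0,0,2,2,2,3,3,3,1,1,1,1,1,1,1 };
    unsigned int bm = pack(indices);
    printf("0x%08X -> index 5 = %d\n", bm, unpack(bm, 5));
    return 0;
}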

View File

@@ -42,11 +42,11 @@ struct MyOutputHandler : public nvtt::OutputHandler
MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), stream(new nv::StdOutputStream(name)) {}
virtual ~MyOutputHandler() { delete stream; }
virtual void setTotal(int64 t)
void setTotal(int64 t)
{
total = t + 128;
}
virtual void setDisplayProgress(bool b)
void setDisplayProgress(bool b)
{
verbose = b;
}
@@ -373,7 +373,6 @@ int main(int argc, char *argv[])
inputOptions.setMipmapGeneration(false);
}
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(format);
if (fast)
@@ -409,6 +408,16 @@ int main(int argc, char *argv[])
nvtt::Compressor compressor;
compressor.enableCudaAcceleration(!nocuda);
printf("CUDA acceleration ");
if (compressor.isCudaAccelerationEnabled())
{
printf("ENABLED\n\n");
}
else
{
printf("DISABLED\n\n");
}
outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions));
outputHandler.setDisplayProgress(!silent);
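The lines added in this hunk boil down to a toggle-and-report pattern around the compressor. A minimal sketch using only the nvtt calls that appear in these diffs (option setup is omitted, and the wrapper function is mine):

#include <nvtt/nvtt.h>
#include <stdio.h>

// Request or refuse CUDA, report what the library actually settled on,
// then size the output and run the compressor.
static bool compress(nvtt::InputOptions & input,
                     nvtt::CompressionOptions & compression,
                     nvtt::OutputOptions & output,
                     bool nocuda)
{
    nvtt::Compressor compressor;
    compressor.enableCudaAcceleration(!nocuda);
    printf("CUDA acceleration %s\n",
           compressor.isCudaAccelerationEnabled() ? "ENABLED" : "DISABLED");

    int size = compressor.estimateSize(input, compression);
    printf("estimated output size: %d bytes\n", size);

    return compressor.process(input, compression, output);
}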

View File

@@ -84,7 +84,7 @@ struct Error
{
mabse /= samples;
mse /= samples;
rmse = sqrt(mse);
rmse = sqrtf(mse);
psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
}
@@ -134,7 +134,7 @@ struct NormalError
{
ade /= samples;
mse /= samples * 3;
rmse = sqrt(mse);
rmse = sqrtf(mse);
psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
}
}
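These two hunks only swap `sqrt` for `sqrtf`, but the metric around them is worth spelling out: squared per-channel differences are accumulated, divided by the sample count to give MSE, then RMSE = sqrt(MSE) and, for 8-bit data, PSNR = 20 * log10(255 / RMSE), pinned to 999 when the images are identical. A self-contained sketch of that computation for a pair of single-channel buffers; the function name and buffers are mine:

#include <math.h>
#include <stdio.h>

// RMSE and PSNR of two 8-bit single-channel buffers, mirroring the formula in the diff.
static void imageError(const unsigned char * a, const unsigned char * b, int samples)
{
    double mse = 0.0;
    for (int i = 0; i < samples; i++) {
        double d = double(a[i]) - double(b[i]);
        mse += d * d;
    }
    mse /= samples;

    float rmse = sqrtf(float(mse));
    float psnr = (rmse == 0) ? 999.0f : 20.0f * log10f(255.0f / rmse);
    printf("rmse = %f, psnr = %f dB\n", rmse, psnr);
}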

View File

@@ -73,10 +73,12 @@ int main(int argc, char *argv[])
float scale = 0.5f;
float gamma = 2.2f;
nv::Filter * filter = NULL;
nv::AutoPtr<nv::Filter> filter;
nv::Path input;
nv::Path output;
nv::FloatImage::WrapMode wrapMode = nv::FloatImage::WrapMode_Mirror;
// Parse arguments.
for (int i = 1; i < argc; i++)
{
@@ -108,9 +110,18 @@ int main(int argc, char *argv[])
else if (strcmp("lanczos", argv[i]) == 0) filter = new nv::LanczosFilter();
else if (strcmp("kaiser", argv[i]) == 0) {
filter = new nv::KaiserFilter(3);
((nv::KaiserFilter *)filter)->setParameters(4.0f, 1.0f);
((nv::KaiserFilter *)filter.ptr())->setParameters(4.0f, 1.0f);
}
}
else if (strcmp("-w", argv[i]) == 0)
{
if (i+1 == argc) break;
i++;
if (strcmp("mirror", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Mirror;
else if (strcmp("repeat", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Repeat;
else if (strcmp("clamp", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Clamp;
}
else if (argv[i][0] != '-')
{
input = argv[i];
@@ -140,6 +151,10 @@ int main(int argc, char *argv[])
printf(" * mitchell\n");
printf(" * lanczos\n");
printf(" * kaiser\n");
printf(" -w mode One of the following: (default = 'mirror')\n");
printf(" * mirror\n");
printf(" * repeat\n");
printf(" * clamp\n");
return 1;
}
@@ -155,15 +170,14 @@ int main(int argc, char *argv[])
nv::FloatImage fimage(&image);
fimage.toLinear(0, 3, gamma);
nv::AutoPtr<nv::FloatImage> fresult(fimage.downSample(*filter, uint(image.width() * scale), uint(image.height() * scale), nv::FloatImage::WrapMode_Mirror));
nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(*filter, uint(image.width() * scale), uint(image.height() * scale), wrapMode));
nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output);
nv::ImageIO::saveTGA(stream, result.ptr()); // @@ Add generic save function. Add support for png too.
delete filter;
return 0;
}
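Taken together, the last three hunks mean the tool now owns its filter through nv::AutoPtr, accepts a -w wrap-mode switch, and calls FloatImage::resize instead of downSample. A minimal sketch of the resulting resize path for an already-loaded image; the include paths and the helper name are my guesses at the library layout, while the nv:: calls themselves are the ones visible in the diff:

#include <nvcore/Ptr.h>
#include <nvcore/StdStream.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Filter.h>
#include <nvimage/ImageIO.h>

// Half-size an image with a Kaiser filter and clamp addressing, then save it as TGA.
static void halveImage(nv::Image & image, const char * outputName, float gamma = 2.2f)
{
    nv::FloatImage fimage(&image);
    fimage.toLinear(0, 3, gamma);

    nv::AutoPtr<nv::Filter> filter(new nv::KaiserFilter(3));
    ((nv::KaiserFilter *)filter.ptr())->setParameters(4.0f, 1.0f);

    nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(*filter,
        image.width() / 2, image.height() / 2,
        nv::FloatImage::WrapMode_Clamp));

    nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
    result->setFormat(nv::Image::Format_ARGB);

    nv::StdOutputStream stream(outputName);
    nv::ImageIO::saveTGA(stream, result.ptr());
}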