diff --git a/CMakeLists.txt b/CMakeLists.txt index e3d3438..51f1253 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,17 +6,9 @@ SET(NV_CMAKE_DIR "${NV_SOURCE_DIR}/cmake") SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${NV_CMAKE_DIR}") IF(WIN32) - # gnuwin32 paths: - SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32") - SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include") - SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib") - - # Set GLUT path: - SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut") - - # Set FreeImage path: - SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage") - + SET(GNUWIN32 "${NV_SOURCE_DIR}/gnuwin32") + SET(CMAKE_INCLUDE_PATH "${GNUWIN32}/include") + SET(CMAKE_LIBRARY_PATH "${GNUWIN32}/lib") ENDIF(WIN32) INCLUDE(${NV_CMAKE_DIR}/OptimalOptions.cmake) @@ -30,41 +22,9 @@ IF(NVTT_SHARED) SET(NVIMAGE_SHARED TRUE) ENDIF(NVTT_SHARED) -ADD_SUBDIRECTORY(extern) ADD_SUBDIRECTORY(src) -# These files should only be installed when creating packages. 
-INSTALL(FILES - NVIDIA_Texture_Tools_LICENSE.txt - NVIDIA_Texture_Tools_README.txt - ChangeLog - DESTINATION share/doc/nvtt) - -# Add packaging support -INCLUDE(InstallRequiredSystemLibraries) - -IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - SET(CPACK_GENERATOR "TGZ;DEB") -ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - -SET(CPACK_PACKAGE_NAME "nvidia-texture-tools") -SET(CPACK_PACKAGE_VERSION_MAJOR "2") -SET(CPACK_PACKAGE_VERSION_MINOR "1") -SET(CPACK_PACKAGE_VERSION_PATCH "0") -SET(CPACK_PACKAGE_VERSION "2.1.0") -SET(CPACK_PACKAGE_CONTACT "Ignacio Castaņo ") -SET(CPACK_PACKAGE_VENDOR "NVIDIA Corporation") -SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "GPU-accelerated Texture Tools with support for DirectX 10 texture formats") - -SET(CPACK_PACKAGE_DESCRIPTION_FILE "${NV_SOURCE_DIR}/NVIDIA_Texture_Tools_README.txt") -SET(CPACK_RESOURCE_FILE_LICENSE "${NV_SOURCE_DIR}/NVIDIA_Texture_Tools_LICENSE.txt") - -# NSIS options: IF(WIN32) - SET(CPACK_NSIS_DISPLAY_NAME "${CPACK_PACKAGE_VENDOR}\\\\NVIDIA Texture Tools 2.1") - SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${CPACK_PACKAGE_VENDOR}\\\\NVIDIA Texture Tools 2.1") - SET(CPACK_PACKAGE_ICON "${NV_SOURCE_DIR}\\\\project\\\\vc8\\\\nvcompress\\\\nvidia.ico") + ADD_SUBDIRECTORY(gnuwin32) ENDIF(WIN32) - -INCLUDE(CPack) diff --git a/ChangeLog b/ChangeLog index 53fd75e..f2ef204 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,21 +1,8 @@ -NVIDIA Texture Tools version 2.1.0 - * CTX1 CUDA compressor. - * DXT1n CUDA compressor. - * Support alpha premultiplication by Charles Nicholson. See issue 30. - * Improved decompressor tool submitted by Amorilia. See issue 41. - * Add support for YCoCg color transform. Fixes issue 18. - * Add support for linear and swizzle transforms. Fixes issue 4. - * Fix loading of EXR files using OpenEXR. - * Use FreeImage as primary image loading library. Fixes issue 31. Reverted. - * Output swizzle codes like AMD's tools. - * Added support for saving PNGs by Frank Richter. Fixes issue 79 and 80. 
- * Added gnome thumbnailer by Frank Richter. Fixes issue 82. - * Cleanup sources removing files that are not strictly required. - NVIDIA Texture Tools version 2.0.7 * Output correct exit codes. Fixes issue 92. * Fix thread-safety errors. Fixes issue 90. * Add SIMD power method. Fixes issue 94. + * Interact better with applications that already use CUDA. NVIDIA Texture Tools version 2.0.6 * Fix dll version checking. diff --git a/NVIDIA_Texture_Tools_README.txt b/NVIDIA_Texture_Tools_README.txt index 9e7908d..93a26c1 100644 --- a/NVIDIA_Texture_Tools_README.txt +++ b/NVIDIA_Texture_Tools_README.txt @@ -2,7 +2,7 @@ -------------------------------------------------------------------------------- NVIDIA Texture Tools README.txt -Version 2.1 +Version 2.0 -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- @@ -20,16 +20,13 @@ VI. Frequently Asked Questions I. Introduction -------------------------------------------------------------------------------- -The NVIDIA Texture Tools is a collection of image processing and texture -manipulation tools, designed to be integrated in game tools and asset -conditioning pipelines. +This is our first alpha release of our new Texture Tools. The main highlights of +this release are support for all DX10 texture formats, higher speed and improved +compression quality. -The primary features of the library are mipmap and normal map generation, format -conversion and DXT compression. - -DXT compression is based on Simon Brown's squish library. The library also -contains an alternative GPU-accelerated compressor that uses CUDA and on some -GPUs is one order of magnitude faster. +In addition to that it also comes with a hardware accelerated compressor that +uses CUDA to compress blocks in parallel on the GPU and runs around 10 times +faster than the CPU counterpart. 
You can obtain CUDA from our developer site at: @@ -38,29 +35,12 @@ http://developer.nvidia.com/object/cuda.html The source code of the Texture Tools is being released under the terms of the MIT license. -The latest installer is available at the official website: - -http://developer.nvidia.com/object/texture_tools.html - -Documentation and additional information can be found at the development site: - -http://code.google.com/p/nvidia-texture-tools/ - II. Contents -------------------------------------------------------------------------------- This release contains only the source code of the texture compression library -and several command line applications that serve as examples of how to use the -library. - -Documentation for the texture compression library is available at: - -http://code.google.com/p/nvidia-texture-tools/wiki/ApiDocumentation - -A brief description of the command line tools can be found at: - -http://code.google.com/p/nvidia-texture-tools/wiki/CommandLineTools +and an example commandline application that shows its use. III. Compilation Instructions @@ -98,11 +78,7 @@ The following file contains a simple example that shows how to use the library: src/nvimage/nvtt/compress.cpp -Detailed documentation of the API can be found at: - -http://code.google.com/p/nvidia-texture-tools/wiki/ApiDocumentation - -The usage of the commandline compression tool is the following: +The usage of the commandline tool is the following: $ nvcompress [options] infile [outfile] @@ -110,33 +86,24 @@ where 'infile' is and TGA, PNG, PSD, DDS or JPG file, 'outfile' is a DDS file and 'options' is one or more of the following: Input options: - -color The input image is a color map (default). - -alpha The input image has an alpha channel used for transparency. - -normal The input image is a normal map. - -tonormal Convert input to normal map. - -clamp Clamp wrapping mode (default). - -repeat Repeat wrapping mode. - -nomips Disable mipmap generation. 
- -premula Premultiply alpha into color channel. - -mipfilter Mipmap filter. One of the following: box, triangle, kaiser. + -color The input image is a color map (default). + -normal The input image is a normal map. + -tonormal Convert input to normal map. + -clamp Clamp wrapping mode (default). + -repeat Repeat wrapping mode. + -nomips Disable mipmap generation. Compression options: -fast Fast compression. -nocuda Do not use cuda compressor. -rgb RGBA format -bc1 BC1 format (DXT1) - -bc1n BC1 normal map format (DXT1nm) - -bc1a BC1 format with binary alpha (DXT1a) -bc2 BC2 format (DXT3) -bc3 BC3 format (DXT5) - -bc3n BC3 normal map format (DXT5nm) + -bc3n BC3 normal map format (DXT5n/RXGB) -bc4 BC4 format (ATI1) -bc5 BC5 format (3Dc/ATI2) -Output options: - -silent Do not output progress messages - -dds10 Use DirectX 10 DDS format - In order to run the compiled example on a PC that doesn't have Microsoft Visual Studio 2003 installed, you will have to install the Microsoft Visual Studio 2003 redistributable package that you can download at: @@ -147,41 +114,54 @@ http://go.microsoft.com/fwlink/?linkid=65127&clcid=0x409 V. Known Issues -------------------------------------------------------------------------------- -None so far. Please file suggestions and bug reports at: - -http://code.google.com/p/nvidia-texture-tools/issues/list - -or contact us directly: +None so far. Please send suggestions and bug reports to: TextureTools@nvidia.com +or report them at: + +http://code.google.com/p/nvidia-texture-tools/issues/list VI. Frequently Asked Questions -------------------------------------------------------------------------------- -=== Where can I ask questions? How can I get support? === -You can ask questions about the usage of the Texture Tools at the [http://developer.nvidia.com/forums/index.php?showforum=9 NVIDIA developer forums]. You can report bugs and request new features in our [http://code.google.com/p/nvidia-texture-tools/issues/list issue database]. 
If you are a developer and have questions about the API or the source code, feel free to drop by the [http://groups.google.com/group/nvidia-texture-tools developer's mailing list]. If you would like to contact us privately, please send an email to [mailto:TextureTools@nvidia.com TextureTools@nvidia.com]. +- Do the NVIDIA Texture Tools work on OSX? +It currently compiles and runs properly, but it has not been tested extensively. +In particular there may be endianness errors in the code. + -=== Why is feature XYZ not supported? === -In order to keep the code small and reduce maintenance costs we have limited the features available in our new texture tools. For this reason, we have also open sourced the code, so that people can modify it and add their own custom features. +- Do the NVIDIA Texture Tools work on Linux? +Yes. -=== What platforms do the NVIDIA Texture Tools support? === -The tools are compiled and tested regularly on Linux, OSX, and Windows. Some platforms are tested more frequently than others and there may be bugs on some uncommon configurations. -=== Is CUDA required? === -No. The Visual Studio solution file contains a configuration that allows you to compile the texture tools without CUDA support. The cmake scripts automatically detect the CUDA installation and use it only when available. +- Do the NVIDIA Texture Tools work on Vista? +Yes, but note that CUDA is not supported on Vista yet, so the tool is not hardware +accelerated. -Even if the texture tools are compiled with CUDA support it's possible to use them on systems that do not support CUDA or that do not have a valid CUDA driver installed. -=== Where can I get CUDA? === -At [http://developer.nvidia.com/object/cuda.html] +- Is CUDA required? +No. The Visual Studio solution file contains a configuration that allows you +to compile the texture tools without CUDA support. The cmake scripts automatically +detect the CUDA installation and use it only when available. + + +- Where can I get CUDA? 
+http://developer.nvidia.com/object/cuda.html -=== Can I use the NVIDIA Texture Tools in my commercial application? === + +- Why is feature XYZ not supported? +In order to keep the code small and reduce maintenance costs we have limited the +features available in our new texture tools. We also have open sourced the code, so +that people can modify it and add their own favourite features. + + +- Can I use the NVIDIA Texture Tools in my commercial application? Yes, the NVIDIA Texture Tools are licensed under the MIT license. -=== Can I use the NVIDIA Texture Tools in my GPL application? === + +- Can I use the NVIDIA Texture Tools in my GPL application? Yes, the MIT license is compatible with the GPL and LGPL licenses. -=== Can I use the NVIDIA Texture Tools in the US? Do I have to obtain a license of the S3TC patent (US patent 5,956,431)? === -NVIDIA has a license of the S3TC patent that covers all our products, including our Texture Tools. You don't have to obtain a license of the S3TC patent to use any of NVIDIA's products, but certain uses of NVIDIA Texture Tools source code cannot be considered NVIDIA products anymore. Keep in mind that the NVIDIA Texture Tools are licensed under the MIT license and thus are provided without warranty of any kind. 
+ + diff --git a/VERSION b/VERSION index 7ec1d6d..f1547e6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.0 +2.0.7 diff --git a/buildpkg b/buildpkg index 0bf33b1..1622f8b 100755 --- a/buildpkg +++ b/buildpkg @@ -2,6 +2,6 @@ tar zcvf nvidia-texture-tools-`cat VERSION`.tar.gz \ --exclude '.*' --exclude debian --exclude '*~' --exclude buildpkg \ - --exclude 'build-*' --exclude data --exclude tags --exclude Makefile \ - --exclude qtcreator-build --exclude 'nvidia-texture-tools-*.tar.gz' \ - --exclude '*.user' --transform 's,^,nvidia-texture-tools/,' * + --exclude build --exclude data --exclude tags --exclude Makefile \ + --exclude 'nvidia-texture-tools-*.tar.gz' \ + --transform 's,^,nvidia-texture-tools/,' * diff --git a/cmake/DetermineProcessor.cmake b/cmake/DetermineProcessor.cmake index 88cb465..35095ca 100644 --- a/cmake/DetermineProcessor.cmake +++ b/cmake/DetermineProcessor.cmake @@ -16,53 +16,13 @@ IF(UNIX) STRING(REGEX REPLACE "/" "_" NV_SYSTEM_PROCESSOR "${NV_SYSTEM_PROCESSOR}") ENDIF(CMAKE_UNAME) -#~ # Get extended processor information from /proc/cpuinfo -#~ IF(EXISTS "/proc/cpuinfo") - -#~ FILE(READ /proc/cpuinfo PROC_CPUINFO) - -#~ SET(VENDOR_ID_RX "vendor_id[ \t]*:[ \t]*([a-zA-Z]+)\n") -#~ STRING(REGEX MATCH "${VENDOR_ID_RX}" VENDOR_ID "${PROC_CPUINFO}") -#~ STRING(REGEX REPLACE "${VENDOR_ID_RX}" "\\1" VENDOR_ID "${VENDOR_ID}") - -#~ SET(CPU_FAMILY_RX "cpu family[ \t]*:[ \t]*([0-9]+)") -#~ STRING(REGEX MATCH "${CPU_FAMILY_RX}" CPU_FAMILY "${PROC_CPUINFO}") -#~ STRING(REGEX REPLACE "${CPU_FAMILY_RX}" "\\1" CPU_FAMILY "${CPU_FAMILY}") - -#~ SET(MODEL_RX "model[ \t]*:[ \t]*([0-9]+)") -#~ STRING(REGEX MATCH "${MODEL_RX}" MODEL "${PROC_CPUINFO}") -#~ STRING(REGEX REPLACE "${MODEL_RX}" "\\1" MODEL "${MODEL}") - -#~ SET(FLAGS_RX "flags[ \t]*:[ \t]*([a-zA-Z0-9 _]+)\n") -#~ STRING(REGEX MATCH "${FLAGS_RX}" FLAGS "${PROC_CPUINFO}") -#~ STRING(REGEX REPLACE "${FLAGS_RX}" "\\1" FLAGS "${FLAGS}") - -#~ # Debug output. 
-#~ IF(LINUX_CPUINFO) -#~ MESSAGE(STATUS "LinuxCPUInfo.cmake:") -#~ MESSAGE(STATUS "VENDOR_ID : ${VENDOR_ID}") -#~ MESSAGE(STATUS "CPU_FAMILY : ${CPU_FAMILY}") -#~ MESSAGE(STATUS "MODEL : ${MODEL}") -#~ MESSAGE(STATUS "FLAGS : ${FLAGS}") -#~ ENDIF(LINUX_CPUINFO) - -#~ ENDIF(EXISTS "/proc/cpuinfo") - -#~ # Information on how to decode CPU_FAMILY and MODEL: -#~ # http://balusc.xs4all.nl/srv/har-cpu-int-pm.php + # Get extended processor information with: + # `cat /proc/cpuinfo` ELSE(UNIX) - IF(WIN32) - # It's not OK to trust $ENV{PROCESSOR_ARCHITECTURE}: its value depends on the type of executable being run, - # so a 32-bit cmake (the default binary distribution) will always say "x86" regardless of the actual target. - IF (CMAKE_SIZEOF_VOID_P EQUAL 8) - SET (NV_SYSTEM_PROCESSOR "AMD64") - ELSE(CMAKE_SIZEOF_VOID_P EQUAL 8) - SET (NV_SYSTEM_PROCESSOR "x86") - ENDIF(CMAKE_SIZEOF_VOID_P EQUAL 8) + SET (NV_SYSTEM_PROCESSOR "$ENV{PROCESSOR_ARCHITECTURE}") ENDIF(WIN32) - ENDIF(UNIX) diff --git a/cmake/FindCUDA.cmake b/cmake/FindCUDA.cmake index 4d32400..9261da4 100644 --- a/cmake/FindCUDA.cmake +++ b/cmake/FindCUDA.cmake @@ -1,1123 +1,142 @@ -############################################################################### -# For more information, please see: http://software.sci.utah.edu -# -# The MIT License -# -# Copyright (c) 2008-2009 -# NVIDIA Corp. 
-# -# Copyright (c) 2007-2009 -# Scientific Computing and Imaging Institute, University of Utah -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -# -# This script locates the Nvidia Compute Unified Driver Architecture (CUDA) -# tools. It should work on linux, windows, and mac and should be reasonably -# up to date with cuda releases. -# -# This script makes use of the standard find_package arguments of , -# REQUIRED and QUIET. CUDA_FOUND will report if an acceptable version of CUDA -# was found. -# -# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if the -# prefix cannot be determined by the location of nvcc in the system path. To -# use a different installed version of the toolkit set the environment variable -# CUDA_BIN_PATH before running cmake (e.g. CUDA_BIN_PATH=/usr/local/cuda1.0 -# instead of the default /usr/local/cuda). 
-# -# Set CUDA_BUILD_EMULATION to ON for Emulation mode. Defaults to OFF (device -# mode). -# _DEVICEEMU is defined when CUDA_BUILD_EMULATION is TRUE. -# -# Set CUDA_HOST_COMPILATION_CPP to OFF for C compilation of host code. -# Default TRUE. -# -# Set CUDA_BUILD_CUBIN to "ON" or "OFF" to enable and extra compilation pass -# with the -cubin option in Device mode. The output is parsed and register, -# shared memory usage is printed during build. Default ON. -# -# Set CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE to ON if you want the custom build -# rule to be attached to the source file in Visual Studio. Defaults to ON. -# Turn OFF if you add the same cuda file to multiple targets. -# -# This allows the user to build the target from the CUDA file, however bad -# things can happen if the CUDA source file is added to multiple targets. When -# performing parallel builds it is possible for the custom build command to be -# run more than once and in parallel causing cryptic build errors. This is -# because VS runs the rules for every source file in the target, and a source -# can have only one rule no matter how many projects it is added to. Therefore, -# the rule assigned to the source file really only applies to one target you get -# clashes when it is run from multiple targets. Eventually everything will get -# built, but if the user is unaware of this behavior, there may be confusion. -# It would be nice if we could detect the reuse of source files across multiple -# targets and turn the option off for the user, but no good solution could be -# found. -# -# Set CUDA_64_BIT_DEVICE_CODE to ON to compile for 64 bit devices. Defaults to -# match host bit size. Note that making this different than the host code when -# generating C files from CUDA code just won't work, because size_t gets defined -# by nvcc in the generated source. If you compile to PTX and then load the file -# yourself, you can mix bit sizes between device and host. 
-# -# Set CUDA_VERBOSE_BUILD to ON to see all the commands used when building the -# CUDA file. When using a Makefile generator the value defaults to VERBOSE (run -# make VERBOSE=1 to see output). You can override this by setting -# CUDA_VERBOSE_BUILD to ON. -# -# Set CUDA_GENERATED_OUTPUT_DIR to the path you wish to have the generated files -# placed. If it is blank output files will be placed in -# CMAKE_CURRENT_BINARY_DIR. Intermediate files will always be placed in -# CMAKE_CURRENT_BINARY_DIR. -# -# The script creates the following macros: -# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... ) -# -- Sets the directories that should be passed to nvcc -# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu -# files. -# -# CUDA_ADD_LIBRARY( cuda_target file0 file1 ... [OPTIONS ...] ) -# -- Creates a shared library "cuda_target" which contains all of the source -# (*.c, *.cc, etc.) specified and all of the nvcc'ed .cu files specified. -# All of the specified source files and generated .cpp files are compiled -# using the standard CMake compiler, so the normal INCLUDE_DIRECTORIES, -# LINK_DIRECTORIES, and TARGET_LINK_LIBRARIES can be used to affect their -# build and link. In addition CUDA_INCLUDE_DIRS is added automatically added -# to include_directories(). -# -# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... [OPTIONS ...] ) -# -- Same as CUDA_ADD_LIBRARY except that an exectuable is created. -# -# CUDA_COMPILE( generated_files file0 file1 ... [OPTIONS ...] ) -# -- Returns a list of generated files from the input source files to be used -# with ADD_LIBRARY or ADD_EXECUTABLE. -# -# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) -# -- Returns a list of PTX files generated from the input source files. -# -# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ... -# [OPTIONS ...] ) -# -- This is where all the magic happens. 
CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this function -# under the hood. -# -# Given the list of files (file0 file1 ... fileN) this macro generates custom -# commands that generate either PTX or linkable objects (use "PTX" or "OBJ" -# for the format argument to switch. Files that don't end with .cu or have -# the HEADER_FILE_ONLY property are ignored. -# -# The arguments passed in after OPTIONS are extra command line options to -# give to NVCC. You can also specify per configuration options by specifying -# the name of the configuration followed by the options. General options -# must preceed configuration specific options. Not all configurations need -# to be specified, only the ones provided will be used. -# -# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag" -# DEBUG -g -# RELEASE --use_fast_math -# RELWITHDEBINFO --use_fast_math;-g -# MINSIZEREL --use_fast_math -# -# For certain configurations (namely VS generating object files with -# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will -# be produced for the given cuda file. This is because when you add the cuda -# file to Visual Studio it knows that this file produces and will link in the -# resulting object file automatically. -# -# This script will also generate a separate cmake script that is used at -# build time to invoke nvcc. This is for serveral reasons. -# -# 1. nvcc can return negative numbers as return values which confuses -# Visual Studio into thinking that the command succeeded. The script now -# checks the error codes and produces errors when there was a problem -# -# 2. nvcc has been known to not delete intermediate results when it -# encounters problems. The build rules then don't complete, because there -# exists a partially written output file. The script now deletes the -# output files if there was an error. -# -# 3. 
By putting all the options that affect the build into a file and then -# make the build rule dependent on the file, when the options change the -# output files will be regenerated. -# -# CUDA_ADD_CUFFT_TO_TARGET( cuda_target ) -# -- Adds the cufft library to the target. Handles whether you are in emulation -# mode or not. -# -# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target ) -# -- Adds the cublas library to the target. Handles whether you are in emulation -# mode or not. -# -# CUDA_BUILD_CLEAN_TARGET() -# -- Creates a convience target that deletes all the dependency files generated. -# You should make clean after running this target to ensure the dependency -# files get regenerated. -# -# The script defines the following variables: -# -# ( Note CUDA_ADD_* macros setup cuda/cut library dependencies automatically. -# These variables are only needed if a cuda API call must be made from code in -# a outside library or executable. ) -# -# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc. -# CUDA_VERSION_MINOR -- The minor version. -# CUDA_VERSION -# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR -# -# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically -# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY. -# CUDA_LIBRARIES -- Cuda RT library. -# CUDA_CUT_INCLUDE_DIR -- Include directory for cuda SDK headers (cutil.h). -# CUDA_CUT_LIBRARIES -- SDK libraries. -# CUDA_NVCC_FLAGS -- Additional NVCC command line arguments. NOTE: -# multiple arguments must be semi-colon delimited -# e.g. --compiler-options;-Wall -# CUDA_NVCC_FLAGS_ -- Confugration specific flags for NVCC. -# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT -# implementation (alternative to: -# CUDA_ADD_CUFFT_TO_TARGET macro) -# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS -# implementation (alterative to: -# CUDA_ADD_CUBLAS_TO_TARGET macro). -# -# -# The script now builds object files instead of generating C files. 
In order to -# facilitate this, the script now makes use of the CMAKE_{C,CXX}_FLAGS along -# with their configuration dependent counterparts (i.e. CMAKE_C_FLAGS_DEBUG). -# These flags are passed through nvcc to the native compiler. In addition, on -# some systems special flags are added for building objects intended for shared -# libraries. FindCUDA make use of the CMake variable BUILD_SHARED_LIBS to -# determine if these flags should be used. Please set this variable according -# to how the objects are to be used before calling CUDA_ADD_LIBRARY. A -# preprocessor macro, _EXPORTS is defined when BUILD_SHARED_LIBS is -# defined. In addition, flags passed into add_definitions with -D or /D are -# passed along to nvcc. -# -# Files with the HEADER_FILE_ONLY property set will not be compiled. -# -# It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain platforms, -# or to use a cuda runtime not installed in the default location. In newer -# versions of the toolkit the cuda library is included with the graphics -# driver- be sure that the driver version matches what is needed by the cuda -# runtime version. -# -# -- Abe Stephens SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html -# -- James Bigler NVIDIA Corp -############################################################################### - -# FindCUDA.cmake - -# We need to have at least this version to support the VERSION_LESS argument to 'if'. -cmake_policy(PUSH) -cmake_minimum_required(VERSION 2.6.2) -cmake_policy(POP) - -# This macro helps us find the location of helper files we will need the full path to -macro(CUDA_FIND_HELPER_FILE _name _extension) - set(_full_name "${_name}.${_extension}") - # CMAKE_CURRENT_LIST_FILE contains the full path to the file currently being - # processed. Using this variable, we can pull out the current path, and - # provide a way to get access to the other files we need local to here. 
- get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) - find_file(CUDA_${_name} ${_full_name} PATHS ${CMAKE_CURRENT_LIST_DIR} NO_DEFAULT_PATH) - if(NOT CUDA_${_name}) - set(error_message "${_full_name} not found in CMAKE_MODULE_PATH") - if(CUDA_FIND_REQUIRED) - message(FATAL_ERROR "${error_message}") - else(CUDA_FIND_REQUIRED) - if(NOT CUDA_FIND_QUIETLY) - message(STATUS "${error_message}") - endif(NOT CUDA_FIND_QUIETLY) - endif(CUDA_FIND_REQUIRED) - endif(NOT CUDA_${_name}) - # Set this variable as internal, so the user isn't bugged with it. - set(CUDA_${_name} ${CUDA_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE) -endmacro(CUDA_FIND_HELPER_FILE) - -##################################################################### -## CUDA_INCLUDE_NVCC_DEPENDENCIES -## - -# So we want to try and include the dependency file if it exists. If -# it doesn't exist then we need to create an empty one, so we can -# include it. - -# If it does exist, then we need to check to see if all the files it -# depends on exist. If they don't then we should clear the dependency -# file and regenerate it later. This covers the case where a header -# file has disappeared or moved. - -macro(CUDA_INCLUDE_NVCC_DEPENDENCIES dependency_file) - set(CUDA_NVCC_DEPEND) - set(CUDA_NVCC_DEPEND_REGENERATE FALSE) - - - # Include the dependency file. Create it first if it doesn't exist . The - # INCLUDE puts a dependency that will force CMake to rerun and bring in the - # new info when it changes. DO NOT REMOVE THIS (as I did and spent a few - # hours figuring out why it didn't work. - if(NOT EXISTS ${dependency_file}) - file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n") - endif() - # Always include this file to force CMake to run again next - # invocation and rebuild the dependencies. 
- #message("including dependency_file = ${dependency_file}") - include(${dependency_file}) - - # Now we need to verify the existence of all the included files - # here. If they aren't there we need to just blank this variable and - # make the file regenerate again. -# if(DEFINED CUDA_NVCC_DEPEND) -# message("CUDA_NVCC_DEPEND set") -# else() -# message("CUDA_NVCC_DEPEND NOT set") -# endif() - if(CUDA_NVCC_DEPEND) - #message("CUDA_NVCC_DEPEND true") - foreach(f ${CUDA_NVCC_DEPEND}) - #message("searching for ${f}") - if(NOT EXISTS ${f}) - #message("file ${f} not found") - set(CUDA_NVCC_DEPEND_REGENERATE TRUE) - endif() - endforeach(f) - else(CUDA_NVCC_DEPEND) - #message("CUDA_NVCC_DEPEND false") - # No dependencies, so regenerate the file. - set(CUDA_NVCC_DEPEND_REGENERATE TRUE) - endif(CUDA_NVCC_DEPEND) - - #message("CUDA_NVCC_DEPEND_REGENERATE = ${CUDA_NVCC_DEPEND_REGENERATE}") - # No incoming dependencies, so we need to generate them. Make the - # output depend on the dependency file itself, which should cause the - # rule to re-run. - if(CUDA_NVCC_DEPEND_REGENERATE) - file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n") - endif(CUDA_NVCC_DEPEND_REGENERATE) - -endmacro(CUDA_INCLUDE_NVCC_DEPENDENCIES) - -############################################################################### -############################################################################### -# Setup default variables -############################################################################### -############################################################################### - -# Set whether we are using emulation or device mode. -option(CUDA_BUILD_EMULATION "Build in Emulation mode" OFF) -# Parse HOST_COMPILATION mode. -option(CUDA_HOST_COMPILATION_CPP "Generated file extension" ON) -# Allow the user to specify if the device code is supposed to be 32 or 64 bit. 
-if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(CUDA_64_BIT_DEVICE_CODE_DEFAULT ON) -else() - set(CUDA_64_BIT_DEVICE_CODE_DEFAULT OFF) -endif() -option(CUDA_64_BIT_DEVICE_CODE "Compile device code in 64 bit mode" ${CUDA_64_BIT_DEVICE_CODE_DEFAULT}) -# Prints out extra information about the cuda file during compilation -option(CUDA_BUILD_CUBIN "Generate and parse .cubin files in Device mode." ON) -# Extra user settable flags -set(CUDA_NVCC_FLAGS "" CACHE STRING "Semi-colon delimit multiple arguments.") -# Attach the build rule to the source file in VS. This option -option(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE "Attach the build rule to the CUDA source file. Enable only when the CUDA source file is added to at most one target." ON) -# Specifies whether the commands used when compiling the .cu file will be printed out. -option(CUDA_VERBOSE_BUILD "Print out the commands run while compiling the CUDA source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF) -# Where to put the generated output. -set(CUDA_GENERATED_OUTPUT_DIR "" CACHE PATH "Directory to put all the output files. If blank it will default to the CMAKE_CURRENT_BINARY_DIR") -mark_as_advanced( - CUDA_HOST_COMPILATION_CPP - CUDA_64_BIT_DEVICE_CODE - CUDA_NVCC_FLAGS - CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE - CUDA_GENERATED_OUTPUT_DIR - ) - -# Makefile and similar generators don't define CMAKE_CONFIGURATION_TYPES, so we -# need to add another entry for the CMAKE_BUILD_TYPE. We also need to add the -# standerd set of 4 build types (Debug, MinSizeRel, Release, and RelWithDebInfo) -# for completeness. We need run this loop in order to accomodate the addition -# of extra configuration types. Duplicate entries will be removed by -# REMOVE_DUPLICATES. 
-set(CUDA_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo) -list(REMOVE_DUPLICATES CUDA_configuration_types) -foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - set(CUDA_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semi-colon delimit multiple arguments.") - mark_as_advanced(CUDA_NVCC_FLAGS_${config_upper}) -endforeach() - -############################################################################### -############################################################################### -# Locate CUDA, Set Build Type, etc. -############################################################################### -############################################################################### - -# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed, -# if they have then clear the cache variables, so that will be detected again. -if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") - unset(CUDA_NVCC_EXECUTABLE CACHE) - unset(CUDA_VERSION CACHE) - unset(CUDA_TOOLKIT_INCLUDE CACHE) - unset(CUDA_CUDART_LIBRARY CACHE) - unset(CUDA_CUDA_LIBRARY CACHE) - unset(CUDA_cublas_LIBRARY CACHE) - unset(CUDA_cublasemu_LIBRARY CACHE) - unset(CUDA_cufft_LIBRARY CACHE) - unset(CUDA_cufftemu_LIBRARY CACHE) -endif() - -if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}") - unset(CUDA_CUT_INCLUDE_DIR CACHE) - unset(CUDA_CUT_LIBRARY CACHE) -endif() - -# Search for the cuda distribution. -if(NOT CUDA_TOOLKIT_ROOT_DIR) - - # Search in the CUDA_BIN_PATH first. - find_path(CUDA_TOOLKIT_ROOT_DIR - NAMES nvcc nvcc.exe - PATHS ENV CUDA_BIN_PATH - DOC "Toolkit location." - NO_DEFAULT_PATH - ) - # Now search default paths - find_path(CUDA_TOOLKIT_ROOT_DIR - NAMES nvcc nvcc.exe - PATHS /usr/local/bin - /usr/local/cuda/bin - DOC "Toolkit location." 
- ) - - if (CUDA_TOOLKIT_ROOT_DIR) - string(REGEX REPLACE "[/\\\\]?bin[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR}) - # We need to force this back into the cache. - set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR} CACHE PATH "Toolkit location." FORCE) - endif(CUDA_TOOLKIT_ROOT_DIR) - if (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR}) - if(CUDA_FIND_REQUIRED) - message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR") - elseif(NOT CUDA_FIND_QUIETLY) - message("CUDA_TOOLKIT_ROOT_DIR not found or specified") - endif() - endif (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR}) -endif (NOT CUDA_TOOLKIT_ROOT_DIR) - -# CUDA_NVCC_EXECUTABLE -find_program(CUDA_NVCC_EXECUTABLE - NAMES nvcc - PATHS "${CUDA_TOOLKIT_ROOT_DIR}/bin" - ENV CUDA_BIN_PATH - NO_DEFAULT_PATH - ) -# Search default search paths, after we search our own set of paths. -find_program(CUDA_NVCC_EXECUTABLE nvcc) -mark_as_advanced(CUDA_NVCC_EXECUTABLE) - -if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION) - # Compute the version. - exec_program(${CUDA_NVCC_EXECUTABLE} ARGS "--version" OUTPUT_VARIABLE NVCC_OUT) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT}) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT}) - set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.") - mark_as_advanced(CUDA_VERSION) -endif() - -# Always set this convenience variable -set(CUDA_VERSION_STRING "${CUDA_VERSION}") - -# Here we need to determine if the version we found is acceptable. We will -# assume that is unless CUDA_FIND_VERSION_EXACT or CUDA_FIND_VERSION is -# specified. The presence of either of these options checks the version -# string and signals if the version is acceptable or not. 
-set(_cuda_version_acceptable TRUE) -# -if(CUDA_FIND_VERSION_EXACT AND NOT CUDA_VERSION VERSION_EQUAL CUDA_FIND_VERSION) - set(_cuda_version_acceptable FALSE) -endif() -# -if(CUDA_FIND_VERSION AND CUDA_VERSION VERSION_LESS CUDA_FIND_VERSION) - set(_cuda_version_acceptable FALSE) -endif() -# -if(NOT _cuda_version_acceptable) - set(_cuda_error_message "Requested CUDA version ${CUDA_FIND_VERSION}, but found unacceptable version ${CUDA_VERSION}") - if(CUDA_FIND_REQUIRED) - message("${_cuda_error_message}") - elseif(NOT CUDA_FIND_QUIETLY) - message("${_cuda_error_message}") - endif() -endif() - -# CUDA_TOOLKIT_INCLUDE -find_path(CUDA_TOOLKIT_INCLUDE - device_functions.h # Header included in toolkit - PATHS "${CUDA_TOOLKIT_ROOT_DIR}/include" - ENV CUDA_INC_PATH - NO_DEFAULT_PATH - ) -# Search default search paths, after we search our own set of paths. -find_path(CUDA_TOOLKIT_INCLUDE device_functions.h) -mark_as_advanced(CUDA_TOOLKIT_INCLUDE) - -# Set the user list of include dir to nothing to initialize it. -set (CUDA_NVCC_INCLUDE_ARGS_USER "") -set (CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) - -macro(FIND_LIBRARY_LOCAL_FIRST _var _names _doc) - find_library(${_var} - NAMES ${_names} - PATHS "${CUDA_TOOLKIT_ROOT_DIR}/lib" - ENV CUDA_LIB_PATH - DOC ${_doc} - NO_DEFAULT_PATH - ) - # Search default search paths, after we search our own set of paths. - find_library(${_var} NAMES ${_names} DOC ${_doc}) -endmacro() - -# CUDA_LIBRARIES -find_library_local_first(CUDA_CUDART_LIBRARY cudart "\"cudart\" library") -set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY}) - -# 1.1 toolkit on linux doesn't appear to have a separate library on -# some platforms. -find_library_local_first(CUDA_CUDA_LIBRARY cuda "\"cuda\" library (older versions only).") - -# Add cuda library to the link line only if it is found. 
-if (CUDA_CUDA_LIBRARY) - set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) -endif(CUDA_CUDA_LIBRARY) - -mark_as_advanced( - CUDA_CUDA_LIBRARY - CUDA_CUDART_LIBRARY - ) - -####################### -# Look for some of the toolkit helper libraries -macro(FIND_CUDA_HELPER_LIBS _name) - find_library_local_first(CUDA_${_name}_LIBRARY ${_name} "\"${_name}\" library") - mark_as_advanced(CUDA_${_name}_LIBRARY) -endmacro(FIND_CUDA_HELPER_LIBS) - -# Search for cufft and cublas libraries. -find_cuda_helper_libs(cufftemu) -find_cuda_helper_libs(cublasemu) -find_cuda_helper_libs(cufft) -find_cuda_helper_libs(cublas) - -if (CUDA_BUILD_EMULATION) - set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY}) - set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY}) -else() - set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY}) - set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY}) -endif() - -######################## -# Look for the SDK stuff -find_path(CUDA_SDK_ROOT_DIR common/inc/cutil.h - "$ENV{NVSDKCUDA_ROOT}" - "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]" - ) - -# Keep the CUDA_SDK_ROOT_DIR first in order to be able to override the -# environment variables. -set(CUDA_SDK_SEARCH_PATH - "${CUDA_SDK_ROOT_DIR}" - "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2" - "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2" - "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK" - "$ENV{HOME}/NVIDIA_CUDA_SDK" - "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX" - "/Developer/CUDA" - ) -# CUDA_CUT_INCLUDE_DIR -find_path(CUDA_CUT_INCLUDE_DIR - cutil.h - PATHS ${CUDA_SDK_SEARCH_PATH} - PATH_SUFFIXES "common/inc" - DOC "Location of cutil.h" - NO_DEFAULT_PATH - ) -# Now search system paths -find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h") - -mark_as_advanced(CUDA_CUT_INCLUDE_DIR) - - -# CUDA_CUT_LIBRARIES - -# cutil library is called cutil64 for 64 bit builds on windows. We don't want -# to get these confused, so we are setting the name based on the word size of -# the build. 
-if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(cuda_cutil_name cutil64) -else(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(cuda_cutil_name cutil32) -endif(CMAKE_SIZEOF_VOID_P EQUAL 8) - -find_library(CUDA_CUT_LIBRARY - NAMES cutil ${cuda_cutil_name} - PATHS ${CUDA_SDK_SEARCH_PATH} - # The new version of the sdk shows up in common/lib, but the old one is in lib - PATH_SUFFIXES "common/lib" "lib" - DOC "Location of cutil library" - NO_DEFAULT_PATH - ) -# Now search system paths -find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library") -mark_as_advanced(CUDA_CUT_LIBRARY) -set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY}) - - - -############################# -# Check for required components -set(CUDA_FOUND TRUE) - -set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL - "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE) -set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL - "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(CUDA DEFAULT_MSG - CUDA_TOOLKIT_ROOT_DIR - CUDA_NVCC_EXECUTABLE - CUDA_INCLUDE_DIRS - CUDA_CUDART_LIBRARY - _cuda_version_acceptable - ) - - - -############################################################################### -############################################################################### -# Macros -############################################################################### -############################################################################### - -############################################################################### -# Add include directories to pass to the nvcc command. 
-macro(CUDA_INCLUDE_DIRECTORIES) - foreach(dir ${ARGN}) - list(APPEND CUDA_NVCC_INCLUDE_ARGS_USER "-I${dir}") - endforeach(dir ${ARGN}) -endmacro(CUDA_INCLUDE_DIRECTORIES) - - -############################################################################## -cuda_find_helper_file(parse_cubin cmake) -cuda_find_helper_file(make2cmake cmake) -cuda_find_helper_file(run_nvcc cmake) - -############################################################################## -# Separate the OPTIONS out from the sources -# -macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _options) - set( ${_sources} ) - set( ${_options} ) - set( _found_options FALSE ) - foreach(arg ${ARGN}) - if(arg STREQUAL "OPTIONS") - set( _found_options TRUE ) - else() - if ( _found_options ) - list(APPEND ${_options} "${arg}") - else() - # Assume this is a file - list(APPEND ${_sources} "${arg}") - endif() - endif() - endforeach() -endmacro() - -############################################################################## -# Parse the OPTIONS from ARGN and set the variables prefixed by _option_prefix -# -macro(CUDA_PARSE_NVCC_OPTIONS _option_prefix) - set( _found_config ) - foreach(arg ${ARGN}) - # Determine if we are dealing with a perconfiguration flag - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - if (arg STREQUAL "${config_upper}") - set( _found_config _${arg}) - # Set arg to nothing to keep it from being processed further - set( arg ) - endif() - endforeach() - - if ( arg ) - list(APPEND ${_option_prefix}${_found_config} "${arg}") - endif() - endforeach() -endmacro() - -############################################################################## -# Helper to add the include directory for CUDA only once -function(CUDA_ADD_CUDA_INCLUDE_ONCE) - get_directory_property(_include_directories INCLUDE_DIRECTORIES) - set(_add TRUE) - if(_include_directories) - foreach(dir ${_include_directories}) - if("${dir}" STREQUAL "${CUDA_INCLUDE_DIRS}") - set(_add FALSE) - endif() - 
endforeach() - endif() - if(_add) - include_directories(${CUDA_INCLUDE_DIRS}) - endif() -endfunction() - -############################################################################## -# This helper macro populates the following variables and setups up custom -# commands and targets to invoke the nvcc compiler to generate C or PTX source -# dependant upon the format parameter. The compiler is invoked once with -M -# to generate a dependency file and a second time with -cuda or -ptx to generate -# a .cpp or .ptx file. -# INPUT: -# cuda_target - Target name -# format - PTX or OBJ -# FILE1 .. FILEN - The remaining arguments are the sources to be wrapped. -# OPTIONS - Extra options to NVCC -# OUTPUT: -# generated_files - List of generated files -############################################################################## -############################################################################## - -macro(CUDA_WRAP_SRCS cuda_target format generated_files) - - if( ${format} MATCHES "PTX" ) - set( compile_to_ptx ON ) - elseif( ${format} MATCHES "OBJ") - set( compile_to_ptx OFF ) - else() - message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS: '${format}'. Use OBJ or PTX.") - endif() - - # Set up all the command line flags here, so that they can be overriden on a per target basis. - - set(nvcc_flags "") - - # Emulation if the card isn't present. - if (CUDA_BUILD_EMULATION) - # Emulation. - set(nvcc_flags ${nvcc_flags} --device-emulation -D_DEVICEEMU -g) - else(CUDA_BUILD_EMULATION) - # Device mode. No flags necessary. 
- endif(CUDA_BUILD_EMULATION) - - if(CUDA_HOST_COMPILATION_CPP) - set(CUDA_C_OR_CXX CXX) - else(CUDA_HOST_COMPILATION_CPP) - set(nvcc_flags ${nvcc_flags} --host-compilation C) - set(CUDA_C_OR_CXX C) - endif(CUDA_HOST_COMPILATION_CPP) - - set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION}) - - if(CUDA_64_BIT_DEVICE_CODE) - set(nvcc_flags ${nvcc_flags} -m64) - else() - set(nvcc_flags ${nvcc_flags} -m32) - endif() - - # This needs to be passed in at this stage, because VS needs to fill out the - # value of VCInstallDir from within VS. - if(CMAKE_GENERATOR MATCHES "Visual Studio") - if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) - # Add nvcc flag for 64b Windows - set(ccbin_flags -D "\"CCBIN:PATH=$(VCInstallDir)bin\"" ) - endif() - endif() - - # Figure out which configure we will use and pass that in as an argument to - # the script. We need to defer the decision until compilation time, because - # for VS projects we won't know if we are making a debug or release build - # until build time. - if(CMAKE_GENERATOR MATCHES "Visual Studio") - set( CUDA_build_configuration "$(ConfigurationName)" ) - else() - set( CUDA_build_configuration "${CMAKE_BUILD_TYPE}") - endif() - - # Initialize our list of includes with the user ones followed by the CUDA system ones. - set(CUDA_NVCC_INCLUDE_ARGS ${CUDA_NVCC_INCLUDE_ARGS_USER} "-I${CUDA_INCLUDE_DIRS}") - # Get the include directories for this directory and use them for our nvcc command. 
- get_directory_property(CUDA_NVCC_INCLUDE_DIRECTORIES INCLUDE_DIRECTORIES) - if(CUDA_NVCC_INCLUDE_DIRECTORIES) - foreach(dir ${CUDA_NVCC_INCLUDE_DIRECTORIES}) - list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") - endforeach() - endif() - - # Reset these variables - set(CUDA_WRAP_OPTION_NVCC_FLAGS) - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}) - endforeach() - - CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_options ${ARGN}) - CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options}) - - # CUDA_HOST_FLAGS - if(BUILD_SHARED_LIBS) - # If BUILD_SHARED_LIBS is true, then we need to add extra flags for - # compiling objects for shared libraries. - set(CUDA_HOST_SHARED_FLAGS ${CMAKE_SHARED_LIBRARY_${CUDA_C_OR_CXX}_FLAGS}) - endif() - set(CUDA_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${CUDA_C_OR_CXX}_FLAGS} ${CUDA_HOST_SHARED_FLAGS})") - set(CUDA_NVCC_FLAGS_CONFIG "# Build specific configuration flags") - # Loop over all the configuration types to generate appropriate flags for run_nvcc.cmake - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - # CMAKE_FLAGS are strings and not lists. By not putting quotes around CMAKE_FLAGS - # we convert the strings to lists (like we want). - - # nvcc chokes on -g3, so replace it with -g - if(CMAKE_COMPILER_IS_GNUCC) - string(REPLACE "-g3" "-g" _cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") - else() - set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") - endif() - set(CUDA_HOST_FLAGS "${CUDA_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})") - # Note that if we ever want CUDA_NVCC_FLAGS_ to be string (instead of a list - # like it is currently), we can remove the quotes around the - # ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_ variable. 
- set(CUDA_NVCC_FLAGS_CONFIG "${CUDA_NVCC_FLAGS_CONFIG}\nset(CUDA_NVCC_FLAGS_${config_upper} \"${CUDA_NVCC_FLAGS_${config_upper}};${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}}\")") - endforeach() - - # Get the list of definitions from the directory property - get_directory_property(CUDA_NVCC_DEFINITIONS COMPILE_DEFINITIONS) - if(CUDA_NVCC_DEFINITIONS) - foreach(_definition ${CUDA_NVCC_DEFINITIONS}) - list(APPEND nvcc_flags "-D${_definition}") - endforeach() - endif() - - if(BUILD_SHARED_LIBS) - list(APPEND nvcc_flags "-D${cuda_target}_EXPORTS") - endif() - - # Determine output directory - if(CUDA_GENERATED_OUTPUT_DIR) - set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}") - else() - set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}") - endif() - - # Reset the output variable - set(_cuda_wrap_generated_files "") - - # Iterate over the macro arguments and create custom - # commands for all the .cu files. - foreach(file ${ARGN}) - # Ignore any file marked as a HEADER_FILE_ONLY - get_source_file_property(_is_header ${file} HEADER_FILE_ONLY) - if(${file} MATCHES ".*\\.cu$" AND NOT _is_header) - - # Add a custom target to generate a c or ptx file. ###################### - - get_filename_component( basename ${file} NAME ) - if( compile_to_ptx ) - set(generated_file_path "${cuda_compile_output_dir}") - set(generated_file_basename "${cuda_target}_generated_${basename}.ptx") - set(format_flag "-ptx") - file(MAKE_DIRECTORY "${cuda_compile_output_dir}") - else( compile_to_ptx ) - set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}") - set(generated_file_basename "${cuda_target}_generated_${basename}${generated_extension}") - set(format_flag "-c") - endif( compile_to_ptx ) - - # Set all of our file names. Make sure that whatever filenames that have - # generated_file_path in them get passed in through as a command line - # argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time - # instead of configure time. 
- set(generated_file "${generated_file_path}/${generated_file_basename}") - set(cmake_dependency_file "${CMAKE_CURRENT_BINARY_DIR}/${generated_file_basename}.depend") - set(NVCC_generated_dependency_file "${CMAKE_CURRENT_BINARY_DIR}/${generated_file_basename}.NVCC-depend") - set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt") - set(custom_target_script "${CMAKE_CURRENT_BINARY_DIR}/${generated_file_basename}.cmake") - - # Setup properties for obj files: - if( NOT compile_to_ptx ) - set_source_files_properties("${generated_file}" - PROPERTIES - EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked. - ) - endif() - - # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path. - get_filename_component(file_path "${file}" PATH) - if(IS_ABSOLUTE "${file_path}") - set(source_file "${file}") - else() - set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") - endif() - - # Bring in the dependencies. Creates a variable CUDA_NVCC_DEPEND ####### - cuda_include_nvcc_dependencies(${cmake_dependency_file}) - - # Convience string for output ########################################### - if(CUDA_BUILD_EMULATION) - set(cuda_build_type "Emulation") - else(CUDA_BUILD_EMULATION) - set(cuda_build_type "Device") - endif(CUDA_BUILD_EMULATION) - - # Build the NVCC made dependency file ################################### - set(build_cubin OFF) - if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN ) - if ( NOT compile_to_ptx ) - set ( build_cubin ON ) - endif( NOT compile_to_ptx ) - endif( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN ) - - # Configure the build script - configure_file("${CUDA_run_nvcc}" "${custom_target_script}" @ONLY) - - # So if a user specifies the same cuda file as input more than once, you - # can have bad things happen with dependencies. Here we check an option - # to see if this is the behavior they want. 
- if(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE) - set(main_dep MAIN_DEPENDENCY ${source_file}) - else() - set(main_dep DEPENDS ${source_file}) - endif() - - if(CUDA_VERBOSE_BUILD) - set(verbose_output ON) - elseif(CMAKE_GENERATOR MATCHES "Makefiles") - set(verbose_output "$(VERBOSE)") - else() - set(verbose_output OFF) - endif() - - # Create up the comment string - file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") - if(compile_to_ptx) - set(cuda_build_comment_string "Building NVCC ptx file ${generated_file_relative_path}") - else() - set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}") - endif() - - # Build the generated file and dependency file ########################## - add_custom_command( - OUTPUT ${generated_file} - # These output files depend on the source_file and the contents of cmake_dependency_file - ${main_dep} - DEPENDS ${CUDA_NVCC_DEPEND} - DEPENDS ${custom_target_script} - COMMAND ${CMAKE_COMMAND} ARGS - -D verbose:BOOL=${verbose_output} - ${ccbin_flags} - -D build_configuration:STRING=${CUDA_build_configuration} - -D "generated_file:STRING=${generated_file}" - -D "generated_cubin_file:STRING=${generated_cubin_file}" - -P "${custom_target_script}" - COMMENT "${cuda_build_comment_string}" - ) - - # Make sure the build system knows the file is generated. - set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE) - - # Don't add the object file to the list of generated files if we are using - # visual studio and we are attaching the build rule to the cuda file. VS - # will add our object file to the linker automatically for us. - set(cuda_add_generated_file TRUE) - - if(NOT compile_to_ptx AND CMAKE_GENERATOR MATCHES "Visual Studio" AND CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE) - # Visual Studio 8 crashes when you close the solution when you don't add the object file. 
- if(NOT CMAKE_GENERATOR MATCHES "Visual Studio 8") - set(cuda_add_generated_file FALSE) - endif() - endif() - - if(cuda_add_generated_file) - list(APPEND _cuda_wrap_generated_files ${generated_file}) - endif() - - # Add the other files that we want cmake to clean on a cleanup ########## - list(APPEND CUDA_ADDITIONAL_CLEAN_FILES "${cmake_dependency_file}") - list(REMOVE_DUPLICATES CUDA_ADDITIONAL_CLEAN_FILES) - set(CUDA_ADDITIONAL_CLEAN_FILES ${CUDA_ADDITIONAL_CLEAN_FILES} CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.") - - endif(${file} MATCHES ".*\\.cu$" AND NOT _is_header) - endforeach(file) - - # Set the return parameter - set(${generated_files} ${_cuda_wrap_generated_files}) -endmacro(CUDA_WRAP_SRCS) - - -############################################################################### -############################################################################### -# ADD LIBRARY -############################################################################### -############################################################################### -macro(CUDA_ADD_LIBRARY cuda_target) - - CUDA_ADD_CUDA_INCLUDE_ONCE() - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _options ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} OPTIONS ${_options} ) - - # Add the library. - add_library(${cuda_target} - ${_generated_files} - ${_sources} - ) - - target_link_libraries(${cuda_target} - ${CUDA_LIBRARIES} - ) - - # We need to set the linker language based on what the expected generated file - # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP. 
- set_target_properties(${cuda_target} - PROPERTIES - LINKER_LANGUAGE ${CUDA_C_OR_CXX} - ) - -endmacro(CUDA_ADD_LIBRARY cuda_target) - - -############################################################################### -############################################################################### -# ADD EXECUTABLE -############################################################################### -############################################################################### -macro(CUDA_ADD_EXECUTABLE cuda_target) - - CUDA_ADD_CUDA_INCLUDE_ONCE() - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _options ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} OPTIONS ${_options} ) - - # Add the library. - add_executable(${cuda_target} - ${_generated_files} - ${_sources} - ) - - target_link_libraries(${cuda_target} - ${CUDA_LIBRARIES} - ) - - # We need to set the linker language based on what the expected generated file - # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP. - set_target_properties(${cuda_target} - PROPERTIES - LINKER_LANGUAGE ${CUDA_C_OR_CXX} - ) - -endmacro(CUDA_ADD_EXECUTABLE cuda_target) - - -############################################################################### -############################################################################### -# CUDA COMPILE -############################################################################### -############################################################################### -macro(CUDA_COMPILE generated_files) - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _options ${ARGN}) - # Create custom commands and targets for each file. 
- CUDA_WRAP_SRCS( cuda_compile OBJ _generated_files ${_sources} OPTIONS ${_options} ) - - set( ${generated_files} ${_generated_files}) - -endmacro(CUDA_COMPILE) - - -############################################################################### -############################################################################### -# CUDA COMPILE PTX -############################################################################### -############################################################################### -macro(CUDA_COMPILE_PTX generated_files) - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _options ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( cuda_compile_ptx PTX _generated_files ${_sources} OPTIONS ${_options} ) - - set( ${generated_files} ${_generated_files}) - -endmacro(CUDA_COMPILE_PTX) - -############################################################################### -############################################################################### -# CUDA ADD CUFFT TO TARGET -############################################################################### -############################################################################### -macro(CUDA_ADD_CUFFT_TO_TARGET target) - if (CUDA_BUILD_EMULATION) - target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY}) - else() - target_link_libraries(${target} ${CUDA_cufft_LIBRARY}) - endif() -endmacro() - -############################################################################### -############################################################################### -# CUDA ADD CUBLAS TO TARGET -############################################################################### -############################################################################### -macro(CUDA_ADD_CUBLAS_TO_TARGET target) - if (CUDA_BUILD_EMULATION) - target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY}) - else() - target_link_libraries(${target} ${CUDA_cublas_LIBRARY}) - 
endif() -endmacro() - -############################################################################### -############################################################################### -# CUDA BUILD CLEAN TARGET -############################################################################### -############################################################################### -macro(CUDA_BUILD_CLEAN_TARGET) - # Call this after you add all your CUDA targets, and you will get a convience - # target. You should also make clean after running this target to get the - # build system to generate all the code again. - - set(cuda_clean_target_name clean_cuda_depends) - if (CMAKE_GENERATOR MATCHES "Visual Studio") - string(TOUPPER ${cuda_clean_target_name} cuda_clean_target_name) - endif() - add_custom_target(${cuda_clean_target_name} - COMMAND ${CMAKE_COMMAND} -E remove ${CUDA_ADDITIONAL_CLEAN_FILES}) - - # Clear out the variable, so the next time we configure it will be empty. - # This is useful so that the files won't persist in the list after targets - # have been removed. - set(CUDA_ADDITIONAL_CLEAN_FILES "" CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.") -endmacro(CUDA_BUILD_CLEAN_TARGET) +# +# Try to find CUDA compiler, runtime libraries, and include path. +# Once done this will define +# +# CUDA_FOUND +# CUDA_INCLUDE_PATH +# CUDA_RUNTIME_LIBRARY +# CUDA_COMPILER +# +# It will also define the following macro: +# +# WRAP_CUDA +# + +IF (WIN32) + FIND_PROGRAM (CUDA_COMPILER nvcc.exe + $ENV{CUDA_BIN_PATH} + DOC "The CUDA Compiler") +ELSE(WIN32) + FIND_PROGRAM (CUDA_COMPILER nvcc + $ENV{CUDA_BIN_PATH} + /usr/local/cuda/bin + DOC "The CUDA Compiler") +ENDIF(WIN32) + +IF (CUDA_COMPILER) + GET_FILENAME_COMPONENT (CUDA_COMPILER_DIR ${CUDA_COMPILER} PATH) + GET_FILENAME_COMPONENT (CUDA_COMPILER_SUPER_DIR ${CUDA_COMPILER_DIR} PATH) +ELSE (CUDA_COMPILER) + SET (CUDA_COMPILER_DIR .) + SET (CUDA_COMPILER_SUPER_DIR ..) 
+ENDIF (CUDA_COMPILER) + +FIND_PATH (CUDA_INCLUDE_PATH cuda_runtime.h + $ENV{CUDA_INC_PATH} + ${CUDA_COMPILER_SUPER_DIR}/include + ${CUDA_COMPILER_DIR} + DOC "The directory where CUDA headers reside") + +FIND_LIBRARY (CUDA_RUNTIME_LIBRARY + NAMES cudart + PATHS + $ENV{CUDA_LIB_PATH} + ${CUDA_COMPILER_SUPER_DIR}/lib + ${CUDA_COMPILER_DIR} + DOC "The CUDA runtime library") + +IF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY) + SET (CUDA_FOUND TRUE) +ELSE (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY) + SET (CUDA_FOUND FALSE) +ENDIF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY) + +SET (CUDA_LIBRARIES ${CUDA_RUNTIME_LIBRARY}) + +MARK_AS_ADVANCED (CUDA_FOUND CUDA_COMPILER CUDA_RUNTIME_LIBRARY) + + +#SET(CUDA_OPTIONS "-ncfe") +SET(CUDA_OPTIONS "--host-compilation=C") + +IF (CUDA_EMULATION) + SET (CUDA_OPTIONS "${CUDA_OPTIONS} -deviceemu") +ENDIF (CUDA_EMULATION) + + +# Get include directories. +MACRO(GET_CUDA_INC_DIRS _cuda_INC_DIRS) + SET(${_cuda_INC_DIRS}) + GET_DIRECTORY_PROPERTY(_inc_DIRS INCLUDE_DIRECTORIES) + + FOREACH(_current ${_inc_DIRS}) + SET(${_cuda_INC_DIRS} ${${_cuda_INC_DIRS}} "-I" ${_current}) + ENDFOREACH(_current ${_inc_DIRS}) + + SET(${_cuda_INC_DIRS} ${${_cuda_INC_DIRS}} "-I" ${CUDA_INCLUDE_PATH}) + +# IF (CMAKE_SYTEM_INCLUDE_PATH) +# SET(${_cuda_INC_DIRS} ${${_cuda_INC_DIRS}} "-I" ${CMAKE_SYSTEM_INCLUDE_PATH}) +# ENDIF (CMAKE_SYTEM_INCLUDE_PATH) +# IF (CMAKE_INCLUDE_PATH) +# SET(${_cuda_INC_DIRS} ${${_cuda_INC_DIRS}} "-I" ${CMAKE_INCLUDE_PATH}) +# ENDIF (CMAKE_INCLUDE_PATH) + +ENDMACRO(GET_CUDA_INC_DIRS) + + +# Get file dependencies. 
+MACRO (GET_CUFILE_DEPENDENCIES dependencies file) + GET_FILENAME_COMPONENT(filepath ${file} PATH) + + # parse file for dependencies + FILE(READ "${file}" CONTENTS) + #STRING(REGEX MATCHALL "#[ \t]*include[ \t]+[<\"][^>\"]*" DEPS "${CONTENTS}") + STRING(REGEX MATCHALL "#[ \t]*include[ \t]+\"[^\"]*" DEPS "${CONTENTS}") + + SET(${dependencies}) + + FOREACH(DEP ${DEPS}) + STRING(REGEX REPLACE "#[ \t]*include[ \t]+\"" "" DEP "${DEP}") + + FIND_PATH(PATH_OF_${DEP} ${DEP} + ${filepath}) + + IF(NOT ${PATH_OF_${DEP}} STREQUAL PATH_OF_${DEP}-NOTFOUND) + #MESSAGE("${file} : ${PATH_OF_${DEP}}/${DEP}") + SET(${dependencies} ${${dependencies}} ${PATH_OF_${DEP}}/${DEP}) + ENDIF(NOT ${PATH_OF_${DEP}} STREQUAL PATH_OF_${DEP}-NOTFOUND) + + ENDFOREACH(DEP) + +ENDMACRO (GET_CUFILE_DEPENDENCIES) + + +# WRAP_CUDA(outfile ...) +MACRO (WRAP_CUDA outfiles) + GET_CUDA_INC_DIRS(cuda_includes) + #MESSAGE(${cuda_includes}) + + FOREACH (CUFILE ${ARGN}) + GET_FILENAME_COMPONENT (CUFILE ${CUFILE} ABSOLUTE) + GET_FILENAME_COMPONENT (CFILE ${CUFILE} NAME_WE) + SET (CFILE ${CMAKE_CURRENT_BINARY_DIR}/${CFILE}.gen.c) + + GET_CUFILE_DEPENDENCIES(CUDEPS ${CUFILE}) + #MESSAGE("${CUDEPS}") + + ADD_CUSTOM_COMMAND ( + OUTPUT ${CFILE} + COMMAND ${CUDA_COMPILER} + ARGS -cuda ${cuda_includes} ${CUDA_OPTIONS} -o ${CFILE} ${CUFILE} + MAIN_DEPENDENCY ${CUFILE} + DEPENDS ${CUDEPS}) + + #MACRO_ADD_FILE_DEPENDENCIES(${CUFILE} ${CFILE}) + + SET (${outfiles} ${${outfiles}} ${CFILE}) + ENDFOREACH (CUFILE) + + SET_SOURCE_FILES_PROPERTIES(${outfiles} PROPERTIES GENERATED 1) + +ENDMACRO (WRAP_CUDA) diff --git a/cmake/FindCg.cmake b/cmake/FindCg.cmake index 24402ab..302026f 100644 --- a/cmake/FindCg.cmake +++ b/cmake/FindCg.cmake @@ -10,6 +10,7 @@ # # On OSX default to using the framework version of Cg. 
+ IF (APPLE) INCLUDE(${CMAKE_ROOT}/Modules/CMakeFindFrameworks.cmake) SET(CG_FRAMEWORK_INCLUDES) @@ -37,25 +38,13 @@ IF (APPLE) ) ELSE (APPLE) IF (WIN32) - - # When compiling 64-bit programs, the binaries and libs are in bin.x64 and lib.x64 directories, - - # This will have only effect for 64bit versions of cmake, when running the default 32bit version - # both ProgramFiles and ProgramFiles(x86) point to the same place in Win64 - SET(PFx86_VARNAME "ProgramFiles(x86)") - SET(PFx86 $ENV{${PFx86_VARNAME}}) - - # Let's play safe in case we are cross compiling to 64 bit: for cgc it doesn't really matter - FIND_PROGRAM( CG_COMPILER cgc - $ENV{CG_BIN64_PATH} - $ENV{CG_BIN_PATH} + FIND_PROGRAM( CG_COMPILER cgc + $ENV{CG_BIN_PATH} $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/bin - $ENV{PFx86}/NVIDIA\ Corporation/Cg/bin $ENV{PROGRAMFILES}/Cg ${PROJECT_SOURCE_DIR}/../Cg DOC "The Cg Compiler" - ) - + ) IF (CG_COMPILER) GET_FILENAME_COMPONENT(CG_COMPILER_DIR ${CG_COMPILER} PATH) GET_FILENAME_COMPONENT(CG_COMPILER_SUPER_DIR ${CG_COMPILER_DIR} PATH) @@ -72,59 +61,27 @@ ELSE (APPLE) ${CG_COMPILER_DIR} DOC "The directory where Cg/cg.h resides" ) - - IF (NV_SYSTEM_PROCESSOR STREQUAL "AMD64") - FIND_LIBRARY( CG_LIBRARY - NAMES Cg - PATHS - $ENV{CG_LIB64_PATH} - $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib.x64 - $ENV{PFx86}/NVIDIA\ Corporation/Cg/lib.x64 - $ENV{PROGRAMFILES}/Cg - $ENV{PFx86}/Cg - ${PROJECT_SOURCE_DIR}/../Cg - ${CG_COMPILER_SUPER_DIR}/lib.x64 - ${CG_COMPILER_DIR} - DOC "The Cg runtime library (64-bit)" - ) - FIND_LIBRARY( CG_GL_LIBRARY - NAMES CgGL - PATHS - $ENV{CG_LIB64_PATH} - $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib.x64 - $ENV{PFx86}/NVIDIA\ Corporation/Cg/lib.x64 - $ENV{PROGRAMFILES}/Cg - $ENV{PFx86}/Cg - ${PROJECT_SOURCE_DIR}/../Cg - ${CG_COMPILER_SUPER_DIR}/lib.x64 - ${CG_COMPILER_DIR} - DOC "The Cg GL runtime library (64-bit)" - ) - ELSE(NV_SYSTEM_PROCESSOR STREQUAL "AMD64") - FIND_LIBRARY( CG_LIBRARY - NAMES Cg - PATHS - $ENV{CG_LIB_PATH} - 
$ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib - $ENV{PROGRAMFILES}/Cg - ${PROJECT_SOURCE_DIR}/../Cg - ${CG_COMPILER_SUPER_DIR}/lib - ${CG_COMPILER_DIR} - DOC "The Cg runtime library" - ) - FIND_LIBRARY( CG_GL_LIBRARY - NAMES CgGL - PATHS - $ENV{CG_LIB_PATH} - $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib - $ENV{PROGRAMFILES}/Cg - ${PROJECT_SOURCE_DIR}/../Cg - ${CG_COMPILER_SUPER_DIR}/lib - ${CG_COMPILER_DIR} - DOC "The Cg GL runtime library" - ) - ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "AMD64") - + FIND_LIBRARY( CG_LIBRARY + NAMES Cg + PATHS + $ENV{CG_LIB_PATH} + $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib + $ENV{PROGRAMFILES}/Cg + ${PROJECT_SOURCE_DIR}/../Cg + ${CG_COMPILER_SUPER_DIR}/lib + ${CG_COMPILER_DIR} + DOC "The Cg runtime library" + ) + FIND_LIBRARY( CG_GL_LIBRARY + NAMES CgGL + PATHS + $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib + $ENV{PROGRAMFILES}/Cg + ${PROJECT_SOURCE_DIR}/../Cg + ${CG_COMPILER_SUPER_DIR}/lib + ${CG_COMPILER_DIR} + DOC "The Cg runtime library" + ) ELSE (WIN32) FIND_PROGRAM( CG_COMPILER cgc /usr/bin diff --git a/cmake/FindFreeImage.cmake b/cmake/FindFreeImage.cmake deleted file mode 100644 index 2e99508..0000000 --- a/cmake/FindFreeImage.cmake +++ /dev/null @@ -1,53 +0,0 @@ -# -# Try to find the FreeImage library and include path. 
-# Once done this will define -# -# FREEIMAGE_FOUND -# FREEIMAGE_INCLUDE_PATH -# FREEIMAGE_LIBRARY -# - -IF (WIN32) - FIND_PATH( FREEIMAGE_INCLUDE_PATH FreeImage.h - ${FREEIMAGE_ROOT_DIR}/include - ${FREEIMAGE_ROOT_DIR} - DOC "The directory where FreeImage.h resides") - FIND_LIBRARY( FREEIMAGE_LIBRARY - NAMES FreeImage freeimage - PATHS - ${FREEIMAGE_ROOT_DIR}/lib - ${FREEIMAGE_ROOT_DIR} - DOC "The FreeImage library") -ELSE (WIN32) - FIND_PATH( FREEIMAGE_INCLUDE_PATH FreeImage.h - /usr/include - /usr/local/include - /sw/include - /opt/local/include - DOC "The directory where FreeImage.h resides") - FIND_LIBRARY( FREEIMAGE_LIBRARY - NAMES FreeImage freeimage - PATHS - /usr/lib64 - /usr/lib - /usr/local/lib64 - /usr/local/lib - /sw/lib - /opt/local/lib - DOC "The FreeImage library") -ENDIF (WIN32) - -SET(FREEIMAGE_LIBRARIES ${FREEIMAGE_LIBRARY}) - -IF (FREEIMAGE_INCLUDE_PATH AND FREEIMAGE_LIBRARY) - SET( FREEIMAGE_FOUND TRUE CACHE BOOL "Set to TRUE if FreeImage is found, FALSE otherwise") -ELSE (FREEIMAGE_INCLUDE_PATH AND FREEIMAGE_LIBRARY) - SET( FREEIMAGE_FOUND FALSE CACHE BOOL "Set to TRUE if FreeImage is found, FALSE otherwise") -ENDIF (FREEIMAGE_INCLUDE_PATH AND FREEIMAGE_LIBRARY) - -MARK_AS_ADVANCED( - FREEIMAGE_FOUND - FREEIMAGE_LIBRARY - FREEIMAGE_LIBRARIES - FREEIMAGE_INCLUDE_PATH) - diff --git a/cmake/FindGLEW.cmake b/cmake/FindGLEW.cmake index de35f6d..13c92d8 100644 --- a/cmake/FindGLEW.cmake +++ b/cmake/FindGLEW.cmake @@ -10,37 +10,22 @@ IF (WIN32) FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h $ENV{PROGRAMFILES}/GLEW/include - ${GLEW_ROOT_DIR}/include + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/include DOC "The directory where GL/glew.h resides") - - IF (NV_SYSTEM_PROCESSOR STREQUAL "AMD64") - FIND_LIBRARY( GLEW_LIBRARY - NAMES glew64 glew64s - PATHS - $ENV{PROGRAMFILES}/GLEW/lib - ${PROJECT_SOURCE_DIR}/src/nvgl/glew/bin - ${PROJECT_SOURCE_DIR}/src/nvgl/glew/lib - DOC "The GLEW library (64-bit)" - ) - ELSE(NV_SYSTEM_PROCESSOR STREQUAL "AMD64") - FIND_LIBRARY( 
GLEW_LIBRARY - NAMES glew GLEW glew32 glew32s - PATHS - $ENV{PROGRAMFILES}/GLEW/lib - ${PROJECT_SOURCE_DIR}/src/nvgl/glew/bin - ${PROJECT_SOURCE_DIR}/src/nvgl/glew/lib - DOC "The GLEW library" - ) - ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "AMD64") + FIND_LIBRARY( GLEW_LIBRARY + NAMES glew GLEW glew32 glew32s + PATHS + $ENV{PROGRAMFILES}/GLEW/lib + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/bin + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/lib + DOC "The GLEW library") ELSE (WIN32) FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h /usr/include /usr/local/include /sw/include /opt/local/include - ${GLEW_ROOT_DIR}/include DOC "The directory where GL/glew.h resides") - FIND_LIBRARY( GLEW_LIBRARY NAMES GLEW glew PATHS @@ -50,13 +35,13 @@ ELSE (WIN32) /usr/local/lib /sw/lib /opt/local/lib - ${GLEW_ROOT_DIR}/lib DOC "The GLEW library") ENDIF (WIN32) -SET(GLEW_FOUND "NO") -IF (GLEW_INCLUDE_PATH AND GLEW_LIBRARY) - SET(GLEW_LIBRARIES ${GLEW_LIBRARY}) - SET(GLEW_FOUND "YES") -ENDIF (GLEW_INCLUDE_PATH AND GLEW_LIBRARY) +IF (GLEW_INCLUDE_PATH) + SET( GLEW_FOUND 1 CACHE STRING "Set to 1 if GLEW is found, 0 otherwise") +ELSE (GLEW_INCLUDE_PATH) + SET( GLEW_FOUND 0 CACHE STRING "Set to 1 if GLEW is found, 0 otherwise") +ENDIF (GLEW_INCLUDE_PATH) +MARK_AS_ADVANCED( GLEW_FOUND ) diff --git a/cmake/FindGLUT.cmake b/cmake/FindGLUT.cmake new file mode 100644 index 0000000..8cff30c --- /dev/null +++ b/cmake/FindGLUT.cmake @@ -0,0 +1,127 @@ +# - try to find glut library and include files +# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc. +# GLUT_LIBRARIES, the libraries to link against +# GLUT_FOUND, If false, do not try to use GLUT. +# Also defined, but not for general use are: +# GLUT_glut_LIBRARY = the full path to the glut library. +# GLUT_Xmu_LIBRARY = the full path to the Xmu library. +# GLUT_Xi_LIBRARY = the full path to the Xi Library. 
+ +IF (WIN32) + + IF(CYGWIN) + + FIND_PATH( GLUT_INCLUDE_DIR GL/glut.h + /usr/include + ) + + FIND_LIBRARY( GLUT_glut_LIBRARY glut32 + ${OPENGL_LIBRARY_DIR} + /usr/lib + /usr/lib/w32api + /usr/local/lib + /usr/X11R6/lib + ) + + + ELSE(CYGWIN) + +# FIND_PATH( GLUT_INCLUDE_DIR GL/glut.h +# ${GLUT_ROOT_PATH}/include +# ) + +# FIND_LIBRARY( GLUT_glut_LIBRARY glut32 +# ${GLUT_ROOT_PATH}/lib +# ${OPENGL_LIBRARY_DIR} +# ) + + FIND_PATH( GLUT_INCLUDE_DIR GL/glut.h + ${GLUT_ROOT_PATH}/include + ${PROJECT_SOURCE_DIR}/src/nvgl/glut/include + DOC "The directory where GL/glut.h resides") + FIND_LIBRARY( GLUT_glut_LIBRARY + NAMES glut GLUT glut32 glut32s + PATHS + ${GLUT_ROOT_PATH}/lib + ${PROJECT_SOURCE_DIR}/src/nvgl/glut/bin + ${PROJECT_SOURCE_DIR}/src/nvgl/glut/lib + ${OPENGL_LIBRARY_DIR} + DOC "The GLUT library") + + ENDIF(CYGWIN) + +ELSE (WIN32) + + IF (APPLE) +# These values for Apple could probably do with improvement. + FIND_PATH( GLUT_INCLUDE_DIR glut.h + /System/Library/Frameworks/GLUT.framework/Versions/A/Headers + ${OPENGL_LIBRARY_DIR} + ) + SET(GLUT_glut_LIBRARY "-framework Glut" CACHE STRING "GLUT library for OSX") + SET(GLUT_cocoa_LIBRARY "-framework Cocoa" CACHE STRING "Cocoa framework for OSX") + ELSE (APPLE) + + FIND_PATH( GLUT_INCLUDE_DIR GL/glut.h + /usr/include + /usr/include/GL + /usr/local/include + /usr/openwin/share/include + /usr/openwin/include + /usr/X11R6/include + /usr/include/X11 + /opt/graphics/OpenGL/include + /opt/graphics/OpenGL/contrib/libglut + ) + + FIND_LIBRARY( GLUT_glut_LIBRARY glut + /usr/lib + /usr/local/lib + /usr/openwin/lib + /usr/X11R6/lib + ) + + FIND_LIBRARY( GLUT_Xi_LIBRARY Xi + /usr/lib + /usr/local/lib + /usr/openwin/lib + /usr/X11R6/lib + ) + + FIND_LIBRARY( GLUT_Xmu_LIBRARY Xmu + /usr/lib + /usr/local/lib + /usr/openwin/lib + /usr/X11R6/lib + ) + + ENDIF (APPLE) + +ENDIF (WIN32) + +SET( GLUT_FOUND "NO" ) +IF(GLUT_INCLUDE_DIR) + IF(GLUT_glut_LIBRARY) + # Is -lXi and -lXmu required on all platforms that have it? 
+ # If not, we need some way to figure out what platform we are on. + SET( GLUT_LIBRARIES + ${GLUT_glut_LIBRARY} + ${GLUT_Xmu_LIBRARY} + ${GLUT_Xi_LIBRARY} + ${GLUT_cocoa_LIBRARY} + ) + SET( GLUT_FOUND "YES" ) + +#The following deprecated settings are for backwards compatibility with CMake1.4 + SET (GLUT_LIBRARY ${GLUT_LIBRARIES}) + SET (GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR}) + + ENDIF(GLUT_glut_LIBRARY) +ENDIF(GLUT_INCLUDE_DIR) + +MARK_AS_ADVANCED( + GLUT_INCLUDE_DIR + GLUT_glut_LIBRARY + GLUT_Xmu_LIBRARY + GLUT_Xi_LIBRARY +) diff --git a/cmake/OptimalOptions.cmake b/cmake/OptimalOptions.cmake index 96f2700..f91f180 100644 --- a/cmake/OptimalOptions.cmake +++ b/cmake/OptimalOptions.cmake @@ -9,10 +9,9 @@ IF(CMAKE_COMPILER_IS_GNUCXX) ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "i586") IF(NV_SYSTEM_PROCESSOR STREQUAL "i686") - #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686") #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpmath=sse -mtune=i686 -msse3") #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=pentium4") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=prescott") ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "i686") IF(NV_SYSTEM_PROCESSOR STREQUAL "x86_64") @@ -21,25 +20,12 @@ IF(CMAKE_COMPILER_IS_GNUCXX) ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "x86_64") IF(NV_SYSTEM_PROCESSOR STREQUAL "powerpc") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=powerpc -faltivec -maltivec -mabi=altivec -mpowerpc-gfxopt") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=powerpc -maltivec -mabi=altivec -mpowerpc-gfxopt") # ibook G4: - #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=7450 -mtune=7450 -faltivec -maltivec -mabi=altivec -mpowerpc-gfxopt") - - # G5 - #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=G5 -faltivec -maltivec -mabi=altivec -mpowerpc-gfxopt") - + #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=7450 -mtune=7450 -maltivec -mabi=altivec -mpowerpc-gfxopt") ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "powerpc") -# IF(DARWIN) -# 
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.5 -isysroot /Developer/SDKs/MacOSX10.5.sdk") -# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.5 -isysroot /Developer/SDKs/MacOSX10.5.sdk") -# ENDIF(DARWIN) - - IF(CMAKE_BUILD_TYPE STREQUAL "debug") - ADD_DEFINITIONS(-D_DEBUG) - ENDIF(CMAKE_BUILD_TYPE STREQUAL "debug") - ENDIF(CMAKE_COMPILER_IS_GNUCXX) IF(MSVC) diff --git a/cmake/make2cmake.cmake b/cmake/make2cmake.cmake deleted file mode 100755 index 6ae2642..0000000 --- a/cmake/make2cmake.cmake +++ /dev/null @@ -1,61 +0,0 @@ - -# For more information, please see: http://software.sci.utah.edu -# -# The MIT License -# -# Copyright (c) 2007 -# Scientific Computing and Imaging Institute, University of Utah -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -# Make2cmake CMake Script -# Abe Stephens and James Bigler -# (c) 2007 Scientific Computing and Imaging Institute, University of Utah -# Note that the REGEX expressions may need to be tweaked for different dependency generators. - -file(READ ${input_file} depend_text) - -if (${depend_text} MATCHES ".+") - - # message("FOUND DEPENDS") - - # Remember, four backslashes is escaped to one backslash in the string. - string(REGEX REPLACE "\\\\ " " " depend_text ${depend_text}) - - # This works for the nvcc -M generated dependency files. - string(REGEX REPLACE "^.* : " "" depend_text ${depend_text}) - string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text}) - - foreach(file ${depend_text}) - - string(REGEX REPLACE "^ +" "" file ${file}) - - if(NOT IS_DIRECTORY ${file}) - set(cuda_nvcc_depend "${cuda_nvcc_depend} \"${file}\"\n") - endif(NOT IS_DIRECTORY ${file}) - - endforeach(file) - -else() - # message("FOUND NO DEPENDS") -endif() - - -file(WRITE ${output_file} "# Generated by: make2cmake.cmake\nSET(CUDA_NVCC_DEPEND\n ${cuda_nvcc_depend})\n\n") diff --git a/cmake/parse_cubin.cmake b/cmake/parse_cubin.cmake deleted file mode 100644 index 6b3d82a..0000000 --- a/cmake/parse_cubin.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# For more information, please see: http://software.sci.utah.edu -# -# The MIT License -# -# Copyright (c) 2007 -# Scientific Computing and Imaging Institute, University of Utah -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this 
permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# .cubin Parsing CMake Script -# Abe Stephens -# (c) 2007 Scientific Computing and Imaging Institute, University of Utah - -file(READ ${input_file} file_text) - -if (${file_text} MATCHES ".+") - - # Remember, four backslashes is escaped to one backslash in the string. - string(REGEX REPLACE ";" "\\\\;" file_text ${file_text}) - string(REGEX REPLACE "\ncode" ";code" file_text ${file_text}) - - list(LENGTH file_text len) - - foreach(line ${file_text}) - - # Only look at "code { }" blocks. - if(line MATCHES "^code") - - # Break into individual lines. - string(REGEX REPLACE "\n" ";" line ${line}) - - foreach(entry ${line}) - - # Extract kernel names. - if (${entry} MATCHES "[^g]name = ([^ ]+)") - string(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry}) - - # Check to see if the kernel name starts with "_" - set(skip FALSE) - # if (${entry} MATCHES "^_") - # Skip the rest of this block. 
- # message("Skipping ${entry}") - # set(skip TRUE) - # else (${entry} MATCHES "^_") - message("Kernel: ${entry}") - # endif (${entry} MATCHES "^_") - - endif(${entry} MATCHES "[^g]name = ([^ ]+)") - - # Skip the rest of the block if necessary - if(NOT skip) - - # Registers - if (${entry} MATCHES "reg = ([^ ]+)") - string(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry}) - message("Registers: ${entry}") - endif(${entry} MATCHES "reg = ([^ ]+)") - - # Local memory - if (${entry} MATCHES "lmem = ([^ ]+)") - string(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry}) - message("Local: ${entry}") - endif(${entry} MATCHES "lmem = ([^ ]+)") - - # Shared memory - if (${entry} MATCHES "smem = ([^ ]+)") - string(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry}) - message("Shared: ${entry}") - endif(${entry} MATCHES "smem = ([^ ]+)") - - if (${entry} MATCHES "^}") - message("") - endif(${entry} MATCHES "^}") - - endif(NOT skip) - - - endforeach(entry) - - endif(line MATCHES "^code") - - endforeach(line) - -else() - # message("FOUND NO DEPENDS") -endif() - - diff --git a/cmake/run_nvcc.cmake b/cmake/run_nvcc.cmake deleted file mode 100755 index 09d1624..0000000 --- a/cmake/run_nvcc.cmake +++ /dev/null @@ -1,227 +0,0 @@ -# This file runs the nvcc commands to produce the desired output file along with -# the dependency file needed by CMake to compute dependencies. In addition the -# file checks the output of each command and if the command fails it deletes the -# output files. - -# Input variables -# -# verbose:BOOL=<> OFF: Be as quiet as possible (default) -# ON : Describe each step -# -# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or -# RelWithDebInfo, but it should match one of the -# entries in CUDA_HOST_FLAGS. This is the build -# configuration used when compiling the code. If -# blank or unspecified Debug is assumed as this is -# what CMake does. -# -# generated_file:STRING=<> File to generate. This argument must be passed in. 
-# -# generated_cubin_file:STRING=<> File to generate. This argument must be passed -# in if build_cubin is true. - -if(NOT generated_file) - message(FATAL_ERROR "You must specify generated_file on the command line") -endif() - -# Set these up as variables to make reading the generated file easier -set(CMAKE_COMMAND "@CMAKE_COMMAND@") -set(source_file "@source_file@") -set(NVCC_generated_dependency_file "@NVCC_generated_dependency_file@") -set(cmake_dependency_file "@cmake_dependency_file@") -set(CUDA_make2cmake "@CUDA_make2cmake@") -set(CUDA_parse_cubin "@CUDA_parse_cubin@") -set(build_cubin @build_cubin@) -# We won't actually use these variables for now, but we need to set this, in -# order to force this file to be run again if it changes. -set(generated_file_path "@generated_file_path@") -set(generated_file_internal "@generated_file@") -set(generated_cubin_file_internal "@generated_cubin_file@") - -set(CUDA_NVCC_EXECUTABLE "@CUDA_NVCC_EXECUTABLE@") -set(CUDA_NVCC_FLAGS "@CUDA_NVCC_FLAGS@;@CUDA_WRAP_OPTION_NVCC_FLAGS@") -@CUDA_NVCC_FLAGS_CONFIG@ -set(nvcc_flags "@nvcc_flags@") -set(CUDA_NVCC_INCLUDE_ARGS "@CUDA_NVCC_INCLUDE_ARGS@") -set(format_flag "@format_flag@") - -if(build_cubin AND NOT generated_cubin_file) - message(FATAL_ERROR "You must specify generated_cubin_file on the command line") -endif() - -# This is the list of host compilation flags. It C or CXX should already have -# been chosen by FindCUDA.cmake. -@CUDA_HOST_FLAGS@ - -# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler -set(nvcc_host_compiler_flags "") -# If we weren't given a build_configuration, use Debug. 
-if(NOT build_configuration) - set(build_configuration Debug) -endif() -string(TOUPPER "${build_configuration}" build_configuration) -#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") -foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) - # Extra quotes are added around each flag to help nvcc parse out flags with spaces. - set(nvcc_host_compiler_flags "${nvcc_host_compiler_flags},\"${flag}\"") -endforeach() -if (nvcc_host_compiler_flags) - set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) -endif() -#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") -# Add the build specific configuration flags -list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) - -if(DEFINED CCBIN) - set(CCBIN -ccbin "${CCBIN}") -endif() - -# cuda_execute_process - Executes a command with optional command echo and status message. -# -# status - Status message to print if verbose is true -# command - COMMAND argument from the usual execute_process argument structure -# ARGN - Remaining arguments are the command with arguments -# -# CUDA_result - return value from running the command -# -# Make this a macro instead of a function, so that things like RESULT_VARIABLE -# and other return variables are present after executing the process. -macro(cuda_execute_process status command) - set(_command ${command}) - if(NOT _command STREQUAL "COMMAND") - message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") - endif() - if(verbose) - execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) - # Now we need to build up our command string. We are accounting for quotes - # and spaces, anything else is left up to the user to fix if they want to - # copy and paste a runnable command line. - set(cuda_execute_process_string) - foreach(arg ${ARGN}) - # If there are quotes, excape them, so they come through. 
- string(REPLACE "\"" "\\\"" arg ${arg}) - # Args with spaces need quotes around them to get them to be parsed as a single argument. - if(arg MATCHES " ") - list(APPEND cuda_execute_process_string "\"${arg}\"") - else() - list(APPEND cuda_execute_process_string ${arg}) - endif() - endforeach() - # Echo the command - execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) - endif(verbose) - # Run the command - execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) -endmacro() - -# Delete the target file -cuda_execute_process( - "Removing ${generated_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" - ) - -# Generate the dependency file -cuda_execute_process( - "Generating dependency file: ${NVCC_generated_dependency_file}" - COMMAND "${CUDA_NVCC_EXECUTABLE}" - "${source_file}" - ${CUDA_NVCC_FLAGS} - ${nvcc_flags} - ${CCBIN} - ${nvcc_host_compiler_flags} - -DNVCC - -M - -o "${NVCC_generated_dependency_file}" - ${CUDA_NVCC_INCLUDE_ARGS} - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Generate the cmake readable dependency file to a temp file. Don't put the -# quotes just around the filenames for the input_file and output_file variables. -# CMake will pass the quotes through and not be able to find the file. 
-cuda_execute_process( - "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" - COMMAND "${CMAKE_COMMAND}" - -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" - -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" - -P "${CUDA_make2cmake}" - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Copy the file if it is different -cuda_execute_process( - "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" - COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Delete the temporary file -cuda_execute_process( - "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Generate the code -cuda_execute_process( - "Generating ${generated_file}" - COMMAND "${CUDA_NVCC_EXECUTABLE}" - "${source_file}" - ${CUDA_NVCC_FLAGS} - ${nvcc_flags} - ${CCBIN} - ${nvcc_host_compiler_flags} - -DNVCC - ${format_flag} -o "${generated_file}" - ${CUDA_NVCC_INCLUDE_ARGS} - ) - -if(CUDA_result) - # Since nvcc can sometimes leave half done files make sure that we delete the output file. - cuda_execute_process( - "Removing ${generated_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" - ) - message(FATAL_ERROR "Error generating file ${generated_file}") -else() - message("Generated ${generated_file} successfully.") -endif() - -# Cubin resource report commands. -if( build_cubin ) - # Run with -cubin to produce resource usage report. 
- cuda_execute_process( - "Generating ${generated_cubin_file}" - COMMAND "${CUDA_NVCC_EXECUTABLE}" - "${source_file}" - ${CUDA_NVCC_FLAGS} - ${nvcc_flags} - ${CCBIN} - ${nvcc_host_compiler_flags} - -DNVCC - -cubin - -o "${generated_cubin_file}" - ${CUDA_NVCC_INCLUDE_ARGS} - ) - - # Execute the parser script. - cuda_execute_process( - "Executing the parser script" - COMMAND "${CMAKE_COMMAND}" - -D "input_file:STRING=${generated_cubin_file}" - -P "${CUDA_parse_cubin}" - ) - -endif( build_cubin ) diff --git a/configure b/configure index e130c85..5892f52 100755 --- a/configure +++ b/configure @@ -51,8 +51,8 @@ fi echo "-- Configuring nvidia-texture-tools "`cat VERSION` -mkdir -p ./build-$build -cd ./build-$build +mkdir -p ./build +cd ./build $CMAKE .. -DNVTT_SHARED=1 -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1 cd .. @@ -62,15 +62,11 @@ echo "" cat > Makefile << EOF all: - @make --no-print-directory -C build-$build/ + @make --no-print-directory -C build/ install: - @make install --no-print-directory -C build-$build/ -package: - @make package --no-print-directory -C build-$build/ -test: - @make test --no-print-directory -C build-$build/ + @make install --no-print-directory -C build/ clean: - @make clean --no-print-directory -C build-$build/ + @make clean --no-print-directory -C build/ distclean: - @rm -Rf build-$build/ + @rm -Rf build/ EOF diff --git a/data/bugs/85/Sky.tga b/data/bugs/85/Sky.tga deleted file mode 100644 index 73064d6..0000000 Binary files a/data/bugs/85/Sky.tga and /dev/null differ diff --git a/data/bugs/85/sky_dither.dds b/data/bugs/85/sky_dither.dds deleted file mode 100644 index e87b339..0000000 Binary files a/data/bugs/85/sky_dither.dds and /dev/null differ diff --git a/data/bugs/85/sky_error.dds b/data/bugs/85/sky_error.dds deleted file mode 100644 index 6de1689..0000000 Binary files a/data/bugs/85/sky_error.dds and /dev/null differ diff --git a/data/bugs/85/sky_screenshot_1.png 
b/data/bugs/85/sky_screenshot_1.png deleted file mode 100644 index 729a445..0000000 Binary files a/data/bugs/85/sky_screenshot_1.png and /dev/null differ diff --git a/data/bugs/85/tables.cpp b/data/bugs/85/tables.cpp deleted file mode 100644 index 590aef9..0000000 --- a/data/bugs/85/tables.cpp +++ /dev/null @@ -1,95 +0,0 @@ - -#include -#include - -typedef unsigned char uint8; - - -static int Mul8Bit(int a, int b) -{ - int t = a * b + 128; - return (t + (t >> 8)) >> 8; -} - -// this exactly matches the (fm*2 + to)/3 -static inline int Lerp13_16bit(int fm, int to) -{ - int t = fm * (2 * 0xAAAB) + to * 0xAAAB; - return t >> 17; -} - -static inline int Lerp13(int fm, int to) -{ - return (fm * 2 + to) / 3; -} - - -static void PrepareOptTable(uint8 * Table, const uint8 * expand, int size) -{ - for (int i = 0; i < 256; i++) - { - float bestErr = 256; - - for (int min = 0; min < size; min++) - { - for (int max = 0; max < size; max++) - { - int mine = expand[min]; - int maxe = expand[max]; - //if (maxe - mine < 32) - { - //printf("%d <-> %d\n", maxe + Mul8Bit(mine-maxe, 0x55), Lerp13(maxe, mine)); - //int err = abs(Lerp13_16bit(mine, maxe) - i); - //int err = abs(maxe + Mul8Bit(mine-maxe, 0x55) - i); - float err = abs(Lerp13(maxe, mine) - i); - //err += 0.03f * abs(maxe - mine); - err += 0.03f * abs(max - min); - - if (err < bestErr) - { - Table[i*2+0] = max; - Table[i*2+1] = min; - bestErr = err; - } - } - } - } - printf("%d: %f %d\n", i, bestErr, abs(Table[i*2+0] - Table[i*2+1])); - } -} - - -int main() -{ - uint8 OMatch5[256*2]; - uint8 OMatch6[256*2]; - - uint8 Expand5[32]; - uint8 Expand6[64]; - - for (int i=0; i<32; i++) - Expand5[i] = (i<<3)|(i>>2); - - for (int i=0; i<64; i++) - Expand6[i] = (i<<2)|(i>>4); - - PrepareOptTable(OMatch5, Expand5, 32); - PrepareOptTable(OMatch6, Expand6, 64); - - printf("const static uint8 OMatch5[256][2] = {\n"); - for (int i = 0; i < 256; i++) - { - printf("\t{0x%.2X, 0x%.2X},\n", OMatch5[2*i+0], OMatch5[2*i+1]); - } - printf("}\n"); 
- - printf("const static uint8 OMatch6[256][2] = {\n"); - for (int i = 0; i < 256; i++) - { - printf("\t{0x%.2X, 0x%.2X},\n", OMatch6[2*i+0], OMatch6[2*i+1]); - } - printf("}\n"); - - return 0; -} - diff --git a/data/bugs/88/nmap.png b/data/bugs/88/nmap.png deleted file mode 100644 index bef4351..0000000 Binary files a/data/bugs/88/nmap.png and /dev/null differ diff --git a/data/bugs/mthomson/world.png b/data/bugs/mthomson/world.png deleted file mode 100755 index e96a67c..0000000 Binary files a/data/bugs/mthomson/world.png and /dev/null differ diff --git a/data/bugs/mthomson/world.tif b/data/bugs/mthomson/world.tif deleted file mode 100755 index 7272506..0000000 Binary files a/data/bugs/mthomson/world.tif and /dev/null differ diff --git a/data/testsuite/Bradley1.png b/data/testsuite/Bradley1.png deleted file mode 100644 index c03cf48..0000000 Binary files a/data/testsuite/Bradley1.png and /dev/null differ diff --git a/data/testsuite/Gradient.png b/data/testsuite/Gradient.png deleted file mode 100644 index f3c537e..0000000 Binary files a/data/testsuite/Gradient.png and /dev/null differ diff --git a/data/testsuite/MoreRocks.png b/data/testsuite/MoreRocks.png deleted file mode 100644 index 4cbadeb..0000000 Binary files a/data/testsuite/MoreRocks.png and /dev/null differ diff --git a/data/testsuite/Rainbow.png b/data/testsuite/Rainbow.png deleted file mode 100644 index 94676e7..0000000 Binary files a/data/testsuite/Rainbow.png and /dev/null differ diff --git a/data/testsuite/Text.png b/data/testsuite/Text.png deleted file mode 100644 index 57e4e27..0000000 Binary files a/data/testsuite/Text.png and /dev/null differ diff --git a/data/testsuite/Wall.png b/data/testsuite/Wall.png deleted file mode 100644 index ee30e74..0000000 Binary files a/data/testsuite/Wall.png and /dev/null differ diff --git a/data/testsuite/clegg.png b/data/testsuite/clegg.png deleted file mode 100644 index 71aecf8..0000000 Binary files a/data/testsuite/clegg.png and /dev/null differ diff --git 
a/data/testsuite/frymire.png b/data/testsuite/frymire.png deleted file mode 100644 index 1950f97..0000000 Binary files a/data/testsuite/frymire.png and /dev/null differ diff --git a/data/testsuite/kodim01.png b/data/testsuite/kodim01.png deleted file mode 100644 index 14317f0..0000000 Binary files a/data/testsuite/kodim01.png and /dev/null differ diff --git a/data/testsuite/kodim02.png b/data/testsuite/kodim02.png deleted file mode 100644 index 20c7206..0000000 Binary files a/data/testsuite/kodim02.png and /dev/null differ diff --git a/data/testsuite/kodim03.png b/data/testsuite/kodim03.png deleted file mode 100644 index d57e8bd..0000000 Binary files a/data/testsuite/kodim03.png and /dev/null differ diff --git a/data/testsuite/kodim04.png b/data/testsuite/kodim04.png deleted file mode 100644 index de0991c..0000000 Binary files a/data/testsuite/kodim04.png and /dev/null differ diff --git a/data/testsuite/kodim05.png b/data/testsuite/kodim05.png deleted file mode 100644 index 400e4cf..0000000 Binary files a/data/testsuite/kodim05.png and /dev/null differ diff --git a/data/testsuite/kodim06.png b/data/testsuite/kodim06.png deleted file mode 100644 index 949d1e2..0000000 Binary files a/data/testsuite/kodim06.png and /dev/null differ diff --git a/data/testsuite/kodim07.png b/data/testsuite/kodim07.png deleted file mode 100644 index d04374d..0000000 Binary files a/data/testsuite/kodim07.png and /dev/null differ diff --git a/data/testsuite/kodim08.png b/data/testsuite/kodim08.png deleted file mode 100644 index d628801..0000000 Binary files a/data/testsuite/kodim08.png and /dev/null differ diff --git a/data/testsuite/kodim09.png b/data/testsuite/kodim09.png deleted file mode 100644 index 1d47188..0000000 Binary files a/data/testsuite/kodim09.png and /dev/null differ diff --git a/data/testsuite/kodim10.png b/data/testsuite/kodim10.png deleted file mode 100644 index 2e4b18b..0000000 Binary files a/data/testsuite/kodim10.png and /dev/null differ diff --git 
a/data/testsuite/kodim11.png b/data/testsuite/kodim11.png deleted file mode 100644 index 3c66526..0000000 Binary files a/data/testsuite/kodim11.png and /dev/null differ diff --git a/data/testsuite/kodim12.png b/data/testsuite/kodim12.png deleted file mode 100644 index 55db018..0000000 Binary files a/data/testsuite/kodim12.png and /dev/null differ diff --git a/data/testsuite/kodim13.png b/data/testsuite/kodim13.png deleted file mode 100644 index b35c9d3..0000000 Binary files a/data/testsuite/kodim13.png and /dev/null differ diff --git a/data/testsuite/kodim14.png b/data/testsuite/kodim14.png deleted file mode 100644 index e7b3283..0000000 Binary files a/data/testsuite/kodim14.png and /dev/null differ diff --git a/data/testsuite/kodim15.png b/data/testsuite/kodim15.png deleted file mode 100644 index 9c5eacf..0000000 Binary files a/data/testsuite/kodim15.png and /dev/null differ diff --git a/data/testsuite/kodim16.png b/data/testsuite/kodim16.png deleted file mode 100644 index 54b0d0d..0000000 Binary files a/data/testsuite/kodim16.png and /dev/null differ diff --git a/data/testsuite/kodim17.png b/data/testsuite/kodim17.png deleted file mode 100644 index 5e4583e..0000000 Binary files a/data/testsuite/kodim17.png and /dev/null differ diff --git a/data/testsuite/kodim18.png b/data/testsuite/kodim18.png deleted file mode 100644 index 48097fc..0000000 Binary files a/data/testsuite/kodim18.png and /dev/null differ diff --git a/data/testsuite/kodim19.png b/data/testsuite/kodim19.png deleted file mode 100644 index e359a32..0000000 Binary files a/data/testsuite/kodim19.png and /dev/null differ diff --git a/data/testsuite/kodim20.png b/data/testsuite/kodim20.png deleted file mode 100644 index 86e2837..0000000 Binary files a/data/testsuite/kodim20.png and /dev/null differ diff --git a/data/testsuite/kodim21.png b/data/testsuite/kodim21.png deleted file mode 100644 index ccd4c3f..0000000 Binary files a/data/testsuite/kodim21.png and /dev/null differ diff --git 
a/data/testsuite/kodim22.png b/data/testsuite/kodim22.png deleted file mode 100644 index 52bcd47..0000000 Binary files a/data/testsuite/kodim22.png and /dev/null differ diff --git a/data/testsuite/kodim23.png b/data/testsuite/kodim23.png deleted file mode 100644 index ff22e83..0000000 Binary files a/data/testsuite/kodim23.png and /dev/null differ diff --git a/data/testsuite/kodim24.png b/data/testsuite/kodim24.png deleted file mode 100644 index 85bd64f..0000000 Binary files a/data/testsuite/kodim24.png and /dev/null differ diff --git a/data/testsuite/lena.png b/data/testsuite/lena.png deleted file mode 100644 index 7827505..0000000 Binary files a/data/testsuite/lena.png and /dev/null differ diff --git a/data/testsuite/lugaru-blood.png b/data/testsuite/lugaru-blood.png deleted file mode 100644 index 852f7fe..0000000 Binary files a/data/testsuite/lugaru-blood.png and /dev/null differ diff --git a/data/testsuite/lugaru-bush.png b/data/testsuite/lugaru-bush.png deleted file mode 100644 index 24e0462..0000000 Binary files a/data/testsuite/lugaru-bush.png and /dev/null differ diff --git a/data/testsuite/lugaru-cursor.png b/data/testsuite/lugaru-cursor.png deleted file mode 100644 index afea54b..0000000 Binary files a/data/testsuite/lugaru-cursor.png and /dev/null differ diff --git a/data/testsuite/lugaru-hawk.png b/data/testsuite/lugaru-hawk.png deleted file mode 100644 index 5ef2abc..0000000 Binary files a/data/testsuite/lugaru-hawk.png and /dev/null differ diff --git a/data/testsuite/monarch.png b/data/testsuite/monarch.png deleted file mode 100644 index 9408b7d..0000000 Binary files a/data/testsuite/monarch.png and /dev/null differ diff --git a/data/testsuite/peppers.png b/data/testsuite/peppers.png deleted file mode 100644 index 39eae06..0000000 Binary files a/data/testsuite/peppers.png and /dev/null differ diff --git a/data/testsuite/q3-blocks15cgeomtrn.tga b/data/testsuite/q3-blocks15cgeomtrn.tga deleted file mode 100644 index 97ea5d8..0000000 Binary files 
a/data/testsuite/q3-blocks15cgeomtrn.tga and /dev/null differ diff --git a/data/testsuite/q3-blocks17bloody.tga b/data/testsuite/q3-blocks17bloody.tga deleted file mode 100644 index b6d25cf..0000000 Binary files a/data/testsuite/q3-blocks17bloody.tga and /dev/null differ diff --git a/data/testsuite/q3-dark_tin2.tga b/data/testsuite/q3-dark_tin2.tga deleted file mode 100644 index fb708f6..0000000 Binary files a/data/testsuite/q3-dark_tin2.tga and /dev/null differ diff --git a/data/testsuite/q3-fan.tga b/data/testsuite/q3-fan.tga deleted file mode 100644 index a06391f..0000000 Binary files a/data/testsuite/q3-fan.tga and /dev/null differ diff --git a/data/testsuite/q3-fan_grate.tga b/data/testsuite/q3-fan_grate.tga deleted file mode 100644 index 4ce6a36..0000000 Binary files a/data/testsuite/q3-fan_grate.tga and /dev/null differ diff --git a/data/testsuite/q3-metal2_2.tga b/data/testsuite/q3-metal2_2.tga deleted file mode 100644 index 909bce0..0000000 Binary files a/data/testsuite/q3-metal2_2.tga and /dev/null differ diff --git a/data/testsuite/q3-panel_glo.tga b/data/testsuite/q3-panel_glo.tga deleted file mode 100644 index 8b9efcc..0000000 Binary files a/data/testsuite/q3-panel_glo.tga and /dev/null differ diff --git a/data/testsuite/q3-proto_fence.tga b/data/testsuite/q3-proto_fence.tga deleted file mode 100644 index ad84de8..0000000 Binary files a/data/testsuite/q3-proto_fence.tga and /dev/null differ diff --git a/data/testsuite/q3-wires02.tga b/data/testsuite/q3-wires02.tga deleted file mode 100644 index 60384e8..0000000 Binary files a/data/testsuite/q3-wires02.tga and /dev/null differ diff --git a/data/testsuite/sail.png b/data/testsuite/sail.png deleted file mode 100644 index 410967d..0000000 Binary files a/data/testsuite/sail.png and /dev/null differ diff --git a/data/testsuite/serrano.png b/data/testsuite/serrano.png deleted file mode 100644 index e211431..0000000 Binary files a/data/testsuite/serrano.png and /dev/null differ diff --git 
a/data/testsuite/t.2d.pn02.bmp b/data/testsuite/t.2d.pn02.bmp deleted file mode 100644 index 2b6cf76..0000000 Binary files a/data/testsuite/t.2d.pn02.bmp and /dev/null differ diff --git a/data/testsuite/t.aircondition.01.bmp b/data/testsuite/t.aircondition.01.bmp deleted file mode 100644 index c8ef6d0..0000000 Binary files a/data/testsuite/t.aircondition.01.bmp and /dev/null differ diff --git a/data/testsuite/t.bricks.02.bmp b/data/testsuite/t.bricks.02.bmp deleted file mode 100644 index 48232ad..0000000 Binary files a/data/testsuite/t.bricks.02.bmp and /dev/null differ diff --git a/data/testsuite/t.bricks.05.bmp b/data/testsuite/t.bricks.05.bmp deleted file mode 100644 index ce3d34a..0000000 Binary files a/data/testsuite/t.bricks.05.bmp and /dev/null differ diff --git a/data/testsuite/t.concrete.cracked.01.bmp b/data/testsuite/t.concrete.cracked.01.bmp deleted file mode 100644 index 3df2a63..0000000 Binary files a/data/testsuite/t.concrete.cracked.01.bmp and /dev/null differ diff --git a/data/testsuite/t.envi.colored02.bmp b/data/testsuite/t.envi.colored02.bmp deleted file mode 100644 index 81b427c..0000000 Binary files a/data/testsuite/t.envi.colored02.bmp and /dev/null differ diff --git a/data/testsuite/t.envi.colored03.bmp b/data/testsuite/t.envi.colored03.bmp deleted file mode 100644 index 54c27a1..0000000 Binary files a/data/testsuite/t.envi.colored03.bmp and /dev/null differ diff --git a/data/testsuite/t.font.01.bmp b/data/testsuite/t.font.01.bmp deleted file mode 100644 index 0ceee55..0000000 Binary files a/data/testsuite/t.font.01.bmp and /dev/null differ diff --git a/data/testsuite/t.sewers.01.bmp b/data/testsuite/t.sewers.01.bmp deleted file mode 100644 index e031008..0000000 Binary files a/data/testsuite/t.sewers.01.bmp and /dev/null differ diff --git a/data/testsuite/t.train.03.bmp b/data/testsuite/t.train.03.bmp deleted file mode 100644 index 3e00410..0000000 Binary files a/data/testsuite/t.train.03.bmp and /dev/null differ diff --git 
a/data/testsuite/t.yello.01.bmp b/data/testsuite/t.yello.01.bmp deleted file mode 100644 index 363c5d9..0000000 Binary files a/data/testsuite/t.yello.01.bmp and /dev/null differ diff --git a/data/testsuite/tulips.png b/data/testsuite/tulips.png deleted file mode 100644 index 880d7a9..0000000 Binary files a/data/testsuite/tulips.png and /dev/null differ diff --git a/doc/release_todo b/doc/release_todo deleted file mode 100644 index f5ea772..0000000 --- a/doc/release_todo +++ /dev/null @@ -1,4 +0,0 @@ -Update version number in nvimage/DirectDrawSurface.cpp -Update version number in CMakeLists.txt -Update version number in VERSION -Update version number in NVIDIA_Texture_Tools_README.txt diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt deleted file mode 100644 index 90cb4e1..0000000 --- a/extern/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ - -IF(WIN32) - ADD_SUBDIRECTORY(gnuwin32) -ENDIF(WIN32) - -ADD_SUBDIRECTORY(poshlib) diff --git a/extern/FreeImage/FreeImage.dll b/extern/FreeImage/FreeImage.dll deleted file mode 100755 index 05859a8..0000000 Binary files a/extern/FreeImage/FreeImage.dll and /dev/null differ diff --git a/extern/FreeImage/FreeImage.h b/extern/FreeImage/FreeImage.h deleted file mode 100644 index 7125acc..0000000 --- a/extern/FreeImage/FreeImage.h +++ /dev/null @@ -1,1046 +0,0 @@ -// ========================================================== -// FreeImage 3 -// -// Design and implementation by -// - Floris van den Berg (flvdberg@wxs.nl) -// - Hervé Drolon (drolon@infonie.fr) -// -// Contributors: -// - Adam Gates (radad@xoasis.com) -// - Alex Kwak -// - Alexander Dymerets (sashad@te.net.ua) -// - Detlev Vendt (detlev.vendt@brillit.de) -// - Jan L. 
Nauta (jln@magentammt.com) -// - Jani Kajala (janik@remedy.fi) -// - Juergen Riecker (j.riecker@gmx.de) -// - Karl-Heinz Bussian (khbussian@moss.de) -// - Laurent Rocher (rocherl@club-internet.fr) -// - Luca Piergentili (l.pierge@terra.es) -// - Machiel ten Brinke (brinkem@uni-one.nl) -// - Markus Loibl (markus.loibl@epost.de) -// - Martin Weber (martweb@gmx.net) -// - Matthias Wandel (mwandel@rim.net) -// - Michal Novotny (michal@etc.cz) -// - Petr Pytelka (pyta@lightcomp.com) -// - Riley McNiff (rmcniff@marexgroup.com) -// - Ryan Rubley (ryan@lostreality.org) -// - Volker Gärtner (volkerg@gmx.at) -// -// This file is part of FreeImage 3 -// -// COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTY -// OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES -// THAT THE COVERED CODE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE -// OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED -// CODE IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT -// THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY -// SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL -// PART OF THIS LICENSE. NO USE OF ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER -// THIS DISCLAIMER. -// -// Use at your own risk! 
-// ========================================================== - -#ifndef FREEIMAGE_H -#define FREEIMAGE_H - -// Version information ------------------------------------------------------ - -#define FREEIMAGE_MAJOR_VERSION 3 -#define FREEIMAGE_MINOR_VERSION 10 -#define FREEIMAGE_RELEASE_SERIAL 0 - -// Compiler options --------------------------------------------------------- - -#include // needed for UNICODE functions - -#if defined(FREEIMAGE_LIB) - #define DLL_API - #define DLL_CALLCONV -#else - #if defined(_WIN32) || defined(__WIN32__) - #define DLL_CALLCONV __stdcall - // The following ifdef block is the standard way of creating macros which make exporting - // from a DLL simpler. All files within this DLL are compiled with the FREEIMAGE_EXPORTS - // symbol defined on the command line. this symbol should not be defined on any project - // that uses this DLL. This way any other project whose source files include this file see - // DLL_API functions as being imported from a DLL, wheras this DLL sees symbols - // defined with this macro as being exported. 
- #ifdef FREEIMAGE_EXPORTS - #define DLL_API __declspec(dllexport) - #else - #define DLL_API __declspec(dllimport) - #endif // FREEIMAGE_EXPORTS - #else - // try the gcc visibility support (see http://gcc.gnu.org/wiki/Visibility) - #if defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) - #ifndef GCC_HASCLASSVISIBILITY - #define GCC_HASCLASSVISIBILITY - #endif - #endif // __GNUC__ - #define DLL_CALLCONV - #if defined(GCC_HASCLASSVISIBILITY) - #define DLL_API __attribute__ ((visibility("default"))) - #else - #define DLL_API - #endif - #endif // WIN32 / !WIN32 -#endif // FREEIMAGE_LIB - -// Some versions of gcc may have BYTE_ORDER or __BYTE_ORDER defined -// If your big endian system isn't being detected, add an OS specific check -#if (defined(BYTE_ORDER) && BYTE_ORDER==BIG_ENDIAN) || \ - (defined(__BYTE_ORDER) && __BYTE_ORDER==__BIG_ENDIAN) || \ - defined(__BIG_ENDIAN__) -#define FREEIMAGE_BIGENDIAN -#endif // BYTE_ORDER - -// This really only affects 24 and 32 bit formats, the rest are always RGB order. 
-#define FREEIMAGE_COLORORDER_BGR 0 -#define FREEIMAGE_COLORORDER_RGB 1 -#if defined(__APPLE__) || defined(FREEIMAGE_BIGENDIAN) -#define FREEIMAGE_COLORORDER FREEIMAGE_COLORORDER_RGB -#else -#define FREEIMAGE_COLORORDER FREEIMAGE_COLORORDER_BGR -#endif - -// Ensure 4-byte enums if we're using Borland C++ compilers -#if defined(__BORLANDC__) -#pragma option push -b -#endif - -// For C compatibility -------------------------------------------------------- - -#ifdef __cplusplus -#define FI_DEFAULT(x) = x -#define FI_ENUM(x) enum x -#define FI_STRUCT(x) struct x -#else -#define FI_DEFAULT(x) -#define FI_ENUM(x) typedef int x; enum x -#define FI_STRUCT(x) typedef struct x x; struct x -#endif - -// Bitmap types ------------------------------------------------------------- - -FI_STRUCT (FIBITMAP) { void *data; }; -FI_STRUCT (FIMULTIBITMAP) { void *data; }; - -// Types used in the library (directly copied from Windows) ----------------- - -#ifndef _WINDOWS_ -#define _WINDOWS_ - -#ifndef FALSE -#define FALSE 0 -#endif -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef NULL -#define NULL 0 -#endif - -#ifndef SEEK_SET -#define SEEK_SET 0 -#define SEEK_CUR 1 -#define SEEK_END 2 -#endif - -#ifndef _MSC_VER -// define portable types for 32-bit / 64-bit OS -#include -typedef int32_t BOOL; -typedef uint8_t BYTE; -typedef uint16_t WORD; -typedef uint32_t DWORD; -typedef int32_t LONG; -#else -// MS is not C99 ISO compliant -typedef long BOOL; -typedef unsigned char BYTE; -typedef unsigned short WORD; -typedef unsigned long DWORD; -typedef long LONG; -#endif // _MSC_VER - -#if (defined(_WIN32) || defined(__WIN32__)) -#pragma pack(push, 1) -#else -#pragma pack(1) -#endif // WIN32 - -typedef struct tagRGBQUAD { -#if FREEIMAGE_COLORORDER == FREEIMAGE_COLORORDER_BGR - BYTE rgbBlue; - BYTE rgbGreen; - BYTE rgbRed; -#else - BYTE rgbRed; - BYTE rgbGreen; - BYTE rgbBlue; -#endif // FREEIMAGE_COLORORDER - BYTE rgbReserved; -} RGBQUAD; - -typedef struct tagRGBTRIPLE { -#if FREEIMAGE_COLORORDER == 
FREEIMAGE_COLORORDER_BGR - BYTE rgbtBlue; - BYTE rgbtGreen; - BYTE rgbtRed; -#else - BYTE rgbtRed; - BYTE rgbtGreen; - BYTE rgbtBlue; -#endif // FREEIMAGE_COLORORDER -} RGBTRIPLE; - -#if (defined(_WIN32) || defined(__WIN32__)) -#pragma pack(pop) -#else -#pragma pack() -#endif // WIN32 - -typedef struct tagBITMAPINFOHEADER{ - DWORD biSize; - LONG biWidth; - LONG biHeight; - WORD biPlanes; - WORD biBitCount; - DWORD biCompression; - DWORD biSizeImage; - LONG biXPelsPerMeter; - LONG biYPelsPerMeter; - DWORD biClrUsed; - DWORD biClrImportant; -} BITMAPINFOHEADER, *PBITMAPINFOHEADER; - -typedef struct tagBITMAPINFO { - BITMAPINFOHEADER bmiHeader; - RGBQUAD bmiColors[1]; -} BITMAPINFO, *PBITMAPINFO; - -#endif // _WINDOWS_ - -// Types used in the library (specific to FreeImage) ------------------------ - -#if (defined(_WIN32) || defined(__WIN32__)) -#pragma pack(push, 1) -#else -#pragma pack(1) -#endif // WIN32 - -/** 48-bit RGB -*/ -typedef struct tagFIRGB16 { - WORD red; - WORD green; - WORD blue; -} FIRGB16; - -/** 64-bit RGBA -*/ -typedef struct tagFIRGBA16 { - WORD red; - WORD green; - WORD blue; - WORD alpha; -} FIRGBA16; - -/** 96-bit RGB Float -*/ -typedef struct tagFIRGBF { - float red; - float green; - float blue; -} FIRGBF; - -/** 128-bit RGBA Float -*/ -typedef struct tagFIRGBAF { - float red; - float green; - float blue; - float alpha; -} FIRGBAF; - -/** Data structure for COMPLEX type (complex number) -*/ -typedef struct tagFICOMPLEX { - /// real part - double r; - /// imaginary part - double i; -} FICOMPLEX; - -#if (defined(_WIN32) || defined(__WIN32__)) -#pragma pack(pop) -#else -#pragma pack() -#endif // WIN32 - -// Indexes for byte arrays, masks and shifts for treating pixels as words --- -// These coincide with the order of RGBQUAD and RGBTRIPLE ------------------- - -#ifndef FREEIMAGE_BIGENDIAN -#if FREEIMAGE_COLORORDER == FREEIMAGE_COLORORDER_BGR -// Little Endian (x86 / MS Windows, Linux) : BGR(A) order -#define FI_RGBA_RED 2 -#define FI_RGBA_GREEN 1 
-#define FI_RGBA_BLUE 0 -#define FI_RGBA_ALPHA 3 -#define FI_RGBA_RED_MASK 0x00FF0000 -#define FI_RGBA_GREEN_MASK 0x0000FF00 -#define FI_RGBA_BLUE_MASK 0x000000FF -#define FI_RGBA_ALPHA_MASK 0xFF000000 -#define FI_RGBA_RED_SHIFT 16 -#define FI_RGBA_GREEN_SHIFT 8 -#define FI_RGBA_BLUE_SHIFT 0 -#define FI_RGBA_ALPHA_SHIFT 24 -#else -// Little Endian (x86 / MaxOSX) : RGB(A) order -#define FI_RGBA_RED 0 -#define FI_RGBA_GREEN 1 -#define FI_RGBA_BLUE 2 -#define FI_RGBA_ALPHA 3 -#define FI_RGBA_RED_MASK 0x000000FF -#define FI_RGBA_GREEN_MASK 0x0000FF00 -#define FI_RGBA_BLUE_MASK 0x00FF0000 -#define FI_RGBA_ALPHA_MASK 0xFF000000 -#define FI_RGBA_RED_SHIFT 0 -#define FI_RGBA_GREEN_SHIFT 8 -#define FI_RGBA_BLUE_SHIFT 16 -#define FI_RGBA_ALPHA_SHIFT 24 -#endif // FREEIMAGE_COLORORDER -#else -#if FREEIMAGE_COLORORDER == FREEIMAGE_COLORORDER_BGR -// Big Endian (PPC / none) : BGR(A) order -#define FI_RGBA_RED 2 -#define FI_RGBA_GREEN 1 -#define FI_RGBA_BLUE 0 -#define FI_RGBA_ALPHA 3 -#define FI_RGBA_RED_MASK 0x0000FF00 -#define FI_RGBA_GREEN_MASK 0x00FF0000 -#define FI_RGBA_BLUE_MASK 0xFF000000 -#define FI_RGBA_ALPHA_MASK 0x000000FF -#define FI_RGBA_RED_SHIFT 8 -#define FI_RGBA_GREEN_SHIFT 16 -#define FI_RGBA_BLUE_SHIFT 24 -#define FI_RGBA_ALPHA_SHIFT 0 -#else -// Big Endian (PPC / Linux, MaxOSX) : RGB(A) order -#define FI_RGBA_RED 0 -#define FI_RGBA_GREEN 1 -#define FI_RGBA_BLUE 2 -#define FI_RGBA_ALPHA 3 -#define FI_RGBA_RED_MASK 0xFF000000 -#define FI_RGBA_GREEN_MASK 0x00FF0000 -#define FI_RGBA_BLUE_MASK 0x0000FF00 -#define FI_RGBA_ALPHA_MASK 0x000000FF -#define FI_RGBA_RED_SHIFT 24 -#define FI_RGBA_GREEN_SHIFT 16 -#define FI_RGBA_BLUE_SHIFT 8 -#define FI_RGBA_ALPHA_SHIFT 0 -#endif // FREEIMAGE_COLORORDER -#endif // FREEIMAGE_BIGENDIAN - -#define FI_RGBA_RGB_MASK (FI_RGBA_RED_MASK|FI_RGBA_GREEN_MASK|FI_RGBA_BLUE_MASK) - -// The 16bit macros only include masks and shifts, since each color element is not byte aligned - -#define FI16_555_RED_MASK 0x7C00 -#define 
FI16_555_GREEN_MASK 0x03E0 -#define FI16_555_BLUE_MASK 0x001F -#define FI16_555_RED_SHIFT 10 -#define FI16_555_GREEN_SHIFT 5 -#define FI16_555_BLUE_SHIFT 0 -#define FI16_565_RED_MASK 0xF800 -#define FI16_565_GREEN_MASK 0x07E0 -#define FI16_565_BLUE_MASK 0x001F -#define FI16_565_RED_SHIFT 11 -#define FI16_565_GREEN_SHIFT 5 -#define FI16_565_BLUE_SHIFT 0 - -// ICC profile support ------------------------------------------------------ - -#define FIICC_DEFAULT 0x00 -#define FIICC_COLOR_IS_CMYK 0x01 - -FI_STRUCT (FIICCPROFILE) { - WORD flags; // info flag - DWORD size; // profile's size measured in bytes - void *data; // points to a block of contiguous memory containing the profile -}; - -// Important enums ---------------------------------------------------------- - -/** I/O image format identifiers. -*/ -FI_ENUM(FREE_IMAGE_FORMAT) { - FIF_UNKNOWN = -1, - FIF_BMP = 0, - FIF_ICO = 1, - FIF_JPEG = 2, - FIF_JNG = 3, - FIF_KOALA = 4, - FIF_LBM = 5, - FIF_IFF = FIF_LBM, - FIF_MNG = 6, - FIF_PBM = 7, - FIF_PBMRAW = 8, - FIF_PCD = 9, - FIF_PCX = 10, - FIF_PGM = 11, - FIF_PGMRAW = 12, - FIF_PNG = 13, - FIF_PPM = 14, - FIF_PPMRAW = 15, - FIF_RAS = 16, - FIF_TARGA = 17, - FIF_TIFF = 18, - FIF_WBMP = 19, - FIF_PSD = 20, - FIF_CUT = 21, - FIF_XBM = 22, - FIF_XPM = 23, - FIF_DDS = 24, - FIF_GIF = 25, - FIF_HDR = 26, - FIF_FAXG3 = 27, - FIF_SGI = 28, - FIF_EXR = 29, - FIF_J2K = 30, - FIF_JP2 = 31 -}; - -/** Image type used in FreeImage. 
-*/ -FI_ENUM(FREE_IMAGE_TYPE) { - FIT_UNKNOWN = 0, // unknown type - FIT_BITMAP = 1, // standard image : 1-, 4-, 8-, 16-, 24-, 32-bit - FIT_UINT16 = 2, // array of unsigned short : unsigned 16-bit - FIT_INT16 = 3, // array of short : signed 16-bit - FIT_UINT32 = 4, // array of unsigned long : unsigned 32-bit - FIT_INT32 = 5, // array of long : signed 32-bit - FIT_FLOAT = 6, // array of float : 32-bit IEEE floating point - FIT_DOUBLE = 7, // array of double : 64-bit IEEE floating point - FIT_COMPLEX = 8, // array of FICOMPLEX : 2 x 64-bit IEEE floating point - FIT_RGB16 = 9, // 48-bit RGB image : 3 x 16-bit - FIT_RGBA16 = 10, // 64-bit RGBA image : 4 x 16-bit - FIT_RGBF = 11, // 96-bit RGB float image : 3 x 32-bit IEEE floating point - FIT_RGBAF = 12 // 128-bit RGBA float image : 4 x 32-bit IEEE floating point -}; - -/** Image color type used in FreeImage. -*/ -FI_ENUM(FREE_IMAGE_COLOR_TYPE) { - FIC_MINISWHITE = 0, // min value is white - FIC_MINISBLACK = 1, // min value is black - FIC_RGB = 2, // RGB color model - FIC_PALETTE = 3, // color map indexed - FIC_RGBALPHA = 4, // RGB color model with alpha channel - FIC_CMYK = 5 // CMYK color model -}; - -/** Color quantization algorithms. -Constants used in FreeImage_ColorQuantize. -*/ -FI_ENUM(FREE_IMAGE_QUANTIZE) { - FIQ_WUQUANT = 0, // Xiaolin Wu color quantization algorithm - FIQ_NNQUANT = 1 // NeuQuant neural-net quantization algorithm by Anthony Dekker -}; - -/** Dithering algorithms. -Constants used in FreeImage_Dither. 
-*/ -FI_ENUM(FREE_IMAGE_DITHER) { - FID_FS = 0, // Floyd & Steinberg error diffusion - FID_BAYER4x4 = 1, // Bayer ordered dispersed dot dithering (order 2 dithering matrix) - FID_BAYER8x8 = 2, // Bayer ordered dispersed dot dithering (order 3 dithering matrix) - FID_CLUSTER6x6 = 3, // Ordered clustered dot dithering (order 3 - 6x6 matrix) - FID_CLUSTER8x8 = 4, // Ordered clustered dot dithering (order 4 - 8x8 matrix) - FID_CLUSTER16x16= 5, // Ordered clustered dot dithering (order 8 - 16x16 matrix) - FID_BAYER16x16 = 6 // Bayer ordered dispersed dot dithering (order 4 dithering matrix) -}; - -/** Lossless JPEG transformations -Constants used in FreeImage_JPEGTransform -*/ -FI_ENUM(FREE_IMAGE_JPEG_OPERATION) { - FIJPEG_OP_NONE = 0, // no transformation - FIJPEG_OP_FLIP_H = 1, // horizontal flip - FIJPEG_OP_FLIP_V = 2, // vertical flip - FIJPEG_OP_TRANSPOSE = 3, // transpose across UL-to-LR axis - FIJPEG_OP_TRANSVERSE = 4, // transpose across UR-to-LL axis - FIJPEG_OP_ROTATE_90 = 5, // 90-degree clockwise rotation - FIJPEG_OP_ROTATE_180 = 6, // 180-degree rotation - FIJPEG_OP_ROTATE_270 = 7 // 270-degree clockwise (or 90 ccw) -}; - -/** Tone mapping operators. -Constants used in FreeImage_ToneMapping. -*/ -FI_ENUM(FREE_IMAGE_TMO) { - FITMO_DRAGO03 = 0, // Adaptive logarithmic mapping (F. Drago, 2003) - FITMO_REINHARD05 = 1, // Dynamic range reduction inspired by photoreceptor physiology (E. Reinhard, 2005) - FITMO_FATTAL02 = 2 // Gradient domain high dynamic range compression (R. Fattal, 2002) -}; - -/** Upsampling / downsampling filters. -Constants used in FreeImage_Rescale. 
-*/ -FI_ENUM(FREE_IMAGE_FILTER) { - FILTER_BOX = 0, // Box, pulse, Fourier window, 1st order (constant) b-spline - FILTER_BICUBIC = 1, // Mitchell & Netravali's two-param cubic filter - FILTER_BILINEAR = 2, // Bilinear filter - FILTER_BSPLINE = 3, // 4th order (cubic) b-spline - FILTER_CATMULLROM = 4, // Catmull-Rom spline, Overhauser spline - FILTER_LANCZOS3 = 5 // Lanczos3 filter -}; - -/** Color channels. -Constants used in color manipulation routines. -*/ -FI_ENUM(FREE_IMAGE_COLOR_CHANNEL) { - FICC_RGB = 0, // Use red, green and blue channels - FICC_RED = 1, // Use red channel - FICC_GREEN = 2, // Use green channel - FICC_BLUE = 3, // Use blue channel - FICC_ALPHA = 4, // Use alpha channel - FICC_BLACK = 5, // Use black channel - FICC_REAL = 6, // Complex images: use real part - FICC_IMAG = 7, // Complex images: use imaginary part - FICC_MAG = 8, // Complex images: use magnitude - FICC_PHASE = 9 // Complex images: use phase -}; - -// Metadata support --------------------------------------------------------- - -/** - Tag data type information (based on TIFF specifications) - - Note: RATIONALs are the ratio of two 32-bit integer values. 
-*/ -FI_ENUM(FREE_IMAGE_MDTYPE) { - FIDT_NOTYPE = 0, // placeholder - FIDT_BYTE = 1, // 8-bit unsigned integer - FIDT_ASCII = 2, // 8-bit bytes w/ last byte null - FIDT_SHORT = 3, // 16-bit unsigned integer - FIDT_LONG = 4, // 32-bit unsigned integer - FIDT_RATIONAL = 5, // 64-bit unsigned fraction - FIDT_SBYTE = 6, // 8-bit signed integer - FIDT_UNDEFINED = 7, // 8-bit untyped data - FIDT_SSHORT = 8, // 16-bit signed integer - FIDT_SLONG = 9, // 32-bit signed integer - FIDT_SRATIONAL = 10, // 64-bit signed fraction - FIDT_FLOAT = 11, // 32-bit IEEE floating point - FIDT_DOUBLE = 12, // 64-bit IEEE floating point - FIDT_IFD = 13, // 32-bit unsigned integer (offset) - FIDT_PALETTE = 14 // 32-bit RGBQUAD -}; - -/** - Metadata models supported by FreeImage -*/ -FI_ENUM(FREE_IMAGE_MDMODEL) { - FIMD_NODATA = -1, - FIMD_COMMENTS = 0, // single comment or keywords - FIMD_EXIF_MAIN = 1, // Exif-TIFF metadata - FIMD_EXIF_EXIF = 2, // Exif-specific metadata - FIMD_EXIF_GPS = 3, // Exif GPS metadata - FIMD_EXIF_MAKERNOTE = 4, // Exif maker note metadata - FIMD_EXIF_INTEROP = 5, // Exif interoperability metadata - FIMD_IPTC = 6, // IPTC/NAA metadata - FIMD_XMP = 7, // Abobe XMP metadata - FIMD_GEOTIFF = 8, // GeoTIFF metadata - FIMD_ANIMATION = 9, // Animation metadata - FIMD_CUSTOM = 10 // Used to attach other metadata types to a dib -}; - -/** - Handle to a metadata model -*/ -FI_STRUCT (FIMETADATA) { void *data; }; - -/** - Handle to a FreeImage tag -*/ -FI_STRUCT (FITAG) { void *data; }; - -// File IO routines --------------------------------------------------------- - -#ifndef FREEIMAGE_IO -#define FREEIMAGE_IO - -typedef void* fi_handle; -typedef unsigned (DLL_CALLCONV *FI_ReadProc) (void *buffer, unsigned size, unsigned count, fi_handle handle); -typedef unsigned (DLL_CALLCONV *FI_WriteProc) (void *buffer, unsigned size, unsigned count, fi_handle handle); -typedef int (DLL_CALLCONV *FI_SeekProc) (fi_handle handle, long offset, int origin); -typedef long (DLL_CALLCONV 
*FI_TellProc) (fi_handle handle); - -#if (defined(_WIN32) || defined(__WIN32__)) -#pragma pack(push, 1) -#else -#pragma pack(1) -#endif // WIN32 - -FI_STRUCT(FreeImageIO) { - FI_ReadProc read_proc; // pointer to the function used to read data - FI_WriteProc write_proc; // pointer to the function used to write data - FI_SeekProc seek_proc; // pointer to the function used to seek - FI_TellProc tell_proc; // pointer to the function used to aquire the current position -}; - -#if (defined(_WIN32) || defined(__WIN32__)) -#pragma pack(pop) -#else -#pragma pack() -#endif // WIN32 - -/** -Handle to a memory I/O stream -*/ -FI_STRUCT (FIMEMORY) { void *data; }; - -#endif // FREEIMAGE_IO - -// Plugin routines ---------------------------------------------------------- - -#ifndef PLUGINS -#define PLUGINS - -typedef const char *(DLL_CALLCONV *FI_FormatProc) (); -typedef const char *(DLL_CALLCONV *FI_DescriptionProc) (); -typedef const char *(DLL_CALLCONV *FI_ExtensionListProc) (); -typedef const char *(DLL_CALLCONV *FI_RegExprProc) (); -typedef void *(DLL_CALLCONV *FI_OpenProc)(FreeImageIO *io, fi_handle handle, BOOL read); -typedef void (DLL_CALLCONV *FI_CloseProc)(FreeImageIO *io, fi_handle handle, void *data); -typedef int (DLL_CALLCONV *FI_PageCountProc)(FreeImageIO *io, fi_handle handle, void *data); -typedef int (DLL_CALLCONV *FI_PageCapabilityProc)(FreeImageIO *io, fi_handle handle, void *data); -typedef FIBITMAP *(DLL_CALLCONV *FI_LoadProc)(FreeImageIO *io, fi_handle handle, int page, int flags, void *data); -typedef BOOL (DLL_CALLCONV *FI_SaveProc)(FreeImageIO *io, FIBITMAP *dib, fi_handle handle, int page, int flags, void *data); -typedef BOOL (DLL_CALLCONV *FI_ValidateProc)(FreeImageIO *io, fi_handle handle); -typedef const char *(DLL_CALLCONV *FI_MimeProc) (); -typedef BOOL (DLL_CALLCONV *FI_SupportsExportBPPProc)(int bpp); -typedef BOOL (DLL_CALLCONV *FI_SupportsExportTypeProc)(FREE_IMAGE_TYPE type); -typedef BOOL (DLL_CALLCONV *FI_SupportsICCProfilesProc)(); - 
-FI_STRUCT (Plugin) { - FI_FormatProc format_proc; - FI_DescriptionProc description_proc; - FI_ExtensionListProc extension_proc; - FI_RegExprProc regexpr_proc; - FI_OpenProc open_proc; - FI_CloseProc close_proc; - FI_PageCountProc pagecount_proc; - FI_PageCapabilityProc pagecapability_proc; - FI_LoadProc load_proc; - FI_SaveProc save_proc; - FI_ValidateProc validate_proc; - FI_MimeProc mime_proc; - FI_SupportsExportBPPProc supports_export_bpp_proc; - FI_SupportsExportTypeProc supports_export_type_proc; - FI_SupportsICCProfilesProc supports_icc_profiles_proc; -}; - -typedef void (DLL_CALLCONV *FI_InitProc)(Plugin *plugin, int format_id); - -#endif // PLUGINS - - -// Load / Save flag constants ----------------------------------------------- - -#define BMP_DEFAULT 0 -#define BMP_SAVE_RLE 1 -#define CUT_DEFAULT 0 -#define DDS_DEFAULT 0 -#define EXR_DEFAULT 0 // save data as half with piz-based wavelet compression -#define EXR_FLOAT 0x0001 // save data as float instead of as half (not recommended) -#define EXR_NONE 0x0002 // save with no compression -#define EXR_ZIP 0x0004 // save with zlib compression, in blocks of 16 scan lines -#define EXR_PIZ 0x0008 // save with piz-based wavelet compression -#define EXR_PXR24 0x0010 // save with lossy 24-bit float compression -#define EXR_B44 0x0020 // save with lossy 44% float compression - goes to 22% when combined with EXR_LC -#define EXR_LC 0x0040 // save images with one luminance and two chroma channels, rather than as RGB (lossy compression) -#define FAXG3_DEFAULT 0 -#define GIF_DEFAULT 0 -#define GIF_LOAD256 1 // Load the image as a 256 color image with ununsed palette entries, if it's 16 or 2 color -#define GIF_PLAYBACK 2 // 'Play' the GIF to generate each frame (as 32bpp) instead of returning raw frame data when loading -#define HDR_DEFAULT 0 -#define ICO_DEFAULT 0 -#define ICO_MAKEALPHA 1 // convert to 32bpp and create an alpha channel from the AND-mask when loading -#define IFF_DEFAULT 0 -#define J2K_DEFAULT 0 // save 
with a 16:1 rate -#define JP2_DEFAULT 0 // save with a 16:1 rate -#define JPEG_DEFAULT 0 // loading (see JPEG_FAST); saving (see JPEG_QUALITYGOOD) -#define JPEG_FAST 0x0001 // load the file as fast as possible, sacrificing some quality -#define JPEG_ACCURATE 0x0002 // load the file with the best quality, sacrificing some speed -#define JPEG_CMYK 0x0004 // load separated CMYK "as is" (use | to combine with other load flags) -#define JPEG_QUALITYSUPERB 0x80 // save with superb quality (100:1) -#define JPEG_QUALITYGOOD 0x0100 // save with good quality (75:1) -#define JPEG_QUALITYNORMAL 0x0200 // save with normal quality (50:1) -#define JPEG_QUALITYAVERAGE 0x0400 // save with average quality (25:1) -#define JPEG_QUALITYBAD 0x0800 // save with bad quality (10:1) -#define JPEG_PROGRESSIVE 0x2000 // save as a progressive-JPEG (use | to combine with other save flags) -#define KOALA_DEFAULT 0 -#define LBM_DEFAULT 0 -#define MNG_DEFAULT 0 -#define PCD_DEFAULT 0 -#define PCD_BASE 1 // load the bitmap sized 768 x 512 -#define PCD_BASEDIV4 2 // load the bitmap sized 384 x 256 -#define PCD_BASEDIV16 3 // load the bitmap sized 192 x 128 -#define PCX_DEFAULT 0 -#define PNG_DEFAULT 0 -#define PNG_IGNOREGAMMA 1 // avoid gamma correction -#define PNM_DEFAULT 0 -#define PNM_SAVE_RAW 0 // If set the writer saves in RAW format (i.e. P4, P5 or P6) -#define PNM_SAVE_ASCII 1 // If set the writer saves in ASCII format (i.e. P1, P2 or P3) -#define PSD_DEFAULT 0 -#define RAS_DEFAULT 0 -#define SGI_DEFAULT 0 -#define TARGA_DEFAULT 0 -#define TARGA_LOAD_RGB888 1 // If set the loader converts RGB555 and ARGB8888 -> RGB888. -#define TIFF_DEFAULT 0 -#define TIFF_CMYK 0x0001 // reads/stores tags for separated CMYK (use | to combine with compression flags) -#define TIFF_PACKBITS 0x0100 // save using PACKBITS compression -#define TIFF_DEFLATE 0x0200 // save using DEFLATE compression (a.k.a. 
ZLIB compression) -#define TIFF_ADOBE_DEFLATE 0x0400 // save using ADOBE DEFLATE compression -#define TIFF_NONE 0x0800 // save without any compression -#define TIFF_CCITTFAX3 0x1000 // save using CCITT Group 3 fax encoding -#define TIFF_CCITTFAX4 0x2000 // save using CCITT Group 4 fax encoding -#define TIFF_LZW 0x4000 // save using LZW compression -#define TIFF_JPEG 0x8000 // save using JPEG compression -#define WBMP_DEFAULT 0 -#define XBM_DEFAULT 0 -#define XPM_DEFAULT 0 - - -#ifdef __cplusplus -extern "C" { -#endif - -// Init / Error routines ---------------------------------------------------- - -DLL_API void DLL_CALLCONV FreeImage_Initialise(BOOL load_local_plugins_only FI_DEFAULT(FALSE)); -DLL_API void DLL_CALLCONV FreeImage_DeInitialise(void); - -// Version routines --------------------------------------------------------- - -DLL_API const char *DLL_CALLCONV FreeImage_GetVersion(void); -DLL_API const char *DLL_CALLCONV FreeImage_GetCopyrightMessage(void); - -// Message output functions ------------------------------------------------- - -typedef void (*FreeImage_OutputMessageFunction)(FREE_IMAGE_FORMAT fif, const char *msg); -typedef void (DLL_CALLCONV *FreeImage_OutputMessageFunctionStdCall)(FREE_IMAGE_FORMAT fif, const char *msg); - -DLL_API void DLL_CALLCONV FreeImage_SetOutputMessageStdCall(FreeImage_OutputMessageFunctionStdCall omf); -DLL_API void DLL_CALLCONV FreeImage_SetOutputMessage(FreeImage_OutputMessageFunction omf); -DLL_API void DLL_CALLCONV FreeImage_OutputMessageProc(int fif, const char *fmt, ...); - -// Allocate / Clone / Unload routines --------------------------------------- - -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_Allocate(int width, int height, int bpp, unsigned red_mask FI_DEFAULT(0), unsigned green_mask FI_DEFAULT(0), unsigned blue_mask FI_DEFAULT(0)); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_AllocateT(FREE_IMAGE_TYPE type, int width, int height, int bpp FI_DEFAULT(8), unsigned red_mask FI_DEFAULT(0), unsigned green_mask 
FI_DEFAULT(0), unsigned blue_mask FI_DEFAULT(0)); -DLL_API FIBITMAP * DLL_CALLCONV FreeImage_Clone(FIBITMAP *dib); -DLL_API void DLL_CALLCONV FreeImage_Unload(FIBITMAP *dib); - -// Load / Save routines ----------------------------------------------------- - -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_Load(FREE_IMAGE_FORMAT fif, const char *filename, int flags FI_DEFAULT(0)); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_LoadU(FREE_IMAGE_FORMAT fif, const wchar_t *filename, int flags FI_DEFAULT(0)); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_LoadFromHandle(FREE_IMAGE_FORMAT fif, FreeImageIO *io, fi_handle handle, int flags FI_DEFAULT(0)); -DLL_API BOOL DLL_CALLCONV FreeImage_Save(FREE_IMAGE_FORMAT fif, FIBITMAP *dib, const char *filename, int flags FI_DEFAULT(0)); -DLL_API BOOL DLL_CALLCONV FreeImage_SaveU(FREE_IMAGE_FORMAT fif, FIBITMAP *dib, const wchar_t *filename, int flags FI_DEFAULT(0)); -DLL_API BOOL DLL_CALLCONV FreeImage_SaveToHandle(FREE_IMAGE_FORMAT fif, FIBITMAP *dib, FreeImageIO *io, fi_handle handle, int flags FI_DEFAULT(0)); - -// Memory I/O stream routines ----------------------------------------------- - -DLL_API FIMEMORY *DLL_CALLCONV FreeImage_OpenMemory(BYTE *data FI_DEFAULT(0), DWORD size_in_bytes FI_DEFAULT(0)); -DLL_API void DLL_CALLCONV FreeImage_CloseMemory(FIMEMORY *stream); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_LoadFromMemory(FREE_IMAGE_FORMAT fif, FIMEMORY *stream, int flags FI_DEFAULT(0)); -DLL_API BOOL DLL_CALLCONV FreeImage_SaveToMemory(FREE_IMAGE_FORMAT fif, FIBITMAP *dib, FIMEMORY *stream, int flags FI_DEFAULT(0)); -DLL_API long DLL_CALLCONV FreeImage_TellMemory(FIMEMORY *stream); -DLL_API BOOL DLL_CALLCONV FreeImage_SeekMemory(FIMEMORY *stream, long offset, int origin); -DLL_API BOOL DLL_CALLCONV FreeImage_AcquireMemory(FIMEMORY *stream, BYTE **data, DWORD *size_in_bytes); -DLL_API unsigned DLL_CALLCONV FreeImage_ReadMemory(void *buffer, unsigned size, unsigned count, FIMEMORY *stream); -DLL_API unsigned DLL_CALLCONV 
FreeImage_WriteMemory(const void *buffer, unsigned size, unsigned count, FIMEMORY *stream); -DLL_API FIMULTIBITMAP *DLL_CALLCONV FreeImage_LoadMultiBitmapFromMemory(FREE_IMAGE_FORMAT fif, FIMEMORY *stream, int flags FI_DEFAULT(0)); - -// Plugin Interface --------------------------------------------------------- - -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_RegisterLocalPlugin(FI_InitProc proc_address, const char *format FI_DEFAULT(0), const char *description FI_DEFAULT(0), const char *extension FI_DEFAULT(0), const char *regexpr FI_DEFAULT(0)); -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_RegisterExternalPlugin(const char *path, const char *format FI_DEFAULT(0), const char *description FI_DEFAULT(0), const char *extension FI_DEFAULT(0), const char *regexpr FI_DEFAULT(0)); -DLL_API int DLL_CALLCONV FreeImage_GetFIFCount(void); -DLL_API int DLL_CALLCONV FreeImage_SetPluginEnabled(FREE_IMAGE_FORMAT fif, BOOL enable); -DLL_API int DLL_CALLCONV FreeImage_IsPluginEnabled(FREE_IMAGE_FORMAT fif); -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_GetFIFFromFormat(const char *format); -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_GetFIFFromMime(const char *mime); -DLL_API const char *DLL_CALLCONV FreeImage_GetFormatFromFIF(FREE_IMAGE_FORMAT fif); -DLL_API const char *DLL_CALLCONV FreeImage_GetFIFExtensionList(FREE_IMAGE_FORMAT fif); -DLL_API const char *DLL_CALLCONV FreeImage_GetFIFDescription(FREE_IMAGE_FORMAT fif); -DLL_API const char *DLL_CALLCONV FreeImage_GetFIFRegExpr(FREE_IMAGE_FORMAT fif); -DLL_API const char *DLL_CALLCONV FreeImage_GetFIFMimeType(FREE_IMAGE_FORMAT fif); -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_GetFIFFromFilename(const char *filename); -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_GetFIFFromFilenameU(const wchar_t *filename); -DLL_API BOOL DLL_CALLCONV FreeImage_FIFSupportsReading(FREE_IMAGE_FORMAT fif); -DLL_API BOOL DLL_CALLCONV FreeImage_FIFSupportsWriting(FREE_IMAGE_FORMAT fif); -DLL_API BOOL DLL_CALLCONV 
FreeImage_FIFSupportsExportBPP(FREE_IMAGE_FORMAT fif, int bpp); -DLL_API BOOL DLL_CALLCONV FreeImage_FIFSupportsExportType(FREE_IMAGE_FORMAT fif, FREE_IMAGE_TYPE type); -DLL_API BOOL DLL_CALLCONV FreeImage_FIFSupportsICCProfiles(FREE_IMAGE_FORMAT fif); - -// Multipaging interface ---------------------------------------------------- - -DLL_API FIMULTIBITMAP * DLL_CALLCONV FreeImage_OpenMultiBitmap(FREE_IMAGE_FORMAT fif, const char *filename, BOOL create_new, BOOL read_only, BOOL keep_cache_in_memory FI_DEFAULT(FALSE), int flags FI_DEFAULT(0)); -DLL_API BOOL DLL_CALLCONV FreeImage_CloseMultiBitmap(FIMULTIBITMAP *bitmap, int flags FI_DEFAULT(0)); -DLL_API int DLL_CALLCONV FreeImage_GetPageCount(FIMULTIBITMAP *bitmap); -DLL_API void DLL_CALLCONV FreeImage_AppendPage(FIMULTIBITMAP *bitmap, FIBITMAP *data); -DLL_API void DLL_CALLCONV FreeImage_InsertPage(FIMULTIBITMAP *bitmap, int page, FIBITMAP *data); -DLL_API void DLL_CALLCONV FreeImage_DeletePage(FIMULTIBITMAP *bitmap, int page); -DLL_API FIBITMAP * DLL_CALLCONV FreeImage_LockPage(FIMULTIBITMAP *bitmap, int page); -DLL_API void DLL_CALLCONV FreeImage_UnlockPage(FIMULTIBITMAP *bitmap, FIBITMAP *data, BOOL changed); -DLL_API BOOL DLL_CALLCONV FreeImage_MovePage(FIMULTIBITMAP *bitmap, int target, int source); -DLL_API BOOL DLL_CALLCONV FreeImage_GetLockedPageNumbers(FIMULTIBITMAP *bitmap, int *pages, int *count); - -// Filetype request routines ------------------------------------------------ - -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_GetFileType(const char *filename, int size FI_DEFAULT(0)); -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_GetFileTypeU(const wchar_t *filename, int size FI_DEFAULT(0)); -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_GetFileTypeFromHandle(FreeImageIO *io, fi_handle handle, int size FI_DEFAULT(0)); -DLL_API FREE_IMAGE_FORMAT DLL_CALLCONV FreeImage_GetFileTypeFromMemory(FIMEMORY *stream, int size FI_DEFAULT(0)); - -// Image type request routine 
----------------------------------------------- - -DLL_API FREE_IMAGE_TYPE DLL_CALLCONV FreeImage_GetImageType(FIBITMAP *dib); - -// FreeImage helper routines ------------------------------------------------ - -DLL_API BOOL DLL_CALLCONV FreeImage_IsLittleEndian(void); -DLL_API BOOL DLL_CALLCONV FreeImage_LookupX11Color(const char *szColor, BYTE *nRed, BYTE *nGreen, BYTE *nBlue); -DLL_API BOOL DLL_CALLCONV FreeImage_LookupSVGColor(const char *szColor, BYTE *nRed, BYTE *nGreen, BYTE *nBlue); - - -// Pixel access routines ---------------------------------------------------- - -DLL_API BYTE *DLL_CALLCONV FreeImage_GetBits(FIBITMAP *dib); -DLL_API BYTE *DLL_CALLCONV FreeImage_GetScanLine(FIBITMAP *dib, int scanline); - -DLL_API BOOL DLL_CALLCONV FreeImage_GetPixelIndex(FIBITMAP *dib, unsigned x, unsigned y, BYTE *value); -DLL_API BOOL DLL_CALLCONV FreeImage_GetPixelColor(FIBITMAP *dib, unsigned x, unsigned y, RGBQUAD *value); -DLL_API BOOL DLL_CALLCONV FreeImage_SetPixelIndex(FIBITMAP *dib, unsigned x, unsigned y, BYTE *value); -DLL_API BOOL DLL_CALLCONV FreeImage_SetPixelColor(FIBITMAP *dib, unsigned x, unsigned y, RGBQUAD *value); - -// DIB info routines -------------------------------------------------------- - -DLL_API unsigned DLL_CALLCONV FreeImage_GetColorsUsed(FIBITMAP *dib); -DLL_API unsigned DLL_CALLCONV FreeImage_GetBPP(FIBITMAP *dib); -DLL_API unsigned DLL_CALLCONV FreeImage_GetWidth(FIBITMAP *dib); -DLL_API unsigned DLL_CALLCONV FreeImage_GetHeight(FIBITMAP *dib); -DLL_API unsigned DLL_CALLCONV FreeImage_GetLine(FIBITMAP *dib); -DLL_API unsigned DLL_CALLCONV FreeImage_GetPitch(FIBITMAP *dib); -DLL_API unsigned DLL_CALLCONV FreeImage_GetDIBSize(FIBITMAP *dib); -DLL_API RGBQUAD *DLL_CALLCONV FreeImage_GetPalette(FIBITMAP *dib); - -DLL_API unsigned DLL_CALLCONV FreeImage_GetDotsPerMeterX(FIBITMAP *dib); -DLL_API unsigned DLL_CALLCONV FreeImage_GetDotsPerMeterY(FIBITMAP *dib); -DLL_API void DLL_CALLCONV FreeImage_SetDotsPerMeterX(FIBITMAP *dib, unsigned res); 
-DLL_API void DLL_CALLCONV FreeImage_SetDotsPerMeterY(FIBITMAP *dib, unsigned res); - -DLL_API BITMAPINFOHEADER *DLL_CALLCONV FreeImage_GetInfoHeader(FIBITMAP *dib); -DLL_API BITMAPINFO *DLL_CALLCONV FreeImage_GetInfo(FIBITMAP *dib); -DLL_API FREE_IMAGE_COLOR_TYPE DLL_CALLCONV FreeImage_GetColorType(FIBITMAP *dib); - -DLL_API unsigned DLL_CALLCONV FreeImage_GetRedMask(FIBITMAP *dib); -DLL_API unsigned DLL_CALLCONV FreeImage_GetGreenMask(FIBITMAP *dib); -DLL_API unsigned DLL_CALLCONV FreeImage_GetBlueMask(FIBITMAP *dib); - -DLL_API unsigned DLL_CALLCONV FreeImage_GetTransparencyCount(FIBITMAP *dib); -DLL_API BYTE * DLL_CALLCONV FreeImage_GetTransparencyTable(FIBITMAP *dib); -DLL_API void DLL_CALLCONV FreeImage_SetTransparent(FIBITMAP *dib, BOOL enabled); -DLL_API void DLL_CALLCONV FreeImage_SetTransparencyTable(FIBITMAP *dib, BYTE *table, int count); -DLL_API BOOL DLL_CALLCONV FreeImage_IsTransparent(FIBITMAP *dib); -DLL_API void DLL_CALLCONV FreeImage_SetTransparentIndex(FIBITMAP *dib, int index); -DLL_API int DLL_CALLCONV FreeImage_GetTransparentIndex(FIBITMAP *dib); - -DLL_API BOOL DLL_CALLCONV FreeImage_HasBackgroundColor(FIBITMAP *dib); -DLL_API BOOL DLL_CALLCONV FreeImage_GetBackgroundColor(FIBITMAP *dib, RGBQUAD *bkcolor); -DLL_API BOOL DLL_CALLCONV FreeImage_SetBackgroundColor(FIBITMAP *dib, RGBQUAD *bkcolor); - - -// ICC profile routines ----------------------------------------------------- - -DLL_API FIICCPROFILE *DLL_CALLCONV FreeImage_GetICCProfile(FIBITMAP *dib); -DLL_API FIICCPROFILE *DLL_CALLCONV FreeImage_CreateICCProfile(FIBITMAP *dib, void *data, long size); -DLL_API void DLL_CALLCONV FreeImage_DestroyICCProfile(FIBITMAP *dib); - -// Line conversion routines ------------------------------------------------- - -DLL_API void DLL_CALLCONV FreeImage_ConvertLine1To4(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine8To4(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void 
DLL_CALLCONV FreeImage_ConvertLine16To4_555(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine16To4_565(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine24To4(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine32To4(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine1To8(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine4To8(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine16To8_555(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine16To8_565(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine24To8(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine32To8(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine1To16_555(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine4To16_555(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine8To16_555(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine16_565_To16_555(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine24To16_555(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine32To16_555(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine1To16_565(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine4To16_565(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD 
*palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine8To16_565(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine16_555_To16_565(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine24To16_565(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine32To16_565(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine1To24(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine4To24(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine8To24(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine16To24_555(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine16To24_565(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine32To24(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine1To32(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine4To32(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine8To32(BYTE *target, BYTE *source, int width_in_pixels, RGBQUAD *palette); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine16To32_555(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine16To32_565(BYTE *target, BYTE *source, int width_in_pixels); -DLL_API void DLL_CALLCONV FreeImage_ConvertLine24To32(BYTE *target, BYTE *source, int width_in_pixels); - -// Smart conversion routines ------------------------------------------------ - -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertTo4Bits(FIBITMAP 
*dib); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertTo8Bits(FIBITMAP *dib); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertToGreyscale(FIBITMAP *dib); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertTo16Bits555(FIBITMAP *dib); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertTo16Bits565(FIBITMAP *dib); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertTo24Bits(FIBITMAP *dib); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertTo32Bits(FIBITMAP *dib); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ColorQuantize(FIBITMAP *dib, FREE_IMAGE_QUANTIZE quantize); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ColorQuantizeEx(FIBITMAP *dib, FREE_IMAGE_QUANTIZE quantize FI_DEFAULT(FIQ_WUQUANT), int PaletteSize FI_DEFAULT(256), int ReserveSize FI_DEFAULT(0), RGBQUAD *ReservePalette FI_DEFAULT(NULL)); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_Threshold(FIBITMAP *dib, BYTE T); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_Dither(FIBITMAP *dib, FREE_IMAGE_DITHER algorithm); - -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertFromRawBits(BYTE *bits, int width, int height, int pitch, unsigned bpp, unsigned red_mask, unsigned green_mask, unsigned blue_mask, BOOL topdown FI_DEFAULT(FALSE)); -DLL_API void DLL_CALLCONV FreeImage_ConvertToRawBits(BYTE *bits, FIBITMAP *dib, int pitch, unsigned bpp, unsigned red_mask, unsigned green_mask, unsigned blue_mask, BOOL topdown FI_DEFAULT(FALSE)); - -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertToRGBF(FIBITMAP *dib); - -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertToStandardType(FIBITMAP *src, BOOL scale_linear FI_DEFAULT(TRUE)); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ConvertToType(FIBITMAP *src, FREE_IMAGE_TYPE dst_type, BOOL scale_linear FI_DEFAULT(TRUE)); - -// tone mapping operators -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_ToneMapping(FIBITMAP *dib, FREE_IMAGE_TMO tmo, double first_param FI_DEFAULT(0), double second_param FI_DEFAULT(0)); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_TmoDrago03(FIBITMAP *src, double gamma FI_DEFAULT(2.2), double 
exposure FI_DEFAULT(0)); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_TmoReinhard05(FIBITMAP *src, double intensity FI_DEFAULT(0), double contrast FI_DEFAULT(0)); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_TmoFattal02(FIBITMAP *src, double color_saturation FI_DEFAULT(0.5), double attenuation FI_DEFAULT(0.85)); - -// ZLib interface ----------------------------------------------------------- - -DLL_API DWORD DLL_CALLCONV FreeImage_ZLibCompress(BYTE *target, DWORD target_size, BYTE *source, DWORD source_size); -DLL_API DWORD DLL_CALLCONV FreeImage_ZLibUncompress(BYTE *target, DWORD target_size, BYTE *source, DWORD source_size); -DLL_API DWORD DLL_CALLCONV FreeImage_ZLibGZip(BYTE *target, DWORD target_size, BYTE *source, DWORD source_size); -DLL_API DWORD DLL_CALLCONV FreeImage_ZLibGUnzip(BYTE *target, DWORD target_size, BYTE *source, DWORD source_size); -DLL_API DWORD DLL_CALLCONV FreeImage_ZLibCRC32(DWORD crc, BYTE *source, DWORD source_size); - -// -------------------------------------------------------------------------- -// Metadata routines -------------------------------------------------------- -// -------------------------------------------------------------------------- - -// tag creation / destruction -DLL_API FITAG *DLL_CALLCONV FreeImage_CreateTag(); -DLL_API void DLL_CALLCONV FreeImage_DeleteTag(FITAG *tag); -DLL_API FITAG *DLL_CALLCONV FreeImage_CloneTag(FITAG *tag); - -// tag getters and setters -DLL_API const char *DLL_CALLCONV FreeImage_GetTagKey(FITAG *tag); -DLL_API const char *DLL_CALLCONV FreeImage_GetTagDescription(FITAG *tag); -DLL_API WORD DLL_CALLCONV FreeImage_GetTagID(FITAG *tag); -DLL_API FREE_IMAGE_MDTYPE DLL_CALLCONV FreeImage_GetTagType(FITAG *tag); -DLL_API DWORD DLL_CALLCONV FreeImage_GetTagCount(FITAG *tag); -DLL_API DWORD DLL_CALLCONV FreeImage_GetTagLength(FITAG *tag); -DLL_API const void *DLL_CALLCONV FreeImage_GetTagValue(FITAG *tag); - -DLL_API BOOL DLL_CALLCONV FreeImage_SetTagKey(FITAG *tag, const char *key); -DLL_API BOOL 
DLL_CALLCONV FreeImage_SetTagDescription(FITAG *tag, const char *description); -DLL_API BOOL DLL_CALLCONV FreeImage_SetTagID(FITAG *tag, WORD id); -DLL_API BOOL DLL_CALLCONV FreeImage_SetTagType(FITAG *tag, FREE_IMAGE_MDTYPE type); -DLL_API BOOL DLL_CALLCONV FreeImage_SetTagCount(FITAG *tag, DWORD count); -DLL_API BOOL DLL_CALLCONV FreeImage_SetTagLength(FITAG *tag, DWORD length); -DLL_API BOOL DLL_CALLCONV FreeImage_SetTagValue(FITAG *tag, const void *value); - -// iterator -DLL_API FIMETADATA *DLL_CALLCONV FreeImage_FindFirstMetadata(FREE_IMAGE_MDMODEL model, FIBITMAP *dib, FITAG **tag); -DLL_API BOOL DLL_CALLCONV FreeImage_FindNextMetadata(FIMETADATA *mdhandle, FITAG **tag); -DLL_API void DLL_CALLCONV FreeImage_FindCloseMetadata(FIMETADATA *mdhandle); - -// metadata setter and getter -DLL_API BOOL DLL_CALLCONV FreeImage_SetMetadata(FREE_IMAGE_MDMODEL model, FIBITMAP *dib, const char *key, FITAG *tag); -DLL_API BOOL DLL_CALLCONV FreeImage_GetMetadata(FREE_IMAGE_MDMODEL model, FIBITMAP *dib, const char *key, FITAG **tag); - -// helpers -DLL_API unsigned DLL_CALLCONV FreeImage_GetMetadataCount(FREE_IMAGE_MDMODEL model, FIBITMAP *dib); - -// tag to C string conversion -DLL_API const char* DLL_CALLCONV FreeImage_TagToString(FREE_IMAGE_MDMODEL model, FITAG *tag, char *Make FI_DEFAULT(NULL)); - -// -------------------------------------------------------------------------- -// Image manipulation toolkit ----------------------------------------------- -// -------------------------------------------------------------------------- - -// rotation and flipping -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_RotateClassic(FIBITMAP *dib, double angle); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_RotateEx(FIBITMAP *dib, double angle, double x_shift, double y_shift, double x_origin, double y_origin, BOOL use_mask); -DLL_API BOOL DLL_CALLCONV FreeImage_FlipHorizontal(FIBITMAP *dib); -DLL_API BOOL DLL_CALLCONV FreeImage_FlipVertical(FIBITMAP *dib); -DLL_API BOOL DLL_CALLCONV 
FreeImage_JPEGTransform(const char *src_file, const char *dst_file, FREE_IMAGE_JPEG_OPERATION operation, BOOL perfect FI_DEFAULT(FALSE)); - -// upsampling / downsampling -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_Rescale(FIBITMAP *dib, int dst_width, int dst_height, FREE_IMAGE_FILTER filter); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_MakeThumbnail(FIBITMAP *dib, int max_pixel_size, BOOL convert FI_DEFAULT(TRUE)); - -// color manipulation routines (point operations) -DLL_API BOOL DLL_CALLCONV FreeImage_AdjustCurve(FIBITMAP *dib, BYTE *LUT, FREE_IMAGE_COLOR_CHANNEL channel); -DLL_API BOOL DLL_CALLCONV FreeImage_AdjustGamma(FIBITMAP *dib, double gamma); -DLL_API BOOL DLL_CALLCONV FreeImage_AdjustBrightness(FIBITMAP *dib, double percentage); -DLL_API BOOL DLL_CALLCONV FreeImage_AdjustContrast(FIBITMAP *dib, double percentage); -DLL_API BOOL DLL_CALLCONV FreeImage_Invert(FIBITMAP *dib); -DLL_API BOOL DLL_CALLCONV FreeImage_GetHistogram(FIBITMAP *dib, DWORD *histo, FREE_IMAGE_COLOR_CHANNEL channel FI_DEFAULT(FICC_BLACK)); -DLL_API int DLL_CALLCONV FreeImage_GetAdjustColorsLookupTable(BYTE *LUT, double brightness, double contrast, double gamma, BOOL invert); -DLL_API BOOL DLL_CALLCONV FreeImage_AdjustColors(FIBITMAP *dib, double brightness, double contrast, double gamma, BOOL invert FI_DEFAULT(FALSE)); -DLL_API unsigned DLL_CALLCONV FreeImage_ApplyColorMapping(FIBITMAP *dib, RGBQUAD *srccolors, RGBQUAD *dstcolors, unsigned count, BOOL ignore_alpha, BOOL swap); -DLL_API unsigned DLL_CALLCONV FreeImage_SwapColors(FIBITMAP *dib, RGBQUAD *color_a, RGBQUAD *color_b, BOOL ignore_alpha); -DLL_API unsigned DLL_CALLCONV FreeImage_ApplyPaletteIndexMapping(FIBITMAP *dib, BYTE *srcindices, BYTE *dstindices, unsigned count, BOOL swap); -DLL_API unsigned DLL_CALLCONV FreeImage_SwapPaletteIndices(FIBITMAP *dib, BYTE *index_a, BYTE *index_b); - -// channel processing routines -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_GetChannel(FIBITMAP *dib, FREE_IMAGE_COLOR_CHANNEL channel); -DLL_API 
BOOL DLL_CALLCONV FreeImage_SetChannel(FIBITMAP *dib, FIBITMAP *dib8, FREE_IMAGE_COLOR_CHANNEL channel); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_GetComplexChannel(FIBITMAP *src, FREE_IMAGE_COLOR_CHANNEL channel); -DLL_API BOOL DLL_CALLCONV FreeImage_SetComplexChannel(FIBITMAP *dst, FIBITMAP *src, FREE_IMAGE_COLOR_CHANNEL channel); - -// copy / paste / composite routines -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_Copy(FIBITMAP *dib, int left, int top, int right, int bottom); -DLL_API BOOL DLL_CALLCONV FreeImage_Paste(FIBITMAP *dst, FIBITMAP *src, int left, int top, int alpha); -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_Composite(FIBITMAP *fg, BOOL useFileBkg FI_DEFAULT(FALSE), RGBQUAD *appBkColor FI_DEFAULT(NULL), FIBITMAP *bg FI_DEFAULT(NULL)); -DLL_API BOOL DLL_CALLCONV FreeImage_JPEGCrop(const char *src_file, const char *dst_file, int left, int top, int right, int bottom); -DLL_API BOOL DLL_CALLCONV FreeImage_PreMultiplyWithAlpha(FIBITMAP *dib); - -// miscellaneous algorithms -DLL_API FIBITMAP *DLL_CALLCONV FreeImage_MultigridPoissonSolver(FIBITMAP *Laplacian, int ncycle FI_DEFAULT(3)); - -// restore the borland-specific enum size option -#if defined(__BORLANDC__) -#pragma option pop -#endif - -#ifdef __cplusplus -} -#endif - -#endif // FREEIMAGE_H diff --git a/extern/FreeImage/FreeImage.lib b/extern/FreeImage/FreeImage.lib deleted file mode 100644 index 7e6e825..0000000 Binary files a/extern/FreeImage/FreeImage.lib and /dev/null differ diff --git a/extern/FreeImage/Whatsnew.txt b/extern/FreeImage/Whatsnew.txt deleted file mode 100644 index aaddffb..0000000 --- a/extern/FreeImage/Whatsnew.txt +++ /dev/null @@ -1,898 +0,0 @@ -What's New for FreeImage - -* : fixed -- : removed -! : changed -+ : added - -November 19th, 2007 - 3.10.0 -! FreeImage now uses libTIFF 3.9.0beta (CVS patch 2007-10-05) -! FreeImage now uses OpenJPEG 1.2.0 (SVN patch 2007-07-13) -! FreeImage now uses OpenEXR 1.6.1 -! FreeImage now uses libPNG 1.2.23 -! 
FreeImage now hides its internal functions and internal libraries when compiled with gcc -- [Herve Drolon] removed VS C+ 6.0 project files : this IDE is no longer supported because of OpenEXR -+ [Herve Drolon] added VS 2005 project files -+ [Herve Drolon] added full support for the OpenEXR format -+ [Herve Drolon] added full support for the JPEG-2000 format -+ [Herve Drolon] added FreeImage_TmoFattal02 tone mapping operator -+ [Ryan Rubley] added support for RGB vs BGR regardless of endian -+ [Herve Drolon] added FreeImage_MultigridPoissonSolver -+ [Carsten Klein] added FreeImage_PreMultiplyWithAlpha -+ [Carsten Klein] added __stdcall version of FreeImage_OutputMessage -+ [Carsten Klein] added new palette and color manipulation functions (see below) : - added FreeImage_SetTransparentIndex - added FreeImage_GetTransparentIndex - added FreeImage_GetAdjustColorsLookupTable - added FreeImage_AdjustColors - added FreeImage_ApplyColorMapping - added FreeImage_SwapColors - added FreeImage_ApplyPaletteIndexMapping - added FreeImage_SwapPaletteIndices -* [Herve Drolon] fixed a bug in TIFF plugin when reading 8-bit + 8-bit alpha images -* [Herve Drolon] fixed a bug in TIFF plugin when reading images with uncommon bitdepths -* [rodrigo] fixed FreeImage exception handling under gcc (added -fexceptions to gcc compiler flags) -* [Martin Dyring-Andersen] fixed GIF plugin crashing on some corrupted files -* [Herve Drolon] fixed a bug with RLE encoding for 8-bit BMP images -* [Herve Drolon] fixed GPS metadata being skipped when reading metadata in Exif images -* [Herve Drolon] fixed a bug when reading OS/2 BMP images with a negative height -* [Ryan Rubley] fixed a bug with loading of GIFs with large amounts of solid color areas -* [Ryan Rubley] fixed OS X compile error in BitmapAccess.cpp -* [Herve Drolon] fixed a bug in FreeImage_Paste when pasting non-standard image types -* [Herve Drolon] saving 1-bit TIF with the TIFF_CCITTFAX3 flag is now compliant with the TIFF Class F 
specification -* [Carsten Klein] fixed topdown parameter in FreeImage_ConvertFromRawBits and FreeImage_ConvertToRawBits being handled in reverse -* [Herve Drolon] fixed a bug when reading some RLE-4 encoded BMP data -* [Carsten Klein] conversion from 1-bit to 32-bit now keep possibly present transparency - -February 11th, 2007 - 3.9.3 -! FreeImage now uses libPNG 1.2.16 -! [Ryan Rubley/Ryan Davis] reworked the MacOSX makefile in order to fully support Universal Binary builds of FreeImage -! [Herve Drolon] makefiles are now generated from VS2003 project files instead of VS6 project files -! [Herve Drolon] changed JPEG load/save flag option values -+ [Herve Drolon] added support for RGBAF images to FreeImage_ConvertToRGBF -+ [Herve Drolon] FreeImage_Paste now works with any bitmap type -+ [Herve Drolon] added full support for 64-bit RGBA images to the PNG and TIFF plugins -+ [Jascha Wetzel] added JPEG downsampling feature to PluginJPEG:Load -* [Thomas Chmielewski] fixed a bug in FreeImage_Dither and Bayer dithering, added FID_BAYER16x16 -* [Raphael Gaquer] greatly improved the speed of the GIF encoder -* [Herve Drolon] fixed saving of metadata in the PNG plugin -* [rampelstinskin] fixed transparency table to alpha channel conversion for 4-bit images in FreeImage_ConvertTo32Bits -* [Scott Smith] added missing IPTC tag named "Country/PrimaryLocationCode" -* [Herve Drolon] changed #include by #include in FreeImage.h (needed by Solaris 9) -* [Pierre Arnaud] fixed the use of FreeImage in low memory condition by checking some returned values of the malloc function -* [Pierre Arnaud] fixed TagLib::getTagFieldName not being thread safe - -October 30th, 2006 - 3.9.2 -! 
FreeImage now uses libTIFF 3.8.2 (with patch 2006-10-13) -+ [Herve Drolon] added full support for 16-bit greyscale and 48-bit RGB to the PNM plugin -+ [Herve Drolon] added IPTC writing support to JPEG & TIFF plugins -+ [Herve Drolon] added new Exif maker note tags -+ [Herve Drolon] added FreeImage_JPEGCrop -+ [Thorsten Radde] added support for 8-bit palettized bitmaps in FreeImage_RotateClassic -+ [Matt Rice] added automatic call to FreeImage_Initialise / FreeImage_DeInitialise when using FreeImage as a .so -+ [Martin Dyring-Andersen] added FreeImage_LoadMultiBitmapFromMemory to the multi-page API -+ [Herve Drolon] added support for tiled TIFF images -* [Carsten Klein] fixed a bug in FreeImage_SetMetadata occuring when deleting a tag -* [Herve Drolon] fixed a bug in PNG plugin when reading Macromedia 'false' PNG files -* [Thorsten Radde] added resolution support to PluginPSD -* [Ryan Rubley] fixed a bug in PluginGIF occuring with interlaced GIF -* [Ryan Rubley] fixed a bug in the multipage cache mechanism (internal FreeImage_FindBlock function) -* [Thorsten Radde] fixed a stack corruption in TIFF plugin occuring when reading exif tags -* [checkered] fixed a bug in the multipage cache mechanism causing VS2005 to crash on multipage files -* [Herve Drolon] fixed a bug with transparency support of 1- and 4-bit images -* [Roar Flolo] fixed a bug in PSD plugin when reading non compressed RGB images (alpha channel initialization) -* [Nicolas Hatier] fixed a bug in PluginGIF when using the GIF_PLAYBACK flag -* [Herve Drolon] fixed a bug in TIFF plugin when saving 8-bit images using LZW with differenciation -* [Herve Drolon] fixed 64-bit compilation issue with LibPNG and assembler code - -July 16th, 2006 - 3.9.1 -* [Ryan Rubley] fixed a bug in PluginGIF plugin causing FreeImage to crash on malformed GIF files - -July 6th, 2006 - 3.9.0 -! FreeImage now uses libPNG 1.2.12 -! FreeImage now uses libTIFF 3.8.2 (with patch 2006-06-24) -! 
FreeImage_Allocate/FreeImage_Allocate now set the resolution to 72 dpi instead of 0 -+ [Herve Drolon/Petr Pytelka] added a raw FAX G3 format loader -+ [Herve Drolon] added support for most image types to FreeImage_Rescale -+ [Herve Drolon] added FreeImage_MakeThumbnail -+ [Herve Drolon] added support for 64-bit images to FreeImage_ConvertTo32Bits -+ [Herve Drolon] added support for Exif tags to TIF plugin (read only) -+ [Herve Drolon] added FreeImage_ReadMemory -+ [Herve Drolon] added FreeImage_WriteMemory -+ [Herve Drolon] added new Exif maker note tags -+ [Sherman Wilcox] added a SGI file format loader -+ [Herve Drolon] added support for separated images to PluginTIFF -+ [Herve Drolon] added support for progressive-JPEG saving to PluginJPEG -* [Carsten Klein] FreeImage_Dither and FreeImage_Threshold now work with palettized 8-bit dib -* [Christophe Petit] fixed a bug in FreeImage_GetFIFFromFilenameU occuring with files without extension -* [Leigh Brasington] fixed a bug in PluginGIF causing FreeImage not working on Win/98/ME -* [Herve Drolon] fixed a bug in PluginTIFF with writing of JPEG-in-TIFF files -* [Jojakim Stahl] fixed a bug occuring with 4-bit PCX files -* [Sandor Szalacsi] fixed a bug in FreeImage_SetBackgroundColor (bkgnd clearing) -* [Petr Pytelka] fixed PluginTIFF::_tiffSizeProc failing on some images -* [Sherman Wilcox] fixed a bug in DDS plugin when loading images whose size is not a multiple of 4 -* [Sherman Wilcox] fixed a memory leak in PluginDDS::LoadDXT_Helper -* [Sherman Wilcox] fixed DDS plugin bad behavior with invalid DDS files (such as files with zero length) -* [Floris van den Berg] fixed a memory leak in the MultiPage cache mechanism -* [Herve Drolon] replaced WIN32 #define by _WIN32 #define as this is needed by VS2005 -* [Herve Drolon] fixed a VS2005 error in FreeImage_DeletePage -* [Petr Supina] fixed a pow(long,long) function not being standard ANSI C/C++ -* [Petr Supina] fixed FreeImage_FindBlock function not being standard ANSI 
C/C++ -* [Olaf Stoyke] added support for 64-bit Linux OS -* [Craig Stark] fixed FreeImage support on Intel based Mac OS -* [Herve Drolon] fixed PluginTIFF failing on bad fax tiff images (bad images are now loaded 'as is') -* [Zack Simpson] fixed a bug occuring in rare situations with FreeImage_Aligned_Malloc - -September 5, 2005 - 3.8.0 -! FreeImage now uses libTIFF 3.7.3 -! FreeImage now uses ZLib 1.2.3 -+ [Herve Drolon] added support for 48-bit images to FreeImage_ConvertTo24Bits -+ [Herve Drolon] added FreeImage_ConvertToGreyscale -+ [Herve Drolon] added support for 16-bit greyscale images to FreeImage_ConvertTo8Bits -+ [Petr Pytelka] added UNICODE functions (see below) - added FreeImage_LoadU - added FreeImage_SaveU - added FreeImage_GetFIFFromFilenameU - added FreeImage_GetFileTypeU -+ [Herve Drolon] FreeImage_Copy now works with any bitmap type -+ [Herve Drolon] added support for 1-bit images to FreeImage_Paste -* [Ryan Rubley] fixed PluginGIF failing to link on some broken gcc versions -* [Karl-Heinz Bussian] fixed a bug in LookupX11Color/LookupSVGColor with handling of grey color names -* [Herve Drolon] FreeImage_Dither now uses FreeImage_ConvertToGreyscale and handles 4/8-bit palletized images -* [Herve Drolon] FreeImage_Threshold now uses FreeImage_ConvertToGreyscale and handles 4/8-bit palletized images -* [Craig Hockenberry] fixed PluginGIF::Save swapping the byte order for the height on big endian machines (e.g. PPC on Mac OS X.) 
-* [Herve Drolon] fixed a bug in JPEG plugin when reading Exif maker notes from images produced by Nikon Editor -* [Herve Drolon] fixed a bug in BMP plugin when reading some malformed RLE8 bmp -* [Herve Drolon] fixed a bug in RAS plugin when loading 8-bit palettized images with less than 256 colors -* [Herve Drolon] fixed a bug in FreeImage_Rescale with 16-,48-,64-bit images -* [Herve Drolon] fixed a bug in the ICC profiles API when loading profile-less CMYK TIFF -* [Herve Drolon] 4-bit PNG are now loaded as 4-bit and no longer converted to 8-bit -* [Greg Ng] fixed a bug in FreeImage_ConvertToRGBF (FIT_BITMAP -> FIT_RGBF conversion) - -May 7, 2005 - 3.7.0 -! FreeImage now uses libTIFF 3.7.2 -! [Ryan Rubley] improved FreeImage_OpenMultiBitmap -+ [Detlev Vendt] added FreeImage_ZLibGUnzip -+ [Herve Drolon] added new image data types FIT_RGB16, FIT_RGBA16, FIT_RGBF, FIT_RGBAF -+ [Herve Drolon] FreeImage_FlipHorizontal & FreeImage_FlipVertical now work with any bitmap type -+ [Herve Drolon] added conversions to float and double in FreeImage_ConvertToType -+ [Herve Drolon] added FreeImage_ConvertToRGBF -+ [Herve Drolon] added support for 16-, 48- and 96-bit images to FreeImage_Rescale -+ [Ryan Rubley] added FreeImage_ColorQuantizeEx -+ [Ryan Rubley] added FIMD_ANIMATION and FIDT_PALETTE -+ [Ryan Rubley] added brand new PluginGIF with full animation multipage and metadata support -+ [Herve Drolon] added support for FIC_MINISWHITE 8-bit images to FreeImage_Rescale -+ [Herve Drolon] added HDR (High Dynamic Range) format (loader & writer) -+ [Herve Drolon] added support for 48-bit images in TIFF plugin -+ [Herve Drolon] added support for 48-bit images in PNG plugin -+ [Herve Drolon] added tone mapping operators (see below) -+ added FreeImage_ToneMapping -+ added FreeImage_TmoDrago03 -+ added FreeImage_TmoReinhard05 -+ [Petr Pytelka] added FreeImage_JPEGTransform -* [Herve Drolon] allowed loading of corrupted JPEG with a premature end of file -* [Herve Drolon] fixed a memory 
leak with loading of exif JPEG images -* [Detlev Vendt] changed some 'pointer-to-int' casts to 'pointer-to-long' for 64bit machines -* [Ryan Rubley] fixed a memory leak in the multipage API -* [Ryan Rubley] updated VB6 wrapper generation for new functions -* [Herve Drolon] fixed incorrect behavior when reading JPEG comments containing special characters -* [Herve Drolon] fixed incorrect behavior when reading JPEG ICC profiles with a size greater than 64 KB -* [Herve Drolon] fixed a bug in TIFF plugin when loading malformed multipage TIFF -* [Herve Drolon] fixed PluginTIFF not being thread safe - -February 20, 2005 - 3.6.1 -* [Ryan Rubley] fixed a memory leak in the metadata API -* [luedi] improved the robustness of FIBITMAP allocations - -February 13, 2005 - 3.6.0 -! FreeImage now uses libMNG 1.0.9 -! [Herve Drolon] improved the speed of FreeImage_Rescale -! [Herve Drolon] improved FreeImage_RotateClassic (more compact code, a little faster) -! [Herve Drolon] improved the metadata API using tag accessors -+ [Detlev Vendt] added LZW support to PluginGIF:Save -+ [Herve Drolon] added VS.Net 2003 project files -+ [Herve Drolon] added VERSIONINFO resource to the DLL -+ [Herve Drolon] added support for CMYK JPEG on loading -+ [Petr Supina] added 16-bytes alignment to FIBITMAP palette and pixels starting address -+ [Petr Supina] added support for MMX/SSE2 code in LibJPEG (based on Mozilla/Firefox code) -+ [Herve Drolon] added TIFF_JPEG compression flag to the TIFF plugin -+ [Detlev Vendt] added FreeImage_ZLibGZip -+ [Detlev Vendt] added FreeImage_ZLibCRC32 -* [Detlev Vendt] fixed PluginPNG not being thread safe -* [Herve Drolon] fixed compiler warning C4018 occuring with VS.Net 2003 - -December 29, 2004 - 3.5.3 -! FreeImage now uses ZLib 1.2.2 -! FreeImage now uses libPNG 1.2.8 -! FreeImage now uses libTIFF 3.7.1 -! [Herve Drolon] improved FreeImage_RotateClassic -! 
[Detlev Vendt] improved FreeImage_Rescale (more compact code, preserving 8-bpp colors) -+ [Herve Drolon] added support for transparency saving in ICO plugin -+ [Herve Drolon] added support for 1-bit images to FreeImage_RotateClassic -+ [Herve Drolon] added FreeImage_SetDotsPerMeterX and FreeImage_SetDotsPerMeterY -* [Nan Feng] fixed memory leak in FreeImage_DeleteTag (internal stuff) -* [Nigel Stewart] added conditional #pragma with #ifdef _MSC_VER / #endif -* [Herve Drolon] fixed the '65536 lines' limit on loading in PNM plugin - -November 27th, 2004 - 3.5.2 -* [Herve Drolon] fixed a second bug in FreeImage_Clone function - -November 26th, 2004 - 3.5.1 -+ [Riley McNiff] added FreeImage_ConvertTo4Bits -* [Herve Drolon] fixed a buffer overrun with some ILBM images -* [Riley McNiff] fixed a potential problem when reading TIFF resolution info -* [Dimitar Atanasov] fixed a bug in FreeImage_Clone function -* [Dimitar Atanasov] fixed several bugs in TIFF plugin - -November 1st, 2004 - 3.5.0 -! FreeImage now uses libPNG 1.2.7 -! FreeImage now uses libTIFF 3.7.0 -! FreeImage now uses libMNG 1.0.8 -! [Herve Drolon] improved TIFF LZW compression using a predictor -! 
[Detlev Vendt] FreeImagesPlus: corrected references to FreeImage.h and FreeImage.lib -+ [Herve Drolon] added support for loading/saving of 8-bit transparent TIFF -+ [Riley McNiff] added support for 4-bit dib in FreeImage_Paste -+ [Herve Drolon] added support for memory IO streams (see below) -+ added FreeImage_OpenMemory -+ added FreeImage_CloseMemory -+ added FreeImage_LoadFromMemory -+ added FreeImage_SaveToMemory -+ added FreeImage_TellMemory -+ added FreeImage_SeekMemory -+ added FreeImage_AcquireMemory -+ added FreeImage_GetFileTypeFromMemory -+ [Petr Pytelka] added FreeImage_GetFIFMimeType to the plugins function list -+ [Herve Drolon] added ICC profile support to JPEG plugin -+ [Herve Drolon] added support for metadata (see below) -+ added FreeImage_SetMetadata -+ added FreeImage_GetMetadata -+ added FreeImage_GetMetadataCount -+ added FreeImage_TagToString -+ added FreeImage_FindFirstMetadata -+ added FreeImage_FindNextMetadata -+ added FreeImage_FindCloseMetadata -* [Riley McNiff] fixed a bug with FreeImage_SetPixelIndex and 4-bit images -* [Petr Pytelka] fixed returned value in FreeImage_CloseMultiBitmap -* [Petr Pytelka] fixed index of new page in FreeImage_InsertPage -* [Aaron Shumate] fixed a minor bug in PNG plugin -* [Aaron Shumate] fixed a bug in IFF plugin (odd-length chunks) -* [Rupert Hewitt] fixed FreeImage not compiling on National Instruments Cvi Ccompiler -* [Herve Drolon] fixed a bug in IFF plugin (ILBM data) -* [Fred Harju] added a Makefile for Solaris 9 -* [Roddy Pratt] fixed FreeImage not linking under Borland C++ Builder -* [Vadim Alexandrov] fixed a memory leak in the multipage API -* [Herve Drolon] fixed a bug with DDS plugin behaviour on Big Endian OS -* [Herve Drolon] fixed a bug with conversion of JPEG resolution info on saving - -July 8th, 2004 - 3.4.0 -! [Jim Keir] improved FreeImage_FlipVertical function -! 
[Herve Drolon] LZW compression is now enabled in FreeImage -+ [Karl-Heinz Bussian] added constants to FreeImage.h to get at compile time the library version -+ [Karl-Heinz Bussian] added color lookup functions for X11 and SVG -+ [Herve Drolon] added TIFF tags TIFF_CCITTFAX3, TIFF_CCITTFAX4 and TIFF_LZW -+ [Detlev Vendt] added support for CMYK TIFF files with alpha channel -+ [Detlev Vendt] added (re-introduction of) PluginGIF -* [Herve Drolon] fixed a bug with loading of FAX TIFF images (introduced with LibTIFF 3.6.1) -* [Herve Drolon] fixed a bug in Floyd-Steinberg dithering algorithm -* [Herve Drolon] fixed a bug in Targa plugin save function -* [Herve Drolon] fixed a bug in FreeImage_AdjustCurve function -* [Ryan Rubley] fixed a bug with FreeImage_Rescale's filters accuracy -* [Ryan Rubley] fixed a bug in NN quantizer -* [Herve Drolon] fixed a bug with TIFF files containing additional Photoshop alpha channels -* [James Rossfeld] fixed a memory leak with some PSD images -* [Herve Drolon] fixed a bug with saving of 32-bit non transparent PNG images -* [Alexandr Zamaraev] fixed FreeImage not compiling with mingw32 -* [Herve Drolon] fixed FreeImage not compiling with VC.NET (pow function needs casts) - -May 2, 2004 - 3.3.0 -! [Ryan Rubley] FreeImage has been ported to MacOSX and should also work on other big endian processors -+ [Ryan Rubley] rewrote XPM plugin (better load support) and added save support -+ [Ryan Rubley] added ICO_MAKEALPHA flag to ICO plugin -+ [Ryan Rubley] Set/GetPixelColor now works with 16-bit pixels (555 or 565) -+ [Herve Drolon] PNG plugin now supports loading and saving of unsigned 16-bit greyscale images -* [Herve Drolon] fixed a bug with loading of 8-bit and 16-bit PNG with a 8-bit alpha channel -* [Herve Drolon] fixed a bug in NN quantizer algorithm with handling of 4-byte boundary alignment. 
-* [Herve Drolon] fixed a bug in PluginIFF Validate function -* [Herve Drolon] fixed a minor design issue in FreeImage_GetFIFFromFormat -* [Brad Schick] fixed some compiler warnings with VC++ 7.1 -* [Herve Drolon] fixed a bug with saving of 8-bit palettized images to 24-bit JPEG (channel inversion) - -March 16, 2004 - 3.2.1 -! [Volker Gärtner] improved the DDS plugin -! [Herve Drolon] FreeImage_Rescale now works on 8-, 24- and 32-bit images -! [Herve Drolon] FreeImage_Copy now works on 1-, 4-, 8-, 16-, 24- and 32-bit images -* [Floris van den Berg] fixed a bug in the MultiPage cache mechanism -* [Herve Drolon] fixed a bug with loading/saving of 8-bit transparent tga images -* [Herve Drolon] fixed a bug with loading of 1-bit TIFF (introduced with LibTIFF 3.6.1) - -February 18, 2004 - 3.2.0 -! FreeImage now uses libTIFF 3.6.1 -+ [Herve Drolon] added FreeImage_HasBackgroundColor -+ [Herve Drolon] added FreeImage_GetBackgroundColor -+ [Herve Drolon] added FreeImage_SetBackgroundColor -+ [Herve Drolon] added FreeImage_Composite -+ [Herve Drolon] added ICC profile support to PNG plugin -+ [Herve Drolon] added background color support to PNG plugin -+ [Volker Gärtner] added support for DDS format (loader) -* [Steve Johnson] improved FreeImage_OpenMultiBitmap/FreeImage_CloseMultiBitmap -* [Steve Johnson] fixed a bug in FreeImage_InsertPage -* [Herve Drolon] fixed a bug with JPEG compressed TIFF (red/blue swapping) -* [Herve Drolon] fixed a bug in PluginTarga where 8-bit images were saved incorrectly - -January 26, 2004 - 3.1.0 -! 
FreeImage now uses ZLib 1.2.1 -+ [Herve Drolon] added support for integer, real and complex image types (see below) -+ added FREE_IMAGE_TYPE enum -+ added FreeImage_AllocateT -+ added FreeImage_GetImageType -+ added FreeImage_FIFSupportsExportType -+ added FreeImage_ConvertToStandardType -+ added FreeImage_ConvertToType -+ added load/save support of all image types to TIFF plugin -+ [Peter Lemmens] added a Validate function to TARGA plugin -+ [Herve Drolon] added FreeImage_GetPixelIndex / FreeImage_SetPixelIndex -+ [Herve Drolon] added FreeImage_GetPixelColor / FreeImage_SetPixelColor -+ [Herve Drolon] added FreeImage_GetComplexChannel / FreeImage_SetComplexChannel -* [Serge Ivanchenko] TIFF_DEFLATE compression is now enabled in TIFF plugin -* [Herve Drolon] fixed a bug in NeuQuant color reduction algorithm - -November 16, 2003 - 3.0.4 -* [Tobias Persson] fixed FreeImage_GetChannel not working with FICC_ALPHA channel -* [Detlev Vendt] fixed a minor bug with PNG plugin and PNG_IGNOREGAMMA flag -* [Detlev Vendt] fixed a memory leak in PNG plugin save routine -* [Detlev Vendt] fixed JPEG validation problem with .jpe files -* [Ryan Rubley] added Source/LibTIFF/tif_extension.c to LibTIFF (needed for MacOSX) -* [Herve Drolon] improved error handling in TIFF plugin -+ [Karl-Heinz Bussian] added FreeImage_IsLittleEndian -+ [Karl-Heinz Bussian] added JPEG save support for 8-bit miniswhite bitmaps (transparent conversion to minisblack) -+ [Karl-Heinz Bussian] FreeImage_GetColorType now recognizes 8-bit FIC_MINISWHITE images -! [Herve Drolon] FreeImage_Rescale now supports rescaling of 32-bit images with alpha channel -! [Herve Drolon] FreeImage_Invert now supports inversion of 32-bit images with alpha channel -! 
[Herve Drolon] FreeImage_AdjustCurve now supports working with FICC_ALPHA channel - -November 2, 2003 - 3.0.3 -* [Ryan Rubley] improved makefile for Linux -* [Ryan Rubley] fixed FreeImage not compiling under MacOSX -* [Detlev Vendt] fixed still present inconsistency with 32bpp transparency handling -* [Herve Drolon] fixed incorrect loading of 4-bit greyscale images in TIFF plugin - -October 27, 2003 - 3.0.2 -! FreeImage now uses libMNG 1.0.6 -* [Herve Drolon] fixed a boolean test in PluginCUT returning always false -* [Herve Drolon] fixed a warning in PluginIFF generated with g++ -* [Linus Tan] fixed a bug in FreeImage_Copy -* [Herve Drolon] fixed FreeImage not compiling under Linux (thanks to Michal) - -October 20, 2003 - 3.0.1 - -! FreeImage now uses libTIFF 3.6.0 -* [Detlev Vendt] fixed incorrect definition of the FREE_IMAGE_FORMAT enum -* [Detlev Vendt] fixed a potential crash problem with Load / Save routines -* [Herve Drolon] fixed incorrect loading of 16-bit greyscale images in TIFF plugin -* [Dennis Lim] fixed a memory leak in Floyd & Steinberg dithering routine -* [Herve Drolon] fixed a bug in BMP loader (incorrect loading of RLE4 bmp) -* [Detlev Vendt] fixed some inconsistency with 32bpp transparency handling -+ [David Boland] added a C# wrapper -// Linux compatibility issues -- [Michal Novotny] removed the round function in Utilities.h -!
[Herve Drolon] replaced the round routine by the clamp routine in PluginPCD -+ [Herve Drolon] added _itoa version in Utilities.h -* [Michal Novotny] fixed untyped consts not accepted by g++ in PluginBMP - -September 8, 2003 - 3.0.0 -- [Herve Drolon] removed deprecated functions -- [Herve Drolon] removed deprecated flags (TARGA_LOAD_RGB555, ICO_*, except ICO_DEFAULT) -- [Herve Drolon] removed the FreeImage pointer table (internal stuff) -+ [Herve Drolon] added a C++ wrapper -+ [Herve Drolon] added the FreeImage Toolkit (see below) -+ added FreeImage_Rescale -+ added FreeImage_RotateClassic -+ added FreeImage_RotateEx -+ added FreeImage_FlipHorizontal -+ added FreeImage_FlipVertical -+ added FreeImage_Invert -+ added FreeImage_AdjustCurve -+ added FreeImage_AdjustGamma -+ added FreeImage_AdjustBrightness -+ added FreeImage_AdjustContrast -+ added FreeImage_GetHistogram -+ added FreeImage_GetChannel -+ added FreeImage_SetChannel -+ added FreeImage_Copy -+ added FreeImage_Paste -+ [Karl-Heinz Bussian] added XPM loader -+ [Karl-Heinz Bussian] added flags parameter to FreeImage_CloseMultiBitmap -+ [Karl-Heinz Bussian] added JPEG save support for 8-bit palettized bitmaps (transparent conversion to 24-bit) -+ [Herve Drolon] added interface to ZLib compression functions -+ [Herve Drolon] added ICO format to the multipage API (loader & writer) -+ [Herve Drolon] added a MIME type to all plugins -* [Karl-Heinz Bussian] fixed incorrect conversion from 1-bit FIC_MINISWHITE bitmaps to 8-bit -* [Herve Drolon] fixed a bug in FreeImage_CloseMultiBitmap -* [Herve Drolon] fixed a potential memory leak in conversion functions (8-, 24-, 32-bit) -* [Robert Walker] fixed incorrect conversion from 16-bit to 24-bit and 16-bit to 32-bit -* [blurble] fixed TIFF validate signature problem (3DS files were recognized as TIFF) -* [Kurt Jankowski-Tepe] fixed FreeImage not compiling on MinGW / LCC WIN32 -* [Jani Peltonen] fixed bug in PluginTARGA where 32-bit bitmaps are not always correctly 
flipped -* [Detlev Vendt] fixed a bug with TIFF (memory leak with ICC profiles) - -May 25, 2003 - 2.6.1 -+ [Detlev Vendt] added FIC_CMYK to FREE_IMAGE_COLOR_TYPE -+ [Detlev Vendt] added ICC profile support to the library (see below) -+ added FreeImage_GetICCProfile -+ added FreeImage_CreateICCProfile -+ added FreeImage_DestroyICCProfile -+ added FIICCPROFILE & FIICCPROFILE flags -+ added plugin function FreeImage_FIFSupportsICCProfiles -+ [Detlev Vendt] added ICC profile support for TIFF -+ [Herve Drolon] added XBM (X11 Bitmap Format) support : loading -* [Herve Drolon] fixed incorrect IFF file detection (thanks Floris) -* [Herve Drolon] fixed incorrect conversion from 1/4-bit greyscale bitmaps to 8-bit -* [Herve Drolon] fixed a bug in TIFF writer when saving 1,4,8 bit dib (introduced in 2.6.0, sorry) -* [Herve Drolon] fixed a palette problem in TIFF loader when loading 1-bit b & w images -* [Herve Drolon] improved FreeImage_Dither to handle any bitdepth - -May 5th, 2003 - 2.6.0 -! FreeImage now uses libPNG 1.2.5 -! FreeImage now uses libMNG 1.0.5 -! [Markus Loibl] ActiveX wrapper is now distributed in a separate release (since 2.5.5) -! [Herve Drolon] the function FreeImage_Free is now deprecated : use FreeImage_Unload instead -! 
[Herve Drolon] updated the generic samples and removed deprecated functions -+ [Detlev Vendt] added CMYK support to TIFF save function -+ [Detlev Vendt] added TIFF_SAVE_CMYK flag constant -+ [Detlev Vendt] added 32-bit support (with transparency handling) to TIFF plugin -+ [Herve Drolon] added FreeImage_Threshold -+ [Herve Drolon] added FreeImage_Dither -+ [Herve Drolon] added FREE_IMAGE_DITHER parameter to FreeImage_Dither -* [Herve Drolon] improved error handling in PluginMNG -* [Herve Drolon] improved TIFF flags handling in TIFF save function -* [Herve Drolon] fixed a potential crash-problem in FreeImage_OutputMessage (in case of a null message) -* [Detlev Vendt] fixed a bug with the deprecated FreeImage_GetBitsRowCol (trailing backslash behind the DEPRECATE macro) - -July 24th, 2002 - 2.5.5 -! FreeImage now uses libPNG 1.2.4 -! FreeImage now uses libMNG 1.0.4 -+ [Markus Loibl] added ActiveX wrapper - -June 22nd, 2002 - 2.5.4 -* [Timothy Roughton] fixed FreeImage not compiling on LCC WIN32 -* [Markus Loibl] fixed PluginTIFF sometimes saving with wrong X/Y resolution -* fixed crashbug when loading some RLE4 BMPs -! FreeImage now uses LibPNG 1.2.3 -! [Markus Loibl] improved startup plugin locate handling code -! [Gerhard Gruber] made some changes so that FreeImage compiles on VC5 -+ [Markus Loibl] added flags TIFF_PACKBITS, TIFF_DEFLATE, TIFF_ADOBE_DEFLATE and TIFF_NONE - -May 21st, 2002 - 2.5.3 -* fixed wrong colors when loading 16-bit grayscale TIFF -* fixed crash-problem with FreeImageQt -* fixed PluginTIFF saving some bitmaps flipped vertically -* [Laurent Rocher] fixed bug in FreeImage_GetLockedPageNumbers -* [Laurent Rocher] fixed bug in FreeImage_UnlockPage -!
FreeImage now uses libpng 1.2.2 -+ added TARGA save support -+ added BMP RLE8 save support - -March 30th, 2002 - 2.5.2 -* fixed bug in PluginTARGA where 32-bit bitmaps are not always correctly flipped -* fixed FreeImage_GetLockedPageNumber being mentioned in FreeImage.h -* fixed crash bug when handling read-only multipage bitmaps -- removed internal function FreeImage_GetExtraDataPointer -! FreeImage now uses zlib 1.1.4 -+ added function FreeImage_GetLockedPageNumbers - -March 2nd 2002 - 2.5.1 -* fixed pluginTIFF not being able to save 32-bit bitmaps -* fixed not being able to save PNM bitmaps through the LoadXXX wrappers -* fixed a webcam generated BMP image being loaded with wrong colors -! FI_ReadProc, FI_WriteProc, etc. do now carry the DLL_CALLCONV flag -! the function FreeImage_GetBitsRowCol is now deprecated -! FreeImage_SetTransparencyTable now taken an integer as count parameter -! FreeImage_IsTransparent now always returns true for 32-bit bitmaps -! PluginPNG::Save now ignores the result of FreeImage_IsTransparent -! PluginTIFF now converts all 32-bit bitmaps to 24-bit, until our patch - to fully support alpha in TIFF is applied in libtiff -+ added full multi-paging support -+ added octal and hexadecimal number support to FreeImage_OutputMessage - -January 3rd 2002 - 2.5.0 -* fixed bug in FreeImage_SaveJPEG -* fixed bug in FreeImage_LoadMNG -* fixed bug in FreeImage_LoadPNG -* fixed small Visual C++ 5.0 compiler issue in PluginMNG.cpp -* fixed FreeImage crashing on JPEG 6.0 encoded TIFFs -! FreeImage now uses libTIFF 3.5.7 -! FreeImage now uses libPNG 1.2.1 -! all the FreeImage_LoadXXX and FreeImage_SaveXXX functions are now deprecated -+ added Dr. 
Halo (*.cut) support -+ added printf-like format string support to SetOutputMessage -+ added basic multi-paging support: open, close, counting and grabbing -+ added deprecation manager -+ added FreeImage_Clone function - -October 3rd 2001 - 2.4.2 -* fixed missing BI_BITFIELDS support for 32-bit BMPs -* fixed bug in FreeImage_ConvertLine16_555_To16_565 and vice versa -* fixed bug in FreeImage_ConvertToRawBits -* fixed PluginTIFF behaving incorrectly on PHOTOMETRIC_MASK images -* fixed 16 bit TIFFs not loading correctly -* fixed incorrect handling of CCITTFAX3 and CCITTFAX4 TIFFs -* fixed JPEG encoded TIFFs not being supported -! [Yours Detlev] patched libTIFF to handle EXTRASAMPLE_UNSPECIFIED -! [Juergen Riecker] improved speed of PCX loading a lot -! rewrote parts of FreeImage to improve support for c -! the internal RGB555 and RGB565 macros now read BGR instead of RGB -! FreeImage now uses libMNG 1.0.3 -! FreeImage now uses libPNG 1.2.0 -! FreeImage_Save now opens files with the "w+b" flag -! renamed internal macro CalculateUsedColors to CalculateUsedPaletteEntries -! enabling/disabling plugins no longer has effect on FIFSupportsReading -! enabling/disabling plugins no longer has effect on FIFSupportsWriting -+ added flag PNG_IGNOREGAMMA -+ added function FreeImage_FIFSupportsExportBPP - -July 30th 2001 - 2.4.1 -* [Jan Nauta] fixed some plugin ids not being passed to plugins -* [Jan Nauta] fixed some functions being natively called instead of indirect -* [Jan Nauta] fixed BMPs with signature BA not being recognised -* [Remo Eichenberger] fixed memory leak in the plugin system -* fixed seek bug in PluginIFF's Validate -* fixed transparency issue in PluginPNG -* fixed uncaught exceptions in WUQuantizer and NNQuantizer -* fixed some problems with PluginTARGA -* fixed some problems with PluginICO -* fixed some problems with PluginBMP -! improved FreeImageQt's load function a little -!
tell/seek control for validation is now handled inside the plugin framework - -July 22th 2001 - 2.4.0 -* (Yours Detlev) fixed memory leak in FreeImage_GetFIFFromFilename -* (Yours Detlev) fixed memory leak in the ICO plugin -* (Yours Detlev) fixed memory leak in the PNG plugin -* fixed potential NULL-pointer access bug in Plugin::AddNode -* fixed problems with linking the static lib -- removed LBM plugin. Its functionality is placed in the IFF plugin now -- removed FreeImage_GetFIFByIndex -! FreeImage now uses LibMNG 1.0.2 -! FreeImage_SetTransparent now only enables alpha when the bitmap is 8 or 32 bit -! FreeImage_SetTransparencyTable now only enables alpha when the bitmap is 8 bit -! FreeImage_LoadLBM now uses Mark Sibly's IFF plugin -! FreeImage_SaveBMP now converts to 24-bit when bpp is 32 and transparency is off -! FreeImage_SaveJPEG now converts to 24-bit when bpp is 32 and transparency is off -! FreeImage_SavePNM now converts to 24-bit when bpp is 32 and transparency is off -! FreeImage_SaveTIFF now converts to 24-bit when bpp is 32 and transparency is off -+ [Mark Sibly] added IFF (ILBM) support -+ added basic support for Photoshop files -+ added mime type support (FreeImage_GetFIFFromMime) -+ added functions FreeImage_SetPluginEnabled and FreeImage_IsPluginEnabled - Disabling plugins modifies the behaviour of the following functions: - * FreeImage_LoadFromHandle - * FreeImage_SaveToHandle - * FreeImage_FIFSupportsReading - * FreeImage_FIFSupportsWriting - * FreeImage_GetFIFFromFormat - * FreeImage_GetFIFFromFilename - * FreeImage_GetFIFFromMime - * FreeImage_Validate - -June 30th 2001 - 2.3.2 -* fixed missing "targa" extension in targa extension list -* fixed small memory leak in PluginList::AddNode -* fixed 32 bit PNG saving suddenly disappeared from the distro? -* fixed 'black line' bug in LoadTARGA -- removed project FreeImageM2 -- removed FreeImage_Combine -! FreeImage_RegisterLocalPlugin now receives a FI_InitProc as first parameter -! 
FreeImage_GetFIFFromFilename now also takes the format id into account -! cleaned up the code a little for PluginPCD and PluginPCX -+ added static lib project - -June 11th 2001 - 2.3.1 -* [Machiel ten Brinke] fixed the loading of some 'ancient' TARGAs -* [Rui Lopes] fixed some bugs in the external plugin registration -* fixed the plugin system crashing when the init function isn't called -- removed project FreeImagePy -- removed 32 to 24 bit conversion while saving PNG in FreeImageQt -! the scanline convert functions are now accessible in plugins -! FreeImage now uses an STL map to store the plugin list -! PluginSDK.h is now integrated into FreeImage.h -! FreeImage_Register now receives the boolean parameter 'load_local_plugins_only' -! FreeImage now uses LibPNG 1.0.12 -+ [Rui Lopes] added plugin for GIF reading/writing support -+ added function FreeImage_SetTransparencyCount -+ added support for 32 bit PNG saving -+ added FreeImage_RegisterLocalPlugin to allow plugins inside apps -+ added FreeImage_RegisterExternalPlugin to manually load DLLs -+ added plugin for JBIG reading/writing support - -May 4th 2001 - 2.3.0 -* [Martin Weber] fixed some small bugs in the TARGA and BMP plugins -* [Martin Weber] fixed tiny bug in new 16 bit conversions -* [Martin Weber] fixed load flag inconsistency in the TARGA plugin -* [Martin Weber] fixed plugin id / load reference inconsistency for PNM -* [Jan Nauta] fixed bug in conversion 16 -> 16 -* [Herve Drolon] fixed small bug in 4-bit PCX loader -- removed code that loads BMPs renamed to ICO in PluginICO -! the flag TARGA_LOAD_RGB555 is now obsolete -! the plugin list is now sorted internally -! ConvertTo32Bits now stores the transparency table as alpha mask -! FreeImage now uses LibMNG 1.0.1 -!
FreeImage now uses LibPNG 1.0.11 -+ added external plugin support via DLLs -+ added function FreeImage_GetFIFByIndex -+ added internal function CalculateScanLine -+ added transparency support for high-color PNGs -+ added transparency support for high-color TIFFs -+ added functions FreeImage_SetTransparent and FreeImage_IsTransparent -+ added constant FIC_RGBALPHA to FREE_IMAGE_COLOR_TYPE - -April 5th 2001 - 2.2.0 -* [Remo Eichenberger] fixed small bug concerning DLLMain and static LIB generation -* fixed 1-bit bitmaps not properly loading in FreeImageQt -* fixed bug in conversion 16->16 -* FreeImage now uses LibPNG 1.0.10 -! [Martin Weber] improved loading of BMP files -! [Martin Weber] improved loading of TARGA files -! [Dave Larson] improved visual appearance after 16 conversions -! FreeImageQt now converts 32-bit bitmaps to 24-bit when saving PNGs and JPEGs -+ added functions FreeImage_Initialise and FreeImage_DeInitialise -+ added internal plugins -+ re-added combine/alphablend functions - -March 8th 2001 - 2.1.0 -* [Martin Hemming] fixed bug in 16-bit TARGA loading code -* fixed PNG's with alpha masks not loading correctly -! FreeImage is now dual-licensed: the FI-License and the GPL license -! FreeImage now uses LibPNG 1.0.9 -! FreeImage now uses LibTIFF 3.5.6 Beta -! FreeImage now uses LiBMNG 1.0.0 -! changed the ordering of the FREE_IMAGE_FORMAT table -! improved linux support -! 
improved test script -+ added transparency table support to SavePNG -+ added BI_BITFIELDS support to LoadBMP and SaveBMP -+ added reading support for OS/2 2.x BMPs -+ added support for MNG and JNG reading using LibMNG -+ added support for Deluxe Paint reading -+ added 'hot swap' support to the Core DLL -+ added 'hot swap' support to FreeImage Qt -+ added functions GetFIFFromFormat and GetFIFFromFilename -+ added functions FIFSupportsReading and FIFSupportsWriting -+ added function GetFIFRegExpr - -January 14th 2001 - 2.0.0 -* [Herve Drolon] fixed a bug in the conversion 4->8 -* [Herve Drolon] fixed a bug in metrics handling in SaveJPEG -* [Herve Drolon] fixed a bug in the return value of the function SaveTIFF -* fixed the presence of two WuQuantizer.cpp files in the distribution -* fixed bug where a BMP renamed to ICO isn't loaded -- removed FreeImage_ConvertToGreyScale. Use FreeImage_ConvertTo8Bits instead. -- removed the boolean parameters from all conversion routines -- removed page handling in LoadTIFF. A new range of functions will be added. -! The void pointers used in FreeImage are now typed -! LoadBMP now takes palettes in 24/32 bit images in respect -! All effects and MMX functions are now stored in a new library (FreeEffects) -! [Herve Drolon] fixed bug in FreeImage_GetColorType -! [Herve Drolon] improved PCX loader. It can now read 1, 4, 8 and 24-bit images -! [Manfred Tausch] improved FreeImage_Rotate -! [Luca Piergentili] fixed crash bug when saving some 1-bit TIFFs -! rewrote all bitdepth conversion routines making use of the new scanline converters -! rewrote bitdepth conversion in FreeImageQt (uses less memory) -! 
FreeImage is now compiled __stdcall -+ [Herve Drolon] added WBMP (Wireless Bitmap Format) support: loading and saving -+ [Herve Drolon] added 4, 16 and 32 bitdepth handling in GetColorType -+ [Herve Drolon] added handling of 8-bit greyscale bitmaps in SaveJPEG -+ [Herve Drolon] added NeuQuant color reduction algorithm to ColorQuantize -+ added DLL_CALLCONV (calling convention) flag -+ added bitmask support to all bitmaps -+ added a series of functions converting scanlines from one bitdepth to another -+ added functions ConvertFromRawBits and ConvertToRawBits -+ added project FreeImageM2: Magenta II MMT bindings for FreeImage -+ added basic foundation for linux support - -December 2th 2000 - 1.4.4 -* fixed small bug related to TIFFSetDirectory in FreeImage_LoadTIFF -* fixed FreeImage_Rotate sometimes clipping too much pixels -* fixed other small bug in FreeImage_Rotate -* fixed FreeImage_Clone not taking the FREEIMAGEHEADER in account -* fixed bug in FreeImageQt where 1-bit images are not correctly allocated -* fixed FreeImage_Crop not copying the palette -* fixed message function pointer crash bug -* fixed bug where the palette wasn't copied when saving in FreeImageQt -* fixed FreeImage_Clone not copying the transparency table -- removed FreeImage_WritePaletteEntry -! [Adam Gates] rewrote parts of FreeImage so that c compilers can handle it better -! FreeImageQt doesn't statically link with the FreeImage lib anymore -! FreeImageQt now uses atexit() to automatically unregister -! 
rewrote parts of FreeImage_LoadBMP to increase speed -+ [Markus Loibl] added metrics handling code to LoadBMP, LoadJPEG, LoadTIFF and LoadPCX -+ added metrics handling code to FreeImageQt -+ added functions FIQT_IsLoaded, FIQT_GetVersion and FIQT_GetCopyrightMessage -+ added conversion 1 -> 16 -+ added FreeImage_SaveJPEG and JPEG quality settings -+ added FreeImage_GetBitsRowCol -+ added function FIQT_SetOutputMessage to FreeImageQt -+ added FreeImage_GetFileTypeFromExtension and FIQT_GetFileTypeFromFormat -+ added project FreeImagePy: python bindings for FreeImage - -November 7th 2000 - 1.4.3 -* fixed FreeImage_SavePNG crash bug -* fixed slighly corrupt size filter in FreeImage_Combine -* fixed FreeImage_SaveTIFF not saving 4-bit images -* [Herve Drolon] fixed bug in FreeImage_LoadTIFF -* [Herve Drolon] fixed bug in FreeImage_GetColorType -- removed fclose from FreeImage_SavePNM (who put it there?) -! rewrote FreeImage_Rotate -! FreeImageQt now automatically detects which formats are supported by Qt and which not -! FreeImage_Allocate now returns a void pointer -! FreeImage_Unload is now called FreeImage_Free -+ added 16-bit 5-5-5 support to FreeImage_LoadBMP -+ added RLE_DELTA support to FreeImage_LoadBMP -+ added directory support to FreeImage_LoadTIFF -+ added functions dealing with transparency -+ added transparency support to 8-bit PNG's in Qt -+ added FREE_IMAGE_QUANTIZE parameter to FreeImage_ColorQuantize -+ added custom FREEIMAGEHEADER header prepended to internal bitmaps -+ added new documentation - -October 18th 2000 - 1.4.2 -* fixed FreeImage_SaveBMP storing an incorrect bfSize value in the BITMAPFILEHEADER -* fixed bug where JPEG and PNG wouldn't load in FreeImageQt -* fixed FreeImage_Mirror mirroring one pixel less than needed -! FreeImage_MaskedCombine24 is now called FreeImage_MaskedCombine24Ex -! 
FreeImage_MaskedCombine32 is now called FreeImage_MaskedCombine32Ex -+ added 16-bit bitmap support to FreeImage_Mirror -+ added 16-bit bitmap support to FreeImage_ConvertTo8Bits -+ added simple version of FreeImage_MaskedCombine24 -+ added simple version of FreeImage_MaskedCombine32 - -October 17th 2000 - 1.4.1 -* [Herve Drolon] fixed bug in FreeImage_ConvertTo8Bits -* fixed bug in conversion with 16 -> 24 and 16 -> 32 -- removed static library support -- removed all unnecessary files from LibTIFF, LibPNG, LibJPEG and ZLib -- removed all absolute seeks from the library -! FreeImageQt now makes use of the DLL distro -! rebuilt the entire directory structure -! improved handling of BMP -! renamed FreeImage_MaskedCombine to FreeImage_MaskedCombine32 -+ [Alexander Dymerets] added 24-bit masked alpha blending with a seperate alpha mask -+ added FreeImage_Rotate (known bug in degrees 76 to 106) -+ added 4-bit bitmap support to FreeImage_ConvertTo16Bits -+ added 8-bit bitmap support to FreeImage_ConvertTo16Bits -+ added 32-bit bitmap support to FreeImage_ConvertTo16Bits -+ added 32-bit bitmap support to FreeImage_Mirror -+ added 16-bit 5-5-5 support to FreeImage_ConvertTo24Bits -+ added 16-bit 5-5-5 support to FreeImage_ConvertTo32Bits - -October 2th 2000 - 1.4.0 -* [Jani Kajala] fixed bug in conversion with 4 -> 24 and 8 -> 32 -* [Jani Kajala] fixed bug in FreeImage_Flip -* [Jani Kajala] fixed minor bug in FreeImage_LoadBMP -- [Herve Drolon] removed PBMFlags, PGMFlags and PPMFlags -- [Herve Drolon] removed FI_LoadGeneric -- removed FreeImage_Win32.h -! [Herve Drolon] changed FI_GetFileType -! [Herve Drolon] replaced FI_LoadPBM, FI_LoadPGM and FI_LoadPPM with FI_LoadPNM -! [Herve Drolon] improved FreeImage_LoadPNG -! 
FreeImage_WritePaletteEntry is now exported -+ [Herve Drolon] added FreeImage_SavePNG -+ [Herve Drolon] added FreeImage_SavePNM and PNMFlags -+ [Herve Drolon] added XXXFlags parameter to save functions -+ [Herve Drolon] added FreeImage_LoadRAS and FIF_RAS -+ added FreeImage_GetFileTypeFromExt - -September 7th 2000 - 1.3.5 -+ added conversion 4 -> 8 to FI_ConvertTo8Bits -+ added simple version of FI_GetFileType -+ added project FreeImageQt; a port of the library to the TrollTech library - -August 31th 2000 - 1.3.4 -* fixed 'ice effect' bug in new 24 bit PCX code -* fixed some bugs with the conversion 16 -> 24 and 16 -> 32 -! FI_Blur now returns void -! A debug build of the library now produces FreeImaged.dll and FreeImaged.lib -! TARGA_LOAD_ARGB8888 is now called TARGA_LOAD_RGB888 -! Alpha channels are now automatically loaded unless TARGA_LOAD_RGB888 is specified -! cleaned up the code a lot -+ added 32-bit bitmap support to FreeImage_ConvertToGreyscale -+ added support for 32-bit bottom-left TARGA images -+ added internal functions FreeImage_WritePaletteEntry() and FreeImage_GetScanLine() -+ added FreeImage_Win32.h, containing Windows functions needed to create DIBs -+ added documentation through Doxygen - -July 30th 2000 - 1.3.3 -* [Jani Kajala] fixed some bugs with the conversion 4 -> 24 and 8 -> 24 -* [Jani Kajala] fixed some bugs with the conversion 4 -> 32 and 8 -> 32 -* fixed bug in FI_LoadPNM's ASCII number loader -! [Herve Drolon] improved FI_LoadPNG -! [Herve Drolon] changed FI_ConvertToGreyScale (added changeable macro for conversion) -! improved FI_ConvertTo24Bits -! improved FI_ConvertTo32Bits -! 
freeImage now uses LibPNG 1.0.8 -+ [Herve Drolon] added FI_ColorQuantize, based on Wu's color quantizer -+ added the conversion 1 -> 24 -+ added the conversion 1 -> 32 -+ added FI_ConvertTo8Bits -+ added FI_Invert (very useful for image processing) -+ added FI_GetColorType and 'enum FREE_IMAGE_COLOR_TYPE' - -June 30th 2000 - 1.3.2 -- removed color reduction functions from the project -! [Herve Drolon] Improved FI_LoadTIFF code -! renamed FI_ToGrayscale to FI_ConvertToGreyScale -! renamed FI_IncreaseColors to FI_ConvertTo24Bits -! LoadBMP now supports 32-bit bitmaps -! [Jani Kajala] Improved FI_LoadTARGA and FI_LoadPCX code -+ added FI_ConvertTo32Bits to convert a bitmap to 32-bit -+ added FI_MaskCombine to combine two 32-bit bitmaps using a alpha mask -+ added FI_AddAlphaMask to enrich a 32-bit bitmap with an alpha mask -+ added FI_SaveTIFF -+ added 16-bit bitmap (565) support to the ConvertToXXX functions. -+ added FI_ConvertTo16Bits (555 and 565) - -June 1th 2000 - 1.3.1 -- removed Standard Template Library (STL) code -* [Jani Kajala] fixed minor bug in FI_LoadTARGA -* [Jani Kajala] fixed some minor bugs in FI_LoadPCX -! streamlined FI_LoadJPEG a little -! FreeImage now uses LibPNG 1.0.6 -! FreeImage now uses LibTIFF 3.5.5 -! FreeImage now uses malloc and free instead of new and delete -+ introduced compiler flags to disable certain features in the DLL -+ added experimental nearest color reduction (FI_ReduceColorsNearestColor) - -April 13th 2000 - 1.3.0 -* fixed some 8 bit PCX files loading incorrectly -* fixed tiny bug in internally used CalculateUsedColors function -- removed FI_SaveXPM. Only BMP is supported now. -- removed Windows dependencies for easier porting -! optimized FI_LoadKOALA a little -! optimized FI_Combine using MMX technology -! FI_Combine now receives an 'unsigned integer' as alpha blend parameter -! 
FI_InCreaseColors and FI_ReduceColors don't dispose the old bitmap anymore -+ added PNM support (PGM, PPM and PBM; both binary and ascii) -+ [Alexander Dymerets] added FI_EnableMMX and FI_DisableMMX -+ added various effect functions (FI_Blur, FI_Brighten and FI_Crop) - -March 1st 2000 - 1.2.1 -* fixed some 24 bit PCX files loading incorrectly - -February 8th 2000 - 1.2.0 -* fixed last bitmap data block in JPEG files being truncated -* fixed 4/8 bit BMP's incorrectly loading when the palette is smaller than the bitcount predicts -- removed FI_Load. There is no reliable way to identify all image formats -- removed FI_SetJpegDecodeMode. - Mode selection is now done using the 'DataEnum data' parameter of FI_LoadJPEG -! read_proc/write_proc/tell_proc in FreeImageIO now are same as fread/fwrite/ftell -+ added a 'DataEnum data' parameter to all FI_LoadXXX functions. -+ added 16 bit TARGA support -+ added RLE support for TARGA images -+ added FI_GetDIBSize to get the size of a DIB in bytes -+ added Kodak PhotoCD support (Base, Base/4 and Base/16 encoding) -+ added KOALA support -+ added FI_GetFileType. Note: there is no reliable way to identify TARGA, ICO and PCD. Therefore they have been excluded -In KOALA files only the files converted by a C64 emulator can be identified. -+ added FI_Combine to combine two 24-bit bitmaps with (optional) alpha blending - -January 15th 2000 - 1.1.1 -! 
FI_Copy is now called FI_Clone -+ added FI_ToGrayscale to convert a color bitmap to grayscale -+ added 32 bit TARGA support -+ added FI_IncreaseColors to increase the bitmap bitdepth from 4/8 bit to 24 bit - -January 14th 2000 - 1.1.0 -* FI_MIRROR: fixed nibbles not being mirrored in 4 bit images -* FI_MIRROR: fixed bits not being mirrored in 1 bit images -* fixed improper loading of 1, 4 and 8 bit OS/2 BMP's -* fixed some inconsistensies in the calculation of lines and pitches -* fixed incorrectly loading of Huffman and FAX encoded TIFFs -* fixed LoadTGA accepting 16 bit TGA's and returning corrupt DIB's -- removed LZW support for TIFFs -! FreeImage now uses LibTIFF 3.5.4 -+ added ICO support -+ added overridable file I/O support in the form of FreeImageIO and fi_handle -+ added FI_Load for generic image loading -+ added FI_ReduceColors for color reduction -+ added FI_Copy to copy a bitmap in memory - -January 5th 2000 - 1.0.0 diff --git a/extern/FreeImage/license-fi.txt b/extern/FreeImage/license-fi.txt deleted file mode 100644 index 03b666c..0000000 --- a/extern/FreeImage/license-fi.txt +++ /dev/null @@ -1,142 +0,0 @@ -FreeImage Public License - Version 1.0 ---------------------------------------------- - -1. Definitions. - -1.1. "Contributor" means each entity that creates or contributes to the creation of Modifications. - -1.2. "Contributor Version" means the combination of the Original Code, prior Modifications used by a Contributor, and the Modifications made by that particular Contributor. - -1.3. "Covered Code" means the Original Code or Modifications or the combination of the Original Code and Modifications, in each case including portions thereof. - -1.4. "Electronic Distribution Mechanism" means a mechanism generally accepted in the software development community for the electronic transfer of data. - -1.5. "Executable" means Covered Code in any form other than Source Code. - -1.6. 
"Initial Developer" means the individual or entity identified as the Initial Developer in the Source Code notice required by Exhibit A. - -1.7. "Larger Work" means a work which combines Covered Code or portions thereof with code not governed by the terms of this License. - -1.8. "License" means this document. - -1.9. "Modifications" means any addition to or deletion from the substance or structure of either the Original Code or any previous Modifications. When Covered Code is released as a series of files, a -Modification is: - -A. Any addition to or deletion from the contents of a file containing Original Code or previous Modifications. - -B. Any new file that contains any part of the Original Code or previous Modifications. - -1.10. "Original Code" means Source Code of computer software code which is described in the Source Code notice required by Exhibit A as Original Code, and which, at the time of its release under this License is not already Covered Code governed by this License. - -1.11. "Source Code" means the preferred form of the Covered Code for making modifications to it, including all modules it contains, plus any associated interface definition files, scripts used to control -compilation and installation of an Executable, or a list of source code differential comparisons against either the Original Code or another well known, available Covered Code of the Contributor's choice. The Source Code can be in a compressed or archival form, provided the appropriate decompression or de-archiving software is widely available for no charge. - -1.12. "You" means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License or a future version of this License issued under Section 6.1. For legal entities, "You" includes any entity which controls, is controlled by, or is under common control with You. 
For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the -direction or management of such entity, whether by contract or otherwise, or (b) ownership of fifty percent (50%) or more of the outstanding shares or beneficial ownership of such entity. - -2. Source Code License. - -2.1. The Initial Developer Grant. -The Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims: - -(a) to use, reproduce, modify, display, perform, sublicense and distribute the Original Code (or portions thereof) with or without Modifications, or as part of a Larger Work; and - -(b) under patents now or hereafter owned or controlled by Initial Developer, to make, have made, use and sell ("Utilize") the Original Code (or portions thereof), but solely to the extent that -any such patent is reasonably necessary to enable You to Utilize the Original Code (or portions thereof) and not to any greater extent that may be necessary to Utilize further Modifications or -combinations. - -2.2. Contributor Grant. -Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims: - -(a) to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof) either on an unmodified basis, with other Modifications, as Covered Code or as part of a Larger Work; and - -(b) under patents now or hereafter owned or controlled by Contributor, to Utilize the Contributor Version (or portions thereof), but solely to the extent that any such patent is reasonably necessary to enable You to Utilize the Contributor Version (or portions thereof), and not to any greater extent that -may be necessary to Utilize further Modifications or combinations. - -3. Distribution Obligations. - -3.1. Application of License. 
-The Modifications which You create or to which You contribute are governed by the terms of this License, including without limitation Section 2.2. The Source Code version of Covered Code may be distributed only under the terms of this License or a future version of this License released under Section 6.1, and You must include a copy of this License with every copy of the Source Code You distribute. You may not offer or impose any terms on any Source Code version that alters or -restricts the applicable version of this License or the recipients' rights hereunder. However, You may include an additional document offering the additional rights described in Section 3.5. - -3.2. Availability of Source Code. -Any Modification which You create or to which You contribute must be made available in Source Code form under the terms of this License either on the same media as an Executable version or via an accepted Electronic Distribution Mechanism to anyone to whom you made an Executable version available; and if made available via Electronic Distribution Mechanism, must remain available for at least twelve (12) months after the date it initially became available, or at least six (6) months after a subsequent version of that particular Modification has been made available to such recipients. You are responsible for ensuring that the Source Code version remains available even if the Electronic Distribution Mechanism is maintained by a third party. - -3.3. Description of Modifications. -You must cause all Covered Code to which you contribute to contain a file documenting the changes You made to create that Covered Code and the date of any change. 
You must include a prominent statement that the Modification is derived, directly or indirectly, from Original Code provided by the Initial Developer and including the name of the Initial Developer in (a) the Source Code, and (b) in any notice in an Executable version or related documentation in which You describe the origin or ownership of the Covered Code. - -3.4. Intellectual Property Matters - -(a) Third Party Claims. -If You have knowledge that a party claims an intellectual property right in particular functionality or code (or its utilization under this License), you must include a text file with the source code distribution titled "LEGAL" which describes the claim and the party making the claim in sufficient detail that a recipient will know whom to contact. If you obtain such knowledge after You make Your Modification available as described in Section 3.2, You shall promptly modify the LEGAL file in all copies You make -available thereafter and shall take other steps (such as notifying appropriate mailing lists or newsgroups) reasonably calculated to inform those who received the Covered Code that new knowledge has been obtained. - -(b) Contributor APIs. -If Your Modification is an application programming interface and You own or control patents which are reasonably necessary to implement that API, you must also include this information in the LEGAL file. - -3.5. Required Notices. -You must duplicate the notice in Exhibit A in each file of the Source Code, and this License in any documentation for the Source Code, where You describe recipients' rights relating to Covered Code. If You created one or more Modification(s), You may add your name as a Contributor to the notice described in Exhibit A. If it is not possible to put such notice in a particular Source Code file due to its -structure, then you must include such notice in a location (such as a relevant directory file) where a user would be likely to look for such a notice. 
You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Code. However, You may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You must make it absolutely clear than any such warranty, support, indemnity or -liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of -warranty, support, indemnity or liability terms You offer. - -3.6. Distribution of Executable Versions. -You may distribute Covered Code in Executable form only if the requirements of Section 3.1-3.5 have been met for that Covered Code, and if You include a notice stating that the Source Code version of the Covered Code is available under the terms of this License, including a description of how and where You have fulfilled the obligations of Section 3.2. The notice must be conspicuously included in any notice in an Executable version, related documentation or collateral in which You -describe recipients' rights relating to the Covered Code. You may distribute the Executable version of Covered Code under a license of Your choice, which may contain terms different from this License, -provided that You are in compliance with the terms of this License and that the license for the Executable version does not attempt to limit or alter the recipient's rights in the Source Code version from the rights set forth in this License. If You distribute the Executable version under a different license You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or any Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. 
- -3.7. Larger Works. -You may create a Larger Work by combining Covered Code with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Code. - -4. Inability to Comply Due to Statute or Regulation. - -If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Code due to statute or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be included in the LEGAL file described in Section 3.4 and must be included with all distributions of the Source Code. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. - -5. Application of this License. - -This License applies to code to which the Initial Developer has attached the notice in Exhibit A, and to related Covered Code. - -6. Versions of the License. - -6.1. New Versions. -Floris van den Berg may publish revised and/or new versions of the License from time to time. Each version will be given a distinguishing version number. - -6.2. Effect of New Versions. -Once Covered Code has been published under a particular version of the License, You may always continue to use it under the terms of that version. You may also choose to use such Covered Code under the terms of any subsequent version of the License published by Floris van den Berg -No one other than Floris van den Berg has the right to modify the terms applicable to Covered Code created under this License. - -6.3. Derivative Works. 
-If you create or use a modified version of this License (which you may only do in order to apply it to code which is not already Covered Code governed by this License), you must (a) rename Your license so that the phrases "FreeImage", `FreeImage Public License", "FIPL", or any confusingly similar phrase do not appear anywhere in your license and (b) otherwise make it clear that your version of the license contains terms which differ from the FreeImage Public License. (Filling in the name of the Initial Developer, Original Code or Contributor in the notice described in Exhibit A shall not of themselves be deemed to be modifications of this License.) - -7. DISCLAIMER OF WARRANTY. - -COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. - -8. TERMINATION. - -This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. All sublicenses to the Covered Code which are properly granted shall survive any termination of this License. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. - -9. LIMITATION OF LIABILITY. 
- -UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE -EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THAT EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. - -10. U.S. GOVERNMENT END USERS. - -The Covered Code is a "commercial item," as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer software" and "commercial computer software documentation," as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Code with only those rights set forth herein. - -11. MISCELLANEOUS. - -This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by Dutch law provisions (except to the extent applicable law, if any, provides otherwise), excluding its conflict-of-law provisions. 
With respect to disputes in which at least one party is a citizen of, or an entity chartered or registered to do business in, the The Netherlands: (a) unless otherwise agreed in writing, all disputes relating to this License (excepting any dispute relating to intellectual property rights) shall be subject to final and binding arbitration, with the losing party paying all costs of arbitration; (b) any arbitration relating to this Agreement shall be held in Almelo, The Netherlands; and (c) any litigation relating to this Agreement shall be subject to the jurisdiction of the court of Almelo, The Netherlands with the losing party responsible for costs, including without limitation, court costs and reasonable attorneys fees and expenses. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. - -12. RESPONSIBILITY FOR CLAIMS. - -Except in cases where another Contributor has failed to comply with Section 3.4, You are responsible for damages arising, directly or indirectly, out of Your utilization of rights under this License, based -on the number of copies of Covered Code you made available, the revenues you received from utilizing such rights, and other relevant factors. You agree to work with affected parties to distribute -responsibility on an equitable basis. - -EXHIBIT A. - -"The contents of this file are subject to the FreeImage Public License Version 1.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://home.wxs.nl/~flvdberg/freeimage-license.txt - -Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. 
\ No newline at end of file diff --git a/extern/FreeImage/license-gpl.txt b/extern/FreeImage/license-gpl.txt deleted file mode 100644 index 7d1f860..0000000 --- a/extern/FreeImage/license-gpl.txt +++ /dev/null @@ -1,342 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. 
You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. 
The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. 
(Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. 
If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. 
- - diff --git a/extern/gnuwin32/CMakeLists.txt b/extern/gnuwin32/CMakeLists.txt deleted file mode 100644 index 7dd11ba..0000000 --- a/extern/gnuwin32/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ - -INSTALL(PROGRAMS - bin/win32/jpeg62.dll - bin/win32/libpng12.dll - bin/win32/libtiff3.dll - bin/win32/zlib1.dll - DESTINATION bin) diff --git a/gnuwin32/CMakeLists.txt b/gnuwin32/CMakeLists.txt new file mode 100644 index 0000000..ab3c0cb --- /dev/null +++ b/gnuwin32/CMakeLists.txt @@ -0,0 +1,7 @@ + +INSTALL(PROGRAMS + bin/jpeg62.dll + bin/libpng13.dll + bin/libtiff3.dll + bin/zlib1.dll + DESTINATION bin) diff --git a/extern/gnuwin32/bin/win32/jpeg62.dll b/gnuwin32/bin/jpeg62.dll similarity index 100% rename from extern/gnuwin32/bin/win32/jpeg62.dll rename to gnuwin32/bin/jpeg62.dll diff --git a/extern/gnuwin32/bin/win32/libpng12.dll b/gnuwin32/bin/libpng12.dll similarity index 100% rename from extern/gnuwin32/bin/win32/libpng12.dll rename to gnuwin32/bin/libpng12.dll diff --git a/extern/gnuwin32/bin/win32/libtiff3.dll b/gnuwin32/bin/libtiff3.dll similarity index 100% rename from extern/gnuwin32/bin/win32/libtiff3.dll rename to gnuwin32/bin/libtiff3.dll diff --git a/extern/gnuwin32/bin/win32/zlib1.dll b/gnuwin32/bin/zlib1.dll similarity index 100% rename from extern/gnuwin32/bin/win32/zlib1.dll rename to gnuwin32/bin/zlib1.dll diff --git a/extern/gnuwin32/include/jconfig.h b/gnuwin32/include/jconfig.h similarity index 100% rename from extern/gnuwin32/include/jconfig.h rename to gnuwin32/include/jconfig.h diff --git a/extern/gnuwin32/include/jerror.h b/gnuwin32/include/jerror.h similarity index 100% rename from extern/gnuwin32/include/jerror.h rename to gnuwin32/include/jerror.h diff --git a/extern/gnuwin32/include/jmorecfg.h b/gnuwin32/include/jmorecfg.h similarity index 100% rename from extern/gnuwin32/include/jmorecfg.h rename to gnuwin32/include/jmorecfg.h diff --git a/extern/gnuwin32/include/jpeglib.h b/gnuwin32/include/jpeglib.h similarity index 100% rename from 
extern/gnuwin32/include/jpeglib.h rename to gnuwin32/include/jpeglib.h diff --git a/extern/gnuwin32/include/png.h b/gnuwin32/include/png.h similarity index 100% rename from extern/gnuwin32/include/png.h rename to gnuwin32/include/png.h diff --git a/extern/gnuwin32/include/pngconf.h b/gnuwin32/include/pngconf.h similarity index 100% rename from extern/gnuwin32/include/pngconf.h rename to gnuwin32/include/pngconf.h diff --git a/extern/gnuwin32/include/tiff.h b/gnuwin32/include/tiff.h similarity index 100% rename from extern/gnuwin32/include/tiff.h rename to gnuwin32/include/tiff.h diff --git a/extern/gnuwin32/include/tiffconf.h b/gnuwin32/include/tiffconf.h similarity index 100% rename from extern/gnuwin32/include/tiffconf.h rename to gnuwin32/include/tiffconf.h diff --git a/extern/gnuwin32/include/tiffio.h b/gnuwin32/include/tiffio.h similarity index 100% rename from extern/gnuwin32/include/tiffio.h rename to gnuwin32/include/tiffio.h diff --git a/extern/gnuwin32/include/tiffvers.h b/gnuwin32/include/tiffvers.h similarity index 100% rename from extern/gnuwin32/include/tiffvers.h rename to gnuwin32/include/tiffvers.h diff --git a/extern/gnuwin32/include/zconf.h b/gnuwin32/include/zconf.h similarity index 100% rename from extern/gnuwin32/include/zconf.h rename to gnuwin32/include/zconf.h diff --git a/extern/gnuwin32/include/zlib.h b/gnuwin32/include/zlib.h similarity index 100% rename from extern/gnuwin32/include/zlib.h rename to gnuwin32/include/zlib.h diff --git a/extern/gnuwin32/lib/jpeg.def b/gnuwin32/lib/jpeg.def similarity index 100% rename from extern/gnuwin32/lib/jpeg.def rename to gnuwin32/lib/jpeg.def diff --git a/extern/gnuwin32/lib/jpeg.lib b/gnuwin32/lib/jpeg.lib similarity index 100% rename from extern/gnuwin32/lib/jpeg.lib rename to gnuwin32/lib/jpeg.lib diff --git a/gnuwin32/lib/libjpeg.dll.a b/gnuwin32/lib/libjpeg.dll.a new file mode 100644 index 0000000..d5dfc6c Binary files /dev/null and b/gnuwin32/lib/libjpeg.dll.a differ diff --git 
a/gnuwin32/lib/libpng.a b/gnuwin32/lib/libpng.a new file mode 100644 index 0000000..e8b58ae Binary files /dev/null and b/gnuwin32/lib/libpng.a differ diff --git a/extern/gnuwin32/lib/libpng.def b/gnuwin32/lib/libpng.def similarity index 100% rename from extern/gnuwin32/lib/libpng.def rename to gnuwin32/lib/libpng.def diff --git a/gnuwin32/lib/libpng.dll.a b/gnuwin32/lib/libpng.dll.a new file mode 100644 index 0000000..cba2810 Binary files /dev/null and b/gnuwin32/lib/libpng.dll.a differ diff --git a/extern/gnuwin32/lib/libpng.lib b/gnuwin32/lib/libpng.lib similarity index 100% rename from extern/gnuwin32/lib/libpng.lib rename to gnuwin32/lib/libpng.lib diff --git a/extern/gnuwin32/lib/libpng12.def b/gnuwin32/lib/libpng12.def similarity index 100% rename from extern/gnuwin32/lib/libpng12.def rename to gnuwin32/lib/libpng12.def diff --git a/gnuwin32/lib/libpng12.dll.a b/gnuwin32/lib/libpng12.dll.a new file mode 100644 index 0000000..7d42d0d Binary files /dev/null and b/gnuwin32/lib/libpng12.dll.a differ diff --git a/extern/gnuwin32/lib/libtiff.def b/gnuwin32/lib/libtiff.def similarity index 100% rename from extern/gnuwin32/lib/libtiff.def rename to gnuwin32/lib/libtiff.def diff --git a/gnuwin32/lib/libtiff.dll.a b/gnuwin32/lib/libtiff.dll.a new file mode 100644 index 0000000..0b4b212 Binary files /dev/null and b/gnuwin32/lib/libtiff.dll.a differ diff --git a/gnuwin32/lib/libz.a b/gnuwin32/lib/libz.a new file mode 100644 index 0000000..876f898 Binary files /dev/null and b/gnuwin32/lib/libz.a differ diff --git a/gnuwin32/lib/libz.dll.a b/gnuwin32/lib/libz.dll.a new file mode 100644 index 0000000..fd18995 Binary files /dev/null and b/gnuwin32/lib/libz.dll.a differ diff --git a/extern/gnuwin32/lib/tiff.lib b/gnuwin32/lib/tiff.lib similarity index 100% rename from extern/gnuwin32/lib/tiff.lib rename to gnuwin32/lib/tiff.lib diff --git a/extern/gnuwin32/lib/zlib.def b/gnuwin32/lib/zlib.def similarity index 100% rename from extern/gnuwin32/lib/zlib.def rename to 
gnuwin32/lib/zlib.def diff --git a/extern/gnuwin32/lib/zlib.lib b/gnuwin32/lib/zlib.lib similarity index 100% rename from extern/gnuwin32/lib/zlib.lib rename to gnuwin32/lib/zlib.lib diff --git a/project/vc8/.ignore b/project/vc8/.ignore deleted file mode 100644 index b351d81..0000000 --- a/project/vc8/.ignore +++ /dev/null @@ -1,13 +0,0 @@ -nvtt.suo -nvtt.ncb -release.win32 -debug.win32 -release.x64 -debug.x64 -release (no cuda).win32 -debug (no cuda).win32 -release (no cuda).x64 -debug (no cuda).x64 -Release -Debug -*.icastano.user diff --git a/project/vc8/Nvidia.TextureTools/TextureTools.cs b/project/vc8/Nvidia.TextureTools/TextureTools.cs index 9b57249..74cdc9b 100644 --- a/project/vc8/Nvidia.TextureTools/TextureTools.cs +++ b/project/vc8/Nvidia.TextureTools/TextureTools.cs @@ -428,8 +428,8 @@ namespace Nvidia.TextureTools [DllImport("nvtt", CharSet = CharSet.Ansi), SuppressUnmanagedCodeSecurity] private extern static void nvttSetOutputOptionsFileName(IntPtr outputOptions, string fileName); - //[DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - //private extern static void nvttSetOutputOptionsErrorHandler(IntPtr outputOptions, ErrorHandler errorHandler); + [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] + private extern static void nvttSetOutputOptionsErrorHandler(IntPtr outputOptions, ErrorHandler errorHandler); private void ErrorCallback(Error error) { @@ -449,7 +449,7 @@ namespace Nvidia.TextureTools public OutputOptions() { options = nvttCreateOutputOptions(); - //nvttSetOutputOptionsErrorHandler(options, new ErrorHandler(ErrorCallback)); + nvttSetOutputOptionsErrorHandler(options, new ErrorHandler(ErrorCallback)); } ~OutputOptions() { diff --git a/project/vc8/PhotoshopExporter/PhotoshopExporter.cpp b/project/vc8/PhotoshopExporter/PhotoshopExporter.cpp deleted file mode 100644 index 30c12c6..0000000 --- a/project/vc8/PhotoshopExporter/PhotoshopExporter.cpp +++ /dev/null @@ -1,76 +0,0 @@ - -#include "PhotoshopExporter.h" - -#include - -SPBasicSuite * 
sSPBasic = NULL; - - - -void InitGlobals (Ptr globalPtr) -{ - Globals * globals = (Globals *)globalPtr; - - // Set default values. - globals->queryForParameters = true; -} - - - - -DLLExport MACPASCAL void PluginMain (const short selector, - void *exportParamBlock, - long *data, - short *result) -{ - if (selector == exportSelectorAbout) - { - sSPBasic = ((AboutRecord*)exportParamBlock)->sSPBasic; - //DoAbout((AboutRecordPtr)exportParamBlock); - } - else - { - sSPBasic = ((ExportRecordPtr)exportParamBlock)->sSPBasic; - - // Allocate and initialize globals. - Ptr globalPtr = AllocateGlobals ((uint32)result, (uint32)exportParamBlock, ((ExportRecordPtr)exportParamBlock)->handleProcs, sizeof(Globals), data, InitGlobals); - - if (globalPtr == NULL) - { - *result = memFullErr; - return; - } - - // Get our "globals" variable assigned as a Global Pointer struct with the - // data we've returned: - Globals * globals = (Globals *)globalPtr; - - - //----------------------------------------------------------------------- - // (3) Dispatch selector. - //----------------------------------------------------------------------- - - switch (selector) - { - case exportSelectorPrepare: - // DoPrepare(globals); - break; - case exportSelectorStart: - // DoStart(globals); - break; - case exportSelectorContinue: - // DoContinue(globals); - break; - case exportSelectorFinish: - // DoFinish(globals); - break; - } - - // unlock handle pointing to parameter block and data so it can move - // if memory gets shuffled: - if ((Handle)*data != NULL) - { - PIUnlockHandle((Handle)*data); - } - } -} diff --git a/project/vc8/PhotoshopExporter/PhotoshopExporter.h b/project/vc8/PhotoshopExporter/PhotoshopExporter.h deleted file mode 100644 index 669a6fa..0000000 --- a/project/vc8/PhotoshopExporter/PhotoshopExporter.h +++ /dev/null @@ -1,27 +0,0 @@ - -#ifndef NV_PHOTOSHOP_EXPORTER_H -#define NV_PHOTOSHOP_EXPORTER_H - -#include // Export Photoshop header file. -#include // SDK Utility library. 
- -// Photoshop crazyness: -//#define gResult (*(globals->result)) -#define gStuff (globals->exportParamBlock) - - -// This is our structure that we use to pass globals between routines: -struct Globals -{ - short * result; // Must always be first in Globals. - ExportRecord * exportParamBlock; // Must always be second in Globals. - - Boolean queryForParameters; - - // ... - -}; - - - -#endif // NV_PHOTOSHOP_EXPORTER_H diff --git a/project/vc8/PhotoshopExporter/PhotoshopExporter.vcproj b/project/vc8/PhotoshopExporter/PhotoshopExporter.vcproj deleted file mode 100644 index fc911a6..0000000 --- a/project/vc8/PhotoshopExporter/PhotoshopExporter.vcproj +++ /dev/null @@ -1,236 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc8/nvassemble/nvassemble.vcproj b/project/vc8/nvassemble/nvassemble.vcproj index 6b5bc3b..fbc8022 100644 --- a/project/vc8/nvassemble/nvassemble.vcproj +++ b/project/vc8/nvassemble/nvassemble.vcproj @@ -21,7 +21,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="1" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -136,8 +134,9 @@ /> @@ -191,7 +189,7 @@ /> @@ -206,10 +204,10 @@ /> @@ -243,7 +241,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="1" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" > @@ -278,8 +275,9 @@ /> diff --git a/project/vc8/nvcompress/nvcompress.vcproj b/project/vc8/nvcompress/nvcompress.vcproj index 90c807a..9058e14 100644 --- a/project/vc8/nvcompress/nvcompress.vcproj +++ b/project/vc8/nvcompress/nvcompress.vcproj @@ -23,7 +23,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="1" - 
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -204,7 +201,7 @@ /> @@ -304,7 +300,7 @@ Name="VCLinkerTool" OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe" LinkIncremental="1" - AdditionalLibraryDirectories="..\..\..\extern\gnuwin32\lib;..\..\..\extern\FreeImage" + AdditionalLibraryDirectories="..\..\..\gnuwin32\lib" SubSystem="1" OptimizeReferences="2" EnableCOMDATFolding="2" @@ -340,7 +336,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="1" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -537,10 +530,10 @@ /> @@ -173,7 +170,7 @@ /> @@ -236,7 +232,7 @@ /> - - @@ -297,10 +289,6 @@ RelativePath="..\..\..\src\nvcore\Memory.cpp" > - - @@ -319,10 +307,6 @@ Filter="h;hpp;hxx;hm;inl;inc;xsd" UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}" > - - @@ -335,10 +319,6 @@ RelativePath="..\..\..\src\nvcore\DefsVcWin32.h" > - - @@ -359,34 +339,10 @@ RelativePath="..\..\..\src\nvcore\Ptr.h" > - - - - - - - - - - - - diff --git a/project/vc8/nvddsinfo/nvddsinfo.vcproj b/project/vc8/nvddsinfo/nvddsinfo.vcproj index 32fc39c..a16c15b 100644 --- a/project/vc8/nvddsinfo/nvddsinfo.vcproj +++ b/project/vc8/nvddsinfo/nvddsinfo.vcproj @@ -21,7 +21,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="1" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -205,10 +203,10 @@ /> @@ -242,7 +240,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="1" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" > diff --git a/project/vc8/nvdecompress/nvdecompress.vcproj b/project/vc8/nvdecompress/nvdecompress.vcproj index b02aa7f..a74b828 100644 --- a/project/vc8/nvdecompress/nvdecompress.vcproj +++ 
b/project/vc8/nvdecompress/nvdecompress.vcproj @@ -23,7 +23,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="1" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -204,7 +201,7 @@ /> @@ -305,7 +301,7 @@ Name="VCLinkerTool" OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe" LinkIncremental="1" - AdditionalLibraryDirectories="..\..\..\extern\FreeImage" + AdditionalLibraryDirectories="..\..\..\gnuwin32\lib" SubSystem="1" OptimizeReferences="2" EnableCOMDATFolding="2" diff --git a/project/vc8/nvimage/.ignore b/project/vc8/nvimage/.ignore deleted file mode 100644 index 66b0249..0000000 --- a/project/vc8/nvimage/.ignore +++ /dev/null @@ -1,3 +0,0 @@ -Debug -Release -*.icastano.user diff --git a/project/vc8/nvimage/nvimage.vcproj b/project/vc8/nvimage/nvimage.vcproj index 30efe0f..8f0f44e 100644 --- a/project/vc8/nvimage/nvimage.vcproj +++ b/project/vc8/nvimage/nvimage.vcproj @@ -23,7 +23,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="4" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -236,7 +232,7 @@ /> - - @@ -335,10 +327,6 @@ RelativePath="..\..\..\src\nvimage\ColorBlock.h" > - - @@ -367,6 +355,10 @@ RelativePath="..\..\..\src\nvimage\nvimage.h" > + + diff --git a/project/vc8/nvimgdiff/nvimgdiff.vcproj b/project/vc8/nvimgdiff/nvimgdiff.vcproj index 185132c..3441505 100644 --- a/project/vc8/nvimgdiff/nvimgdiff.vcproj +++ b/project/vc8/nvimgdiff/nvimgdiff.vcproj @@ -23,7 +23,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="1" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -204,7 +201,7 @@ /> @@ -305,7 +301,7 @@ Name="VCLinkerTool" 
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe" LinkIncremental="1" - AdditionalLibraryDirectories="..\..\..\extern\gnuwin32\lib" + AdditionalLibraryDirectories="..\..\..\gnuwin32\lib" SubSystem="1" OptimizeReferences="2" EnableCOMDATFolding="2" diff --git a/project/vc8/nvmath/.ignore b/project/vc8/nvmath/.ignore deleted file mode 100644 index 66b0249..0000000 --- a/project/vc8/nvmath/.ignore +++ /dev/null @@ -1,3 +0,0 @@ -Debug -Release -*.icastano.user diff --git a/project/vc8/nvmath/nvmath.vcproj b/project/vc8/nvmath/nvmath.vcproj index 45688d0..8dc38d1 100644 --- a/project/vc8/nvmath/nvmath.vcproj +++ b/project/vc8/nvmath/nvmath.vcproj @@ -23,7 +23,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="4" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -173,7 +170,7 @@ /> @@ -236,7 +232,7 @@ /> - - - - @@ -307,22 +295,10 @@ RelativePath="..\..\..\src\nvmath\Color.h" > - - - - - - diff --git a/project/vc8/nvtt.sln b/project/vc8/nvtt.sln index f5d27bc..aa42b9a 100644 --- a/project/vc8/nvtt.sln +++ b/project/vc8/nvtt.sln @@ -27,9 +27,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcp EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvddsinfo.vcproj", "{841B73C5-C679-4EEF-A50A-7D6106642B49}" ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} + {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} + {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", 
"nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}" @@ -42,15 +42,15 @@ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}" ProjectSection(ProjectDependencies) = postProject {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} + {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}" ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} + {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} + {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}" @@ -62,390 +62,258 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcp EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt-testsuite", "testsuite\testsuite.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}" - ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - 
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} - {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} - EndProjectSection -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU - Debug (no cuda)|Mixed Platforms = Debug (no cuda)|Mixed Platforms Debug (no cuda)|Win32 = Debug (no cuda)|Win32 Debug (no cuda)|x64 = Debug (no cuda)|x64 Debug|Any CPU = Debug|Any CPU - Debug|Mixed Platforms = Debug|Mixed Platforms Debug|Win32 = Debug|Win32 Debug|x64 = Debug|x64 Release (no cuda)|Any CPU = Release (no cuda)|Any CPU - Release (no cuda)|Mixed Platforms = Release (no cuda)|Mixed Platforms Release (no cuda)|Win32 = Release (no cuda)|Win32 Release (no cuda)|x64 = Release (no cuda)|x64 Release|Any CPU = Release|Any CPU - Release|Mixed Platforms = Release|Mixed Platforms Release|Win32 = Release|Win32 Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Any CPU.ActiveCfg = Debug (no cuda)|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug (no cuda)|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug (no cuda)|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Win32.ActiveCfg = Debug (no cuda)|Win32 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Win32.Build.0 = Debug (no cuda)|Win32 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|x64.ActiveCfg = Debug (no cuda)|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|x64.Build.0 = Debug (no cuda)|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Any CPU.ActiveCfg = Debug|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Mixed 
Platforms.Build.0 = Debug|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Win32.ActiveCfg = Debug|Win32 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Win32.Build.0 = Debug|Win32 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|x64.ActiveCfg = Debug|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|x64.Build.0 = Debug|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Any CPU.ActiveCfg = Release (no cuda)|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release (no cuda)|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Mixed Platforms.Build.0 = Release (no cuda)|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Win32.ActiveCfg = Release (no cuda)|Win32 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Win32.Build.0 = Release (no cuda)|Win32 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|x64.ActiveCfg = Release (no cuda)|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|x64.Build.0 = Release (no cuda)|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Any CPU.ActiveCfg = Release|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Mixed Platforms.Build.0 = Release|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Win32.ActiveCfg = Release|Win32 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Win32.Build.0 = Release|Win32 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|x64.ActiveCfg = Release|x64 {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|x64.Build.0 = Release|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Any CPU.ActiveCfg = Debug (no cuda)|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug (no cuda)|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug (no cuda)|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Win32.ActiveCfg = Debug (no cuda)|Win32 
{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Win32.Build.0 = Debug (no cuda)|Win32 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|x64.ActiveCfg = Debug (no cuda)|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|x64.Build.0 = Debug (no cuda)|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Any CPU.ActiveCfg = Debug|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Mixed Platforms.Build.0 = Debug|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Win32.ActiveCfg = Debug|Win32 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Win32.Build.0 = Debug|Win32 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|x64.ActiveCfg = Debug|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|x64.Build.0 = Debug|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Any CPU.ActiveCfg = Release (no cuda)|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release (no cuda)|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Mixed Platforms.Build.0 = Release (no cuda)|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Win32.ActiveCfg = Release (no cuda)|Win32 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Win32.Build.0 = Release (no cuda)|Win32 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|x64.ActiveCfg = Release (no cuda)|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|x64.Build.0 = Release (no cuda)|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Any CPU.ActiveCfg = Release|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Mixed Platforms.Build.0 = Release|x64 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Win32.ActiveCfg = Release|Win32 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Win32.Build.0 = Release|Win32 {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|x64.ActiveCfg = Release|x64 
{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|x64.Build.0 = Release|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|x64.Build.0 = Debug|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Any CPU.ActiveCfg = Debug|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Mixed Platforms.Build.0 = Debug|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Win32.ActiveCfg = Debug|Win32 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Win32.Build.0 = Debug|Win32 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|x64.ActiveCfg = Debug|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|x64.Build.0 = Debug|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Any CPU.ActiveCfg = Release|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Win32.Build.0 = Release|Win32 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|x64.ActiveCfg = Release|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|x64.Build.0 = Release|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Any CPU.ActiveCfg = Release|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Mixed Platforms.ActiveCfg = 
Release|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Mixed Platforms.Build.0 = Release|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Win32.ActiveCfg = Release|Win32 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Win32.Build.0 = Release|Win32 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|x64.ActiveCfg = Release|x64 {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|x64.Build.0 = Release|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|x64.Build.0 = Debug|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Any CPU.ActiveCfg = Debug|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Mixed Platforms.Build.0 = Debug|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Win32.ActiveCfg = Debug|Win32 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Win32.Build.0 = Debug|Win32 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|x64.ActiveCfg = Debug|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|x64.Build.0 = Debug|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Any CPU.ActiveCfg = Release|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Win32.Build.0 = Release|Win32 
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|x64.ActiveCfg = Release|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|x64.Build.0 = Release|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Any CPU.ActiveCfg = Release|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Mixed Platforms.Build.0 = Release|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Win32.ActiveCfg = Release|Win32 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Win32.Build.0 = Release|Win32 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|x64.ActiveCfg = Release|x64 {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|x64.Build.0 = Release|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|x64.Build.0 = Debug|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Debug|Any CPU.ActiveCfg = Debug|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug|Mixed Platforms.Build.0 = Debug|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Debug|Win32.ActiveCfg = Debug|Win32 {50C465FE-B308-42BC-894D-89484482AF06}.Debug|Win32.Build.0 = Debug|Win32 {50C465FE-B308-42BC-894D-89484482AF06}.Debug|x64.ActiveCfg = Debug|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Debug|x64.Build.0 = Debug|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Any CPU.ActiveCfg = Release|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Release 
(no cuda)|Mixed Platforms.ActiveCfg = Release|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Win32.Build.0 = Release|Win32 {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|x64.ActiveCfg = Release|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|x64.Build.0 = Release|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Release|Any CPU.ActiveCfg = Release|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Release|Mixed Platforms.Build.0 = Release|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Release|Win32.ActiveCfg = Release|Win32 {50C465FE-B308-42BC-894D-89484482AF06}.Release|Win32.Build.0 = Release|Win32 {50C465FE-B308-42BC-894D-89484482AF06}.Release|x64.ActiveCfg = Release|x64 {50C465FE-B308-42BC-894D-89484482AF06}.Release|x64.Build.0 = Release|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|x64.Build.0 = Debug|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Any CPU.ActiveCfg = Debug|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Mixed Platforms.Build.0 = Debug|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Win32.ActiveCfg = Debug|Win32 
{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Win32.Build.0 = Debug|Win32 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|x64.ActiveCfg = Debug|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|x64.Build.0 = Debug|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Any CPU.ActiveCfg = Release|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Win32.Build.0 = Release|Win32 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|x64.ActiveCfg = Release|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|x64.Build.0 = Release|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Any CPU.ActiveCfg = Release|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Mixed Platforms.Build.0 = Release|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Win32.ActiveCfg = Release|Win32 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Win32.Build.0 = Release|Win32 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|x64.ActiveCfg = Release|x64 {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|x64.Build.0 = Release|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|x64.Build.0 = 
Debug|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Any CPU.ActiveCfg = Debug|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Mixed Platforms.Build.0 = Debug|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Win32.ActiveCfg = Debug|Win32 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Win32.Build.0 = Debug|Win32 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|x64.ActiveCfg = Debug|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|x64.Build.0 = Debug|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Any CPU.ActiveCfg = Release|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Win32.Build.0 = Release|Win32 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|x64.ActiveCfg = Release|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|x64.Build.0 = Release|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Any CPU.ActiveCfg = Release|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Mixed Platforms.Build.0 = Release|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Win32.ActiveCfg = Release|Win32 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Win32.Build.0 = Release|Win32 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|x64.ActiveCfg = Release|x64 {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|x64.Build.0 = Release|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Mixed Platforms.Build.0 = 
Debug|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|x64.Build.0 = Debug|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Any CPU.ActiveCfg = Debug|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Mixed Platforms.Build.0 = Debug|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Win32.ActiveCfg = Debug|Win32 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Win32.Build.0 = Debug|Win32 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|x64.ActiveCfg = Debug|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|x64.Build.0 = Debug|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Any CPU.ActiveCfg = Release|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Win32.Build.0 = Release|Win32 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|x64.ActiveCfg = Release|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|x64.Build.0 = Release|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Any CPU.ActiveCfg = Release|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Mixed Platforms.Build.0 = Release|x64 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Win32.ActiveCfg = Release|Win32 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Win32.Build.0 = Release|Win32 {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|x64.ActiveCfg = Release|x64 
{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|x64.Build.0 = Release|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|x64.Build.0 = Debug|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Any CPU.ActiveCfg = Debug|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Mixed Platforms.Build.0 = Debug|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Win32.ActiveCfg = Debug|Win32 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Win32.Build.0 = Debug|Win32 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|x64.ActiveCfg = Debug|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|x64.Build.0 = Debug|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Any CPU.ActiveCfg = Release|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Win32.Build.0 = Release|Win32 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|x64.ActiveCfg = Release|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|x64.Build.0 = Release|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Any CPU.ActiveCfg = Release|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Mixed Platforms.ActiveCfg = 
Release|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Mixed Platforms.Build.0 = Release|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Win32.ActiveCfg = Release|Win32 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Win32.Build.0 = Release|Win32 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|x64.ActiveCfg = Release|x64 {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|x64.Build.0 = Release|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|x64.Build.0 = Debug|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Any CPU.ActiveCfg = Debug|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Mixed Platforms.Build.0 = Debug|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Win32.ActiveCfg = Debug|Win32 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Win32.Build.0 = Debug|Win32 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|x64.ActiveCfg = Debug|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|x64.Build.0 = Debug|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Any CPU.ActiveCfg = Release|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Win32.Build.0 = Release|Win32 
{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|x64.ActiveCfg = Release|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|x64.Build.0 = Release|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Any CPU.ActiveCfg = Release|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Mixed Platforms.Build.0 = Release|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Win32.ActiveCfg = Release|Win32 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Win32.Build.0 = Release|Win32 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|x64.ActiveCfg = Release|x64 {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|x64.Build.0 = Release|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|x64.Build.0 = Debug|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Any CPU.ActiveCfg = Debug|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Mixed Platforms.Build.0 = Debug|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Win32.ActiveCfg = Debug|Win32 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Win32.Build.0 = Debug|Win32 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|x64.ActiveCfg = Debug|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|x64.Build.0 = Debug|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Any CPU.ActiveCfg = Release|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release 
(no cuda)|Mixed Platforms.ActiveCfg = Release|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Win32.Build.0 = Release|Win32 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|x64.ActiveCfg = Release|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|x64.Build.0 = Release|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Any CPU.ActiveCfg = Release|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Mixed Platforms.Build.0 = Release|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Win32.ActiveCfg = Release|Win32 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Win32.Build.0 = Release|Win32 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|x64.ActiveCfg = Release|x64 {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|x64.Build.0 = Release|x64 {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Any CPU.Build.0 = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Win32.ActiveCfg = Debug|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|x64.ActiveCfg = Debug|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Any CPU.Build.0 = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Win32.ActiveCfg = Debug|Any 
CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|x64.ActiveCfg = Debug|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Any CPU.ActiveCfg = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Any CPU.Build.0 = Release|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Mixed Platforms.Build.0 = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Win32.ActiveCfg = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|x64.ActiveCfg = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.ActiveCfg = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Mixed Platforms.Build.0 = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Any CPU.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Mixed Platforms.Build.0 = Debug|Win32 - 
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Any CPU.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Mixed Platforms.Build.0 = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Any CPU.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Mixed Platforms.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Mixed Platforms.Build.0 = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/project/vc8/nvtt.vsprops b/project/vc8/nvtt.vsprops deleted file mode 100644 index 07204cb..0000000 --- a/project/vc8/nvtt.vsprops +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - diff --git a/project/vc8/nvtt/.ignore b/project/vc8/nvtt/.ignore deleted file mode 100644 index 8c0cc17..0000000 --- a/project/vc8/nvtt/.ignore +++ /dev/null @@ -1,22 +0,0 @@ -Debug -Debug (no cuda) -Release -Release (no cuda) -*.icastano.user -*.gpu -*.ii -*.stub.h -*.stub.c -*.pdb -*.cubin -*.hash -*.cudafe1.cpp -*.cudafe1.c -*.cudafe2.c -*.cu.cpp -*.cu.c -*.fatbin.c -*.ptx -*.linkinfo -*.cpp2.i -*.cpp3.i diff --git 
a/project/vc8/nvtt/nvtt.rc b/project/vc8/nvtt/nvtt.rc index 089d1e9..6385cf0 100644 --- a/project/vc8/nvtt/nvtt.rc +++ b/project/vc8/nvtt/nvtt.rc @@ -53,8 +53,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 2,1,0,0 - PRODUCTVERSION 2,1,0,0 + FILEVERSION 2,0,6,0 + PRODUCTVERSION 2,0,6,0 FILEFLAGSMASK 0x17L #ifdef _DEBUG FILEFLAGS 0x1L @@ -71,12 +71,12 @@ BEGIN BEGIN VALUE "CompanyName", "NVIDIA Corporation" VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library" - VALUE "FileVersion", "2, 1, 0, 0" + VALUE "FileVersion", "2, 0, 6, 0" VALUE "InternalName", "nvtt" VALUE "LegalCopyright", "Copyright (C) 2007" VALUE "OriginalFilename", "nvtt.dll" VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library" - VALUE "ProductVersion", "2, 1, 0, 0" + VALUE "ProductVersion", "2, 0, 6, 0" END END BLOCK "VarFileInfo" diff --git a/project/vc8/nvtt/nvtt.vcproj b/project/vc8/nvtt/nvtt.vcproj index 5b49328..2db3b50 100644 --- a/project/vc8/nvtt/nvtt.vcproj +++ b/project/vc8/nvtt/nvtt.vcproj @@ -23,7 +23,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="2" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -211,10 +208,9 @@ Name="VCCLCompilerTool" EnableIntrinsicFunctions="true" OmitFramePointers="true" - AdditionalIncludeDirectories="$(CUDA_INC_PATH); $(ExternDir)" + AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\gnuwin32\include;$(CUDA_INC_PATH)" PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;HAVE_CUDA;__SSE2__;__SSE__;__MMX__" RuntimeLibrary="2" - OpenMP="true" UsePrecompiledHeader="0" WarningLevel="3" Detect64BitPortabilityProblems="true" @@ -231,10 +227,10 @@ /> @@ -297,7 +292,7 @@ Name="VCCLCompilerTool" EnableIntrinsicFunctions="true" OmitFramePointers="true" - AdditionalIncludeDirectories="$(CUDA_INC_PATH)" + AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\gnuwin32\include;$(CUDA_INC_PATH)" 
PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;HAVE_CUDA;__SSE2__;__SSE__;__MMX__" RuntimeLibrary="2" UsePrecompiledHeader="0" @@ -319,7 +314,7 @@ AdditionalDependencies="cudart.lib" OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).dll" LinkIncremental="1" - AdditionalLibraryDirectories=""$(CUDA_LIB_PATH)"" + AdditionalLibraryDirectories="..\..\..\gnuwin32\lib;"$(CUDA_LIB_PATH)"" SubSystem="2" OptimizeReferences="2" EnableCOMDATFolding="2" @@ -356,7 +351,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="2" - InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="0" > @@ -540,7 +532,7 @@ Name="VCCLCompilerTool" EnableIntrinsicFunctions="true" OmitFramePointers="true" - AdditionalIncludeDirectories="" + AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\gnuwin32\include;" PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;__SSE2__;__SSE__;__MMX__" RuntimeLibrary="2" UsePrecompiledHeader="0" @@ -559,10 +551,10 @@ /> @@ -710,7 +701,7 @@ > @@ -720,8 +711,8 @@ > @@ -730,7 +721,7 @@ > @@ -742,7 +733,7 @@ @@ -761,7 +752,7 @@ @@ -775,11 +766,11 @@ - - - - @@ -223,10 +220,10 @@ /> @@ -305,7 +301,7 @@ Name="VCLinkerTool" OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe" LinkIncremental="1" - AdditionalLibraryDirectories="..\..\..\extern\FreeImage" + AdditionalLibraryDirectories="..\..\..\gnuwin32\lib" SubSystem="1" OptimizeReferences="2" EnableCOMDATFolding="2" diff --git a/project/vc8/squish/squish.vcproj b/project/vc8/squish/squish.vcproj index cd57ab6..e10610d 100644 --- a/project/vc8/squish/squish.vcproj +++ b/project/vc8/squish/squish.vcproj @@ -23,7 +23,6 @@ OutputDirectory="$(ConfigurationName)\$(PlatformName)" IntermediateDirectory="$(ConfigurationName)\$(PlatformName)" ConfigurationType="4" - 
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops" CharacterSet="2" > @@ -236,7 +232,7 @@ /> + + @@ -328,19 +328,19 @@ > - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/.ignore b/project/vc9/.ignore deleted file mode 100644 index b351d81..0000000 --- a/project/vc9/.ignore +++ /dev/null @@ -1,13 +0,0 @@ -nvtt.suo -nvtt.ncb -release.win32 -debug.win32 -release.x64 -debug.x64 -release (no cuda).win32 -debug (no cuda).win32 -release (no cuda).x64 -debug (no cuda).x64 -Release -Debug -*.icastano.user diff --git a/project/vc9/Nvidia.TextureTools/Nvidia.TextureTools.csproj b/project/vc9/Nvidia.TextureTools/Nvidia.TextureTools.csproj deleted file mode 100644 index 8ee960c..0000000 --- a/project/vc9/Nvidia.TextureTools/Nvidia.TextureTools.csproj +++ /dev/null @@ -1,53 +0,0 @@ -ïŧŋ - - Debug - AnyCPU - 9.0.21022 - 2.0 - {CAB55C39-8FA9-4912-98D9-E52669C8911D} - Library - Properties - Nvidia.TextureTools - Nvidia.TextureTools - - - 2.0 - - - OnBuildSuccess - - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - - - - - - - - - - - - - \ No newline at end of file diff --git a/project/vc9/Nvidia.TextureTools/Properties/AssemblyInfo.cs b/project/vc9/Nvidia.TextureTools/Properties/AssemblyInfo.cs deleted file mode 100644 index ecd652e..0000000 --- a/project/vc9/Nvidia.TextureTools/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,35 +0,0 @@ -ïŧŋusing System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. 
-[assembly: AssemblyTitle("Nvidia.TextureTools")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("NVIDIA Corporation")] -[assembly: AssemblyProduct("Nvidia.TextureTools")] -[assembly: AssemblyCopyright("Copyright ÂĐ NVIDIA 2008")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("5fa03fb3-b7a3-4ba8-90e7-545929731356")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Revision and Build Numbers -// by using the '*' as shown below: -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/project/vc9/Nvidia.TextureTools/TextureTools.cs b/project/vc9/Nvidia.TextureTools/TextureTools.cs deleted file mode 100644 index 9b57249..0000000 --- a/project/vc9/Nvidia.TextureTools/TextureTools.cs +++ /dev/null @@ -1,526 +0,0 @@ -using System; -using System.Security; -using System.Runtime.InteropServices; - -namespace Nvidia.TextureTools -{ - #region Enums - - #region public enum Format - /// - /// Compression format. - /// - public enum Format - { - // No compression. - RGB, - RGBA = RGB, - - // DX9 formats. - DXT1, - DXT1a, - DXT3, - DXT5, - DXT5n, - - // DX10 formats. - BC1 = DXT1, - BC1a = DXT1a, - BC2 = DXT3, - BC3 = DXT5, - BC3n = DXT5n, - BC4, - BC5, - } - #endregion - - #region public enum Quality - /// - /// Quality modes. 
- /// - public enum Quality - { - Fastest, - Normal, - Production, - Highest, - } - #endregion - - #region public enum WrapMode - /// - /// Wrap modes. - /// - public enum WrapMode - { - Clamp, - Repeat, - Mirror, - } - #endregion - - #region public enum TextureType - /// - /// Texture types. - /// - public enum TextureType - { - Texture2D, - TextureCube, - } - #endregion - - #region public enum InputFormat - /// - /// Input formats. - /// - public enum InputFormat - { - BGRA_8UB - } - #endregion - - #region public enum MipmapFilter - /// - /// Mipmap downsampling filters. - /// - public enum MipmapFilter - { - Box, - Triangle, - Kaiser - } - #endregion - - #region public enum ColorTransform - /// - /// Color transformation. - /// - public enum ColorTransform - { - None, - Linear - } - #endregion - - #region public enum RoundMode - /// - /// Extents rounding mode. - /// - public enum RoundMode - { - None, - ToNextPowerOfTwo, - ToNearestPowerOfTwo, - ToPreviousPowerOfTwo - } - #endregion - - #region public enum AlphaMode - /// - /// Alpha mode. - /// - public enum AlphaMode - { - None, - Transparency, - Premultiplied - } - #endregion - - #region public enum Error - /// - /// Error codes. - /// - public enum Error - { - InvalidInput, - UserInterruption, - UnsupportedFeature, - CudaError, - Unknown, - FileOpen, - FileWrite, - } - #endregion - - #endregion - - #region public class InputOptions - /// - /// Input options. 
- /// - public class InputOptions - { - #region Bindings - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static IntPtr nvttCreateInputOptions(); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttDestroyInputOptions(IntPtr inputOptions); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsTextureLayout(IntPtr inputOptions, TextureType type, int w, int h, int d); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttResetInputOptionsTextureLayout(IntPtr inputOptions); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static bool nvttSetInputOptionsMipmapData(IntPtr inputOptions, IntPtr data, int w, int h, int d, int face, int mipmap); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsFormat(IntPtr inputOptions, InputFormat format); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsAlphaMode(IntPtr inputOptions, AlphaMode alphaMode); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsGamma(IntPtr inputOptions, float inputGamma, float outputGamma); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsWrapMode(IntPtr inputOptions, WrapMode mode); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsMipmapFilter(IntPtr inputOptions, MipmapFilter filter); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsMipmapGeneration(IntPtr inputOptions, bool generateMipmaps, int maxLevel); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsKaiserParameters(IntPtr inputOptions, float width, float alpha, float stretch); - - [DllImport("nvtt"), 
SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsNormalMap(IntPtr inputOptions, bool b); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsConvertToNormalMap(IntPtr inputOptions, bool convert); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsHeightEvaluation(IntPtr inputOptions, float redScale, float greenScale, float blueScale, float alphaScale); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsNormalFilter(IntPtr inputOptions, float small, float medium, float big, float large); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsNormalizeMipmaps(IntPtr inputOptions, bool b); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsColorTransform(IntPtr inputOptions, ColorTransform t); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsLinearTransfrom(IntPtr inputOptions, int channel, float w0, float w1, float w2, float w3); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsMaxExtents(IntPtr inputOptions, int d); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetInputOptionsRoundMode(IntPtr inputOptions, RoundMode mode); - #endregion - - internal IntPtr options; - - public InputOptions() - { - options = nvttCreateInputOptions(); - } - ~InputOptions() - { - nvttDestroyInputOptions(options); - } - - public void SetTextureLayout(TextureType type, int w, int h, int d) - { - nvttSetInputOptionsTextureLayout(options, type, w, h, d); - } - public void ResetTextureLayout() - { - nvttResetInputOptionsTextureLayout(options); - } - - public void SetMipmapData(IntPtr data, int width, int height, int depth, int face, int mipmap) - { - 
nvttSetInputOptionsMipmapData(options, data, width, height, depth, face, mipmap); - } - - public void SetFormat(InputFormat format) - { - nvttSetInputOptionsFormat(options, format); - } - - public void SetAlphaMode(AlphaMode alphaMode) - { - nvttSetInputOptionsAlphaMode(options, alphaMode); - } - - public void SetGamma(float inputGamma, float outputGamma) - { - nvttSetInputOptionsGamma(options, inputGamma, outputGamma); - } - - public void SetWrapMode(WrapMode wrapMode) - { - nvttSetInputOptionsWrapMode(options, wrapMode); - } - - public void SetMipmapFilter(MipmapFilter filter) - { - nvttSetInputOptionsMipmapFilter(options, filter); - } - - public void SetMipmapGeneration(bool enabled) - { - nvttSetInputOptionsMipmapGeneration(options, enabled, -1); - } - - public void SetMipmapGeneration(bool enabled, int maxLevel) - { - nvttSetInputOptionsMipmapGeneration(options, enabled, maxLevel); - } - - public void SetKaiserParameters(float width, float alpha, float stretch) - { - nvttSetInputOptionsKaiserParameters(options, width, alpha, stretch); - } - - public void SetNormalMap(bool b) - { - nvttSetInputOptionsNormalMap(options, b); - } - - public void SetConvertToNormalMap(bool convert) - { - nvttSetInputOptionsConvertToNormalMap(options, convert); - } - - public void SetHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale) - { - nvttSetInputOptionsHeightEvaluation(options, redScale, greenScale, blueScale, alphaScale); - } - - public void SetNormalFilter(float small, float medium, float big, float large) - { - nvttSetInputOptionsNormalFilter(options, small, medium, big, large); - } - - public void SetNormalizeMipmaps(bool b) - { - nvttSetInputOptionsNormalizeMipmaps(options, b); - } - - public void SetColorTransform(ColorTransform t) - { - nvttSetInputOptionsColorTransform(options, t); - } - - public void SetLinearTransfrom(int channel, float w0, float w1, float w2, float w3) - { - nvttSetInputOptionsLinearTransfrom(options, channel, w0, 
w1, w2, w3); - } - - public void SetMaxExtents(int dim) - { - nvttSetInputOptionsMaxExtents(options, dim); - } - - public void SetRoundMode(RoundMode mode) - { - nvttSetInputOptionsRoundMode(options, mode); - } - } - #endregion - - #region public class CompressionOptions - /// - /// Compression options. - /// - public class CompressionOptions - { - #region Bindings - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static IntPtr nvttCreateCompressionOptions(); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttDestroyCompressionOptions(IntPtr compressionOptions); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetCompressionOptionsFormat(IntPtr compressionOptions, Format format); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetCompressionOptionsQuality(IntPtr compressionOptions, Quality quality); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetCompressionOptionsColorWeights(IntPtr compressionOptions, float red, float green, float blue, float alpha); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetCompressionOptionsPixelFormat(IntPtr compressionOptions, uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetCompressionOptionsQuantization(IntPtr compressionOptions, bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold); - #endregion - - internal IntPtr options; - - public CompressionOptions() - { - options = nvttCreateCompressionOptions(); - } - ~CompressionOptions() - { - nvttDestroyCompressionOptions(options); - } - - public void SetFormat(Format format) - { - nvttSetCompressionOptionsFormat(options, format); - } - - public void SetQuality(Quality quality) - { - nvttSetCompressionOptionsQuality(options, quality); - } - - 
public void SetColorWeights(float red, float green, float blue) - { - nvttSetCompressionOptionsColorWeights(options, red, green, blue, 1.0f); - } - - public void SetColorWeights(float red, float green, float blue, float alpha) - { - nvttSetCompressionOptionsColorWeights(options, red, green, blue, alpha); - } - - public void SetPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) - { - nvttSetCompressionOptionsPixelFormat(options, bitcount, rmask, gmask, bmask, amask); - } - - public void SetQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha) - { - nvttSetCompressionOptionsQuantization(options, colorDithering, alphaDithering, binaryAlpha, 127); - } - - public void SetQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold) - { - nvttSetCompressionOptionsQuantization(options, colorDithering, alphaDithering, binaryAlpha, alphaThreshold); - } - } - #endregion - - #region public class OutputOptions - /// - /// Output options. 
- /// - public class OutputOptions - { - #region Delegates - public delegate void ErrorHandler(Error error); - private delegate void WriteDataDelegate(IntPtr data, int size); - private delegate void ImageDelegate(int size, int width, int height, int depth, int face, int miplevel); - #endregion - - #region Bindings - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static IntPtr nvttCreateOutputOptions(); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttDestroyOutputOptions(IntPtr outputOptions); - - [DllImport("nvtt", CharSet = CharSet.Ansi), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetOutputOptionsFileName(IntPtr outputOptions, string fileName); - - //[DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - //private extern static void nvttSetOutputOptionsErrorHandler(IntPtr outputOptions, ErrorHandler errorHandler); - - private void ErrorCallback(Error error) - { - if (Error != null) Error(error); - } - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttSetOutputOptionsOutputHeader(IntPtr outputOptions, bool b); - - //[DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - //private extern static void nvttSetOutputOptionsOutputHandler(IntPtr outputOptions, WriteDataDelegate writeData, ImageDelegate image); - - #endregion - - internal IntPtr options; - - public OutputOptions() - { - options = nvttCreateOutputOptions(); - //nvttSetOutputOptionsErrorHandler(options, new ErrorHandler(ErrorCallback)); - } - ~OutputOptions() - { - nvttDestroyOutputOptions(options); - } - - public void SetFileName(string fileName) - { - nvttSetOutputOptionsFileName(options, fileName); - } - - public event ErrorHandler Error; - - public void SetOutputHeader(bool b) - { - nvttSetOutputOptionsOutputHeader(options, b); - } - - // @@ Add OutputHandler interface. 
- } - #endregion - - #region public static class Compressor - public class Compressor - { - #region Bindings - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static IntPtr nvttCreateCompressor(); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static void nvttDestroyCompressor(IntPtr compressor); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static bool nvttCompress(IntPtr compressor, IntPtr inputOptions, IntPtr compressionOptions, IntPtr outputOptions); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private extern static int nvttEstimateSize(IntPtr compressor, IntPtr inputOptions, IntPtr compressionOptions); - - [DllImport("nvtt"), SuppressUnmanagedCodeSecurity] - private static extern IntPtr nvttErrorString(Error error); - - #endregion - - internal IntPtr compressor; - - public Compressor() - { - compressor = nvttCreateCompressor(); - } - - ~Compressor() - { - nvttDestroyCompressor(compressor); - } - - public bool Compress(InputOptions input, CompressionOptions compression, OutputOptions output) - { - return nvttCompress(compressor, input.options, compression.options, output.options); - } - - public int EstimateSize(InputOptions input, CompressionOptions compression) - { - return nvttEstimateSize(compressor, input.options, compression.options); - } - - public static string ErrorString(Error error) - { - return Marshal.PtrToStringAnsi(nvttErrorString(error)); - } - - } - #endregion - -} // Nvidia.TextureTools namespace diff --git a/project/vc9/nvassemble/nvassemble.rc b/project/vc9/nvassemble/nvassemble.rc deleted file mode 100644 index 842ded1..0000000 Binary files a/project/vc9/nvassemble/nvassemble.rc and /dev/null differ diff --git a/project/vc9/nvassemble/nvassemble.vcproj b/project/vc9/nvassemble/nvassemble.vcproj deleted file mode 100644 index 6efdba5..0000000 --- a/project/vc9/nvassemble/nvassemble.vcproj +++ /dev/null @@ -1,357 +0,0 @@ - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvassemble/nvidia.ico b/project/vc9/nvassemble/nvidia.ico deleted file mode 100644 index 6aa721e..0000000 Binary files a/project/vc9/nvassemble/nvidia.ico and /dev/null differ diff --git a/project/vc9/nvassemble/resource.h b/project/vc9/nvassemble/resource.h deleted file mode 100644 index e765787..0000000 --- a/project/vc9/nvassemble/resource.h +++ /dev/null @@ -1,16 +0,0 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. -// Used by nvcompress.rc -// -#define IDI_ICON1 101 - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 102 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1000 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif diff --git a/project/vc9/nvcompress/nvcompress.rc b/project/vc9/nvcompress/nvcompress.rc deleted file mode 100644 index 842ded1..0000000 Binary files a/project/vc9/nvcompress/nvcompress.rc and /dev/null differ diff --git a/project/vc9/nvcompress/nvcompress.vcproj b/project/vc9/nvcompress/nvcompress.vcproj deleted file mode 100644 index 3282821..0000000 --- a/project/vc9/nvcompress/nvcompress.vcproj +++ /dev/null @@ -1,691 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvcompress/nvidia.ico b/project/vc9/nvcompress/nvidia.ico deleted file mode 100644 index 6aa721e..0000000 Binary files a/project/vc9/nvcompress/nvidia.ico and /dev/null differ diff --git 
a/project/vc9/nvcompress/resource.h b/project/vc9/nvcompress/resource.h deleted file mode 100644 index e765787..0000000 --- a/project/vc9/nvcompress/resource.h +++ /dev/null @@ -1,16 +0,0 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. -// Used by nvcompress.rc -// -#define IDI_ICON1 101 - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 102 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1000 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif diff --git a/project/vc9/nvconfig.h b/project/vc9/nvconfig.h deleted file mode 100644 index ffe00a1..0000000 --- a/project/vc9/nvconfig.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef NV_CONFIG -#define NV_CONFIG - -//#cmakedefine HAVE_UNISTD_H -#define HAVE_STDARG_H -//#cmakedefine HAVE_SIGNAL_H -//#cmakedefine HAVE_EXECINFO_H -#define HAVE_MALLOC_H - -#if !defined(_M_X64) -#define HAVE_PNG -#define HAVE_JPEG -//#define HAVE_TIFF -#endif - -#endif // NV_CONFIG diff --git a/project/vc9/nvcore/nvcore.vcproj b/project/vc9/nvcore/nvcore.vcproj deleted file mode 100644 index 17ab945..0000000 --- a/project/vc9/nvcore/nvcore.vcproj +++ /dev/null @@ -1,391 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvddsinfo/nvddsinfo.rc b/project/vc9/nvddsinfo/nvddsinfo.rc deleted file mode 100644 index 842ded1..0000000 Binary files a/project/vc9/nvddsinfo/nvddsinfo.rc and /dev/null differ diff --git a/project/vc9/nvddsinfo/nvddsinfo.vcproj b/project/vc9/nvddsinfo/nvddsinfo.vcproj deleted file mode 100644 index bf5383e..0000000 --- a/project/vc9/nvddsinfo/nvddsinfo.vcproj +++ /dev/null @@ -1,357 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvddsinfo/nvidia.ico b/project/vc9/nvddsinfo/nvidia.ico deleted file mode 100644 index 6aa721e..0000000 Binary files a/project/vc9/nvddsinfo/nvidia.ico and /dev/null differ diff --git a/project/vc9/nvddsinfo/resource.h b/project/vc9/nvddsinfo/resource.h deleted file mode 100644 index e765787..0000000 --- a/project/vc9/nvddsinfo/resource.h +++ /dev/null @@ -1,16 +0,0 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. -// Used by nvcompress.rc -// -#define IDI_ICON1 101 - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 102 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1000 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif diff --git a/project/vc9/nvdecompress/nvdecompress.rc b/project/vc9/nvdecompress/nvdecompress.rc deleted file mode 100644 index 842ded1..0000000 Binary files a/project/vc9/nvdecompress/nvdecompress.rc and /dev/null differ diff --git a/project/vc9/nvdecompress/nvdecompress.vcproj b/project/vc9/nvdecompress/nvdecompress.vcproj deleted file mode 100644 index 8b06c5b..0000000 --- a/project/vc9/nvdecompress/nvdecompress.vcproj +++ /dev/null @@ -1,381 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvdecompress/nvidia.ico b/project/vc9/nvdecompress/nvidia.ico deleted file mode 100644 index 6aa721e..0000000 Binary files a/project/vc9/nvdecompress/nvidia.ico and /dev/null differ diff --git a/project/vc9/nvdecompress/resource.h b/project/vc9/nvdecompress/resource.h deleted file mode 100644 index e765787..0000000 --- 
a/project/vc9/nvdecompress/resource.h +++ /dev/null @@ -1,16 +0,0 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. -// Used by nvcompress.rc -// -#define IDI_ICON1 101 - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 102 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1000 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif diff --git a/project/vc9/nvimage/nvimage.vcproj b/project/vc9/nvimage/nvimage.vcproj deleted file mode 100644 index 55e880d..0000000 --- a/project/vc9/nvimage/nvimage.vcproj +++ /dev/null @@ -1,389 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvimgdiff/nvidia.ico b/project/vc9/nvimgdiff/nvidia.ico deleted file mode 100644 index 6aa721e..0000000 Binary files a/project/vc9/nvimgdiff/nvidia.ico and /dev/null differ diff --git a/project/vc9/nvimgdiff/nvimgdiff.rc b/project/vc9/nvimgdiff/nvimgdiff.rc deleted file mode 100644 index 842ded1..0000000 Binary files a/project/vc9/nvimgdiff/nvimgdiff.rc and /dev/null differ diff --git a/project/vc9/nvimgdiff/nvimgdiff.vcproj b/project/vc9/nvimgdiff/nvimgdiff.vcproj deleted file mode 100644 index 434dc81..0000000 --- a/project/vc9/nvimgdiff/nvimgdiff.vcproj +++ /dev/null @@ -1,381 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvimgdiff/resource.h b/project/vc9/nvimgdiff/resource.h deleted file mode 100644 index e765787..0000000 --- a/project/vc9/nvimgdiff/resource.h +++ /dev/null @@ -1,16 +0,0 @@ 
-//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. -// Used by nvcompress.rc -// -#define IDI_ICON1 101 - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 102 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1000 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif diff --git a/project/vc9/nvmath/nvmath.vcproj b/project/vc9/nvmath/nvmath.vcproj deleted file mode 100644 index dadf125..0000000 --- a/project/vc9/nvmath/nvmath.vcproj +++ /dev/null @@ -1,343 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvtt.sln b/project/vc9/nvtt.sln deleted file mode 100644 index ddcb5cd..0000000 --- a/project/vc9/nvtt.sln +++ /dev/null @@ -1,289 +0,0 @@ -ïŧŋ -Microsoft Visual Studio Solution File, Format Version 10.00 -# Visual Studio 2008 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}" - ProjectSection(ProjectDependencies) = postProject - {CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38} - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} - {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}" - ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647} - 
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} - {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcore", "nvcore\nvcore.vcproj", "{F143D180-D4C4-4037-B3DE-BE89A21C8D1D}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvmath", "nvmath\nvmath.vcproj", "{50C465FE-B308-42BC-894D-89484482AF06}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcproj", "{CE017322-01FC-4851-9C8B-64E9A8E26C38}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvddsinfo.vcproj", "{841B73C5-C679-4EEF-A50A-7D6106642B49}" - ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} - {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}" - ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} - {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}" - ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - 
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} - {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}" - ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} - {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}" - ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} - {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} - EndProjectSection -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stress", "stress\stress.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}" - ProjectSection(ProjectDependencies) = postProject - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} - {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647} - {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} - EndProjectSection -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug (no cuda)|Win32 = Debug (no cuda)|Win32 - Debug (no cuda)|x64 = Debug (no cuda)|x64 - Debug|Win32 = Debug|Win32 - Debug|x64 = Debug|x64 - Release (no cuda)|Win32 
= Release (no cuda)|Win32 - Release (no cuda)|x64 = Release (no cuda)|x64 - Release|Win32 = Release|Win32 - Release|x64 = Release|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Win32.ActiveCfg = Debug (no cuda)|Win32 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|Win32.Build.0 = Debug (no cuda)|Win32 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|x64.ActiveCfg = Debug (no cuda)|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug (no cuda)|x64.Build.0 = Debug (no cuda)|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Win32.ActiveCfg = Debug|Win32 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|Win32.Build.0 = Debug|Win32 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|x64.ActiveCfg = Debug|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Debug|x64.Build.0 = Debug|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Win32.ActiveCfg = Release (no cuda)|Win32 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|Win32.Build.0 = Release (no cuda)|Win32 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|x64.ActiveCfg = Release (no cuda)|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release (no cuda)|x64.Build.0 = Release (no cuda)|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Win32.ActiveCfg = Release|Win32 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|Win32.Build.0 = Release|Win32 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|x64.ActiveCfg = Release|x64 - {1AEB7681-57D8-48EE-813D-5C41CC38B647}.Release|x64.Build.0 = Release|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Win32.ActiveCfg = Debug (no cuda)|Win32 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|Win32.Build.0 = Debug (no cuda)|Win32 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|x64.ActiveCfg = Debug (no cuda)|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug (no cuda)|x64.Build.0 = Debug (no cuda)|x64 - 
{88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Win32.ActiveCfg = Debug|Win32 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|Win32.Build.0 = Debug|Win32 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|x64.ActiveCfg = Debug|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Debug|x64.Build.0 = Debug|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Win32.ActiveCfg = Release (no cuda)|Win32 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|Win32.Build.0 = Release (no cuda)|Win32 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|x64.ActiveCfg = Release (no cuda)|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release (no cuda)|x64.Build.0 = Release (no cuda)|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Win32.ActiveCfg = Release|Win32 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|Win32.Build.0 = Release|Win32 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|x64.ActiveCfg = Release|x64 - {88079E38-83AA-4E8A-B18A-66A78D1B058B}.Release|x64.Build.0 = Release|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug (no cuda)|x64.Build.0 = Debug|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Win32.ActiveCfg = Debug|Win32 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|Win32.Build.0 = Debug|Win32 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|x64.ActiveCfg = Debug|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Debug|x64.Build.0 = Debug|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|x64.ActiveCfg = Release|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release (no cuda)|x64.Build.0 = Release|x64 - 
{4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Win32.ActiveCfg = Release|Win32 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|Win32.Build.0 = Release|Win32 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|x64.ActiveCfg = Release|x64 - {4046F392-A18B-4C66-9639-3EABFFF5D531}.Release|x64.Build.0 = Release|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug (no cuda)|x64.Build.0 = Debug|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Win32.ActiveCfg = Debug|Win32 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|Win32.Build.0 = Debug|Win32 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|x64.ActiveCfg = Debug|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Debug|x64.Build.0 = Debug|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|x64.ActiveCfg = Release|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release (no cuda)|x64.Build.0 = Release|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Win32.ActiveCfg = Release|Win32 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|Win32.Build.0 = Release|Win32 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|x64.ActiveCfg = Release|x64 - {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}.Release|x64.Build.0 = Release|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug (no cuda)|x64.Build.0 = Debug|x64 - 
{50C465FE-B308-42BC-894D-89484482AF06}.Debug|Win32.ActiveCfg = Debug|Win32 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug|Win32.Build.0 = Debug|Win32 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug|x64.ActiveCfg = Debug|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Debug|x64.Build.0 = Debug|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|x64.ActiveCfg = Release|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Release (no cuda)|x64.Build.0 = Release|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Release|Win32.ActiveCfg = Release|Win32 - {50C465FE-B308-42BC-894D-89484482AF06}.Release|Win32.Build.0 = Release|Win32 - {50C465FE-B308-42BC-894D-89484482AF06}.Release|x64.ActiveCfg = Release|x64 - {50C465FE-B308-42BC-894D-89484482AF06}.Release|x64.Build.0 = Release|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug (no cuda)|x64.Build.0 = Debug|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Win32.ActiveCfg = Debug|Win32 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|Win32.Build.0 = Debug|Win32 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|x64.ActiveCfg = Debug|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Debug|x64.Build.0 = Debug|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|x64.ActiveCfg = Release|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release (no cuda)|x64.Build.0 = Release|x64 - 
{CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Win32.ActiveCfg = Release|Win32 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|Win32.Build.0 = Release|Win32 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|x64.ActiveCfg = Release|x64 - {CE017322-01FC-4851-9C8B-64E9A8E26C38}.Release|x64.Build.0 = Release|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug (no cuda)|x64.Build.0 = Debug|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Win32.ActiveCfg = Debug|Win32 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|Win32.Build.0 = Debug|Win32 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|x64.ActiveCfg = Debug|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Debug|x64.Build.0 = Debug|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|x64.ActiveCfg = Release|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release (no cuda)|x64.Build.0 = Release|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Win32.ActiveCfg = Release|Win32 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|Win32.Build.0 = Release|Win32 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|x64.ActiveCfg = Release|x64 - {841B73C5-C679-4EEF-A50A-7D6106642B49}.Release|x64.Build.0 = Release|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug (no cuda)|x64.Build.0 = Debug|x64 - 
{75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Win32.ActiveCfg = Debug|Win32 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|Win32.Build.0 = Debug|Win32 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|x64.ActiveCfg = Debug|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Debug|x64.Build.0 = Debug|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|x64.ActiveCfg = Release|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release (no cuda)|x64.Build.0 = Release|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Win32.ActiveCfg = Release|Win32 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|Win32.Build.0 = Release|Win32 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|x64.ActiveCfg = Release|x64 - {75A0527D-BFC9-49C3-B46B-CD1A901D5927}.Release|x64.Build.0 = Release|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug (no cuda)|x64.Build.0 = Debug|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Win32.ActiveCfg = Debug|Win32 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|Win32.Build.0 = Debug|Win32 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|x64.ActiveCfg = Debug|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Debug|x64.Build.0 = Debug|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|x64.ActiveCfg = Release|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release (no cuda)|x64.Build.0 = Release|x64 - 
{05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Win32.ActiveCfg = Release|Win32 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|Win32.Build.0 = Release|Win32 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|x64.ActiveCfg = Release|x64 - {05A59E8B-EA70-4F22-89E8-E0927BA13064}.Release|x64.Build.0 = Release|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug (no cuda)|x64.Build.0 = Debug|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Win32.ActiveCfg = Debug|Win32 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|Win32.Build.0 = Debug|Win32 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|x64.ActiveCfg = Debug|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Debug|x64.Build.0 = Debug|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|x64.ActiveCfg = Release|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release (no cuda)|x64.Build.0 = Release|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Win32.ActiveCfg = Release|Win32 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|Win32.Build.0 = Release|Win32 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|x64.ActiveCfg = Release|x64 - {3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}.Release|x64.Build.0 = Release|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|x64.ActiveCfg = Debug|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug (no cuda)|x64.Build.0 = Debug|x64 - 
{51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Win32.ActiveCfg = Debug|Win32 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|Win32.Build.0 = Debug|Win32 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|x64.ActiveCfg = Debug|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Debug|x64.Build.0 = Debug|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|x64.ActiveCfg = Release|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release (no cuda)|x64.Build.0 = Release|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Win32.ActiveCfg = Release|Win32 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|Win32.Build.0 = Release|Win32 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|x64.ActiveCfg = Release|x64 - {51999D3E-EF22-4BDD-965F-4201034D3DCE}.Release|x64.Build.0 = Release|x64 - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Win32.ActiveCfg = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|Win32.Build.0 = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|x64.ActiveCfg = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug (no cuda)|x64.Build.0 = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Win32.ActiveCfg = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|Win32.Build.0 = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Debug|x64.ActiveCfg = Debug|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Win32.ActiveCfg = Release|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|Win32.Build.0 = Release|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release (no cuda)|x64.ActiveCfg = Release|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU - {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.Build.0 = Release|Any CPU - 
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/project/vc9/nvtt.vsprops b/project/vc9/nvtt.vsprops deleted file mode 100644 index f2948fd..0000000 --- a/project/vc9/nvtt.vsprops +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - diff --git a/project/vc9/nvtt/nvtt.rc b/project/vc9/nvtt/nvtt.rc deleted file mode 100644 index 089d1e9..0000000 --- a/project/vc9/nvtt/nvtt.rc +++ /dev/null @@ -1,102 +0,0 @@ -// Microsoft Visual C++ generated resource script. -// -#include "resource.h" - -#define APSTUDIO_READONLY_SYMBOLS -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 2 resource. 
-// -#include "afxres.h" - -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - -///////////////////////////////////////////////////////////////////////////// -// English (U.S.) resources - -#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) -#ifdef _WIN32 -LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US -#pragma code_page(1252) -#endif //_WIN32 - -#ifdef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// TEXTINCLUDE -// - -1 TEXTINCLUDE -BEGIN - "resource.h\0" -END - -2 TEXTINCLUDE -BEGIN - "#include ""afxres.h""\r\n" - "\0" -END - -3 TEXTINCLUDE -BEGIN - "\r\n" - "\0" -END - -#endif // APSTUDIO_INVOKED - - -///////////////////////////////////////////////////////////////////////////// -// -// Version -// - -VS_VERSION_INFO VERSIONINFO - FILEVERSION 2,1,0,0 - PRODUCTVERSION 2,1,0,0 - FILEFLAGSMASK 0x17L -#ifdef _DEBUG - FILEFLAGS 0x1L -#else - FILEFLAGS 0x0L -#endif - FILEOS 0x4L - FILETYPE 0x2L - FILESUBTYPE 0x0L -BEGIN - BLOCK "StringFileInfo" - BEGIN - BLOCK "040904b0" - BEGIN - VALUE "CompanyName", "NVIDIA Corporation" - VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library" - VALUE "FileVersion", "2, 1, 0, 0" - VALUE "InternalName", "nvtt" - VALUE "LegalCopyright", "Copyright (C) 2007" - VALUE "OriginalFilename", "nvtt.dll" - VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library" - VALUE "ProductVersion", "2, 1, 0, 0" - END - END - BLOCK "VarFileInfo" - BEGIN - VALUE "Translation", 0x409, 1200 - END -END - -#endif // English (U.S.) resources -///////////////////////////////////////////////////////////////////////////// - - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. 
-// - - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED - diff --git a/project/vc9/nvtt/nvtt.vcproj b/project/vc9/nvtt/nvtt.vcproj deleted file mode 100644 index dc6246c..0000000 --- a/project/vc9/nvtt/nvtt.vcproj +++ /dev/null @@ -1,973 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvtt/resource.h b/project/vc9/nvtt/resource.h deleted file mode 100644 index 4df7169..0000000 --- a/project/vc9/nvtt/resource.h +++ /dev/null @@ -1,14 +0,0 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. 
-// Used by nvtt.rc - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 101 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1001 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif diff --git a/project/vc9/nvzoom/nvidia.ico b/project/vc9/nvzoom/nvidia.ico deleted file mode 100644 index 6aa721e..0000000 Binary files a/project/vc9/nvzoom/nvidia.ico and /dev/null differ diff --git a/project/vc9/nvzoom/nvzoom.rc b/project/vc9/nvzoom/nvzoom.rc deleted file mode 100644 index 842ded1..0000000 Binary files a/project/vc9/nvzoom/nvzoom.rc and /dev/null differ diff --git a/project/vc9/nvzoom/nvzoom.vcproj b/project/vc9/nvzoom/nvzoom.vcproj deleted file mode 100644 index 99b45a6..0000000 --- a/project/vc9/nvzoom/nvzoom.vcproj +++ /dev/null @@ -1,381 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/nvzoom/resource.h b/project/vc9/nvzoom/resource.h deleted file mode 100644 index e765787..0000000 --- a/project/vc9/nvzoom/resource.h +++ /dev/null @@ -1,16 +0,0 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. 
-// Used by nvcompress.rc -// -#define IDI_ICON1 101 - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 102 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1000 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif diff --git a/project/vc9/squish/squish.vcproj b/project/vc9/squish/squish.vcproj deleted file mode 100644 index 1a9fada..0000000 --- a/project/vc9/squish/squish.vcproj +++ /dev/null @@ -1,363 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/project/vc9/stress/stress.vcproj b/project/vc9/stress/stress.vcproj deleted file mode 100644 index fe687a8..0000000 --- a/project/vc9/stress/stress.vcproj +++ /dev/null @@ -1,207 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 344bf38..4a9f376 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,12 +1,11 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/poshlib) - SUBDIRS(nvcore) SUBDIRS(nvmath) SUBDIRS(nvimage) SUBDIRS(nvtt) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + # OpenGL INCLUDE(FindOpenGL) IF(OPENGL_FOUND) @@ -16,7 +15,8 @@ ELSE(OPENGL_FOUND) ENDIF(OPENGL_FOUND) # GLUT -INCLUDE(FindGLUT) +INCLUDE(${NV_CMAKE_DIR}/FindGLUT.cmake) +#INCLUDE(FindGLUT) IF(GLUT_FOUND) MESSAGE(STATUS "Looking for GLUT - found") ELSE(GLUT_FOUND) @@ -48,7 +48,7 @@ ELSE(CG_FOUND) ENDIF(CG_FOUND) # CUDA -FIND_PACKAGE(CUDA) +INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake) IF(CUDA_FOUND) SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE 
otherwise") MESSAGE(STATUS "Looking for CUDA - found") @@ -65,15 +65,6 @@ ELSE(MAYA_FOUND) MESSAGE(STATUS "Looking for Maya - not found") ENDIF(MAYA_FOUND) -# FreeImage -INCLUDE(${NV_CMAKE_DIR}/FindFreeImage.cmake) -IF(FREEIMAGE_FOUND) - SET(HAVE_FREEIMAGE ${FREEIMAGE_FOUND} CACHE BOOL "Set to TRUE if FreeImage is found, FALSE otherwise") - MESSAGE(STATUS "Looking for FreeImage - found") -ELSE(FREEIMAGE_FOUND) - MESSAGE(STATUS "Looking for FreeImage - not found") -ENDIF(FREEIMAGE_FOUND) - # JPEG INCLUDE(FindJPEG) IF(JPEG_FOUND) @@ -93,7 +84,6 @@ ELSE(PNG_FOUND) ENDIF(PNG_FOUND) # TIFF -SET(TIFF_NAMES libtiff) INCLUDE(FindTIFF) IF(TIFF_FOUND) SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise") @@ -129,3 +119,5 @@ CHECK_INCLUDE_FILES(malloc.h HAVE_MALLOC_H) CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/nvconfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/nvconfig.h) +#INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/nvconfig.h DESTINATION include) + diff --git a/src/nvconfig.h.in b/src/nvconfig.h.in index a4911ee..ac9bd57 100644 --- a/src/nvconfig.h.in +++ b/src/nvconfig.h.in @@ -11,7 +11,6 @@ #cmakedefine HAVE_JPEG #cmakedefine HAVE_TIFF #cmakedefine HAVE_OPENEXR -#cmakedefine HAVE_FREEIMAGE #cmakedefine HAVE_MAYA diff --git a/src/nvcore/Algorithms.h b/src/nvcore/Algorithms.h deleted file mode 100644 index b543b11..0000000 --- a/src/nvcore/Algorithms.h +++ /dev/null @@ -1,154 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_CORE_ALGORITHMS_H -#define NV_CORE_ALGORITHMS_H - -#include - -namespace nv -{ - - /// Return the maximum of two values. - template - inline const T & max(const T & a, const T & b) - { - //return std::max(a, b); - if( a < b ) { - return b; - } - return a; - } - - /// Return the minimum of two values. - template - inline const T & min(const T & a, const T & b) - { - //return std::min(a, b); - if( b < a ) { - return b; - } - return a; - } - - /// Clamp between two values. 
- template - inline const T & clamp(const T & x, const T & a, const T & b) - { - return min(max(x, a), b); - } - - /// Delete all the elements of a container. - template - void deleteAll(T & container) - { - for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i)) - { - delete container[i]; - } - } - - // @@ Should swap be implemented here? - - - - template class C> - void sort(C & container) - { - introsortLoop(container, 0, container.count()); - insertionSort(container, 0, container.count()); - } - - template class C> - void sort(C & container, uint begin, uint end) - { - if (begin < end) - { - introsortLoop(container, begin, end); - insertionSort(container, begin, end); - } - } - - template class C> - void insertionSort(C & container) - { - insertionSort(container, 0, container.count()); - } - - template class C> - void insertionSort(C & container, uint begin, uint end) - { - for (uint i = begin + 1; i != end; ++i) - { - T value = container[i]; - - uint j = i; - while (j != begin && container[j-1] > value) - { - container[j] = container[j-1]; - --j; - } - if (i != j) - { - container[j] = value; - } - } - } - - template class C> - void introsortLoop(C & container, uint begin, uint end) - { - while (end-begin > 16) - { - uint p = partition(container, begin, end, medianof3(container, begin, begin+((end-begin)/2)+1, end-1)); - introsortLoop(container, p, end); - end = p; - } - } - - template class C> - uint partition(C & a, uint begin, uint end, const T & x) - { - int i = begin, j = end; - while (true) - { - while (a[i] < x) ++i; - --j; - while (x < a[j]) --j; - if (i >= j) - return i; - swap(a[i], a[j]); - i++; - } - } - - template class C> - const T & medianof3(C & a, uint lo, uint mid, uint hi) - { - if (a[mid] < a[lo]) - { - if (a[hi] < a[mid]) - { - return a[mid]; - } - else - { - return (a[hi] < a[lo]) ? a[hi] : a[lo]; - } - } - else - { - if (a[hi] < a[mid]) - { - return (a[hi] < a[lo]) ? 
a[lo] : a[hi]; - } - else - { - return a[mid]; - } - } - } - - -} // nv namespace - -#endif // NV_CORE_ALGORITHMS_H diff --git a/src/nvcore/BitArray.h b/src/nvcore/BitArray.h new file mode 100644 index 0000000..01ab141 --- /dev/null +++ b/src/nvcore/BitArray.h @@ -0,0 +1,168 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_CORE_BITARRAY_H +#define NV_CORE_BITARRAY_H + +#include +#include + +namespace nv +{ + +/// Count the bits of @a x. +inline uint bitsSet(uint8 x) { + uint count = 0; + for(; x != 0; x >>= 1) { + count += (x & 1); + } + return count; +} + + +/// Count the bits of @a x. +inline uint bitsSet(uint32 x, int bits) { + uint count = 0; + for(; x != 0 && bits != 0; x >>= 1, bits--) { + count += (x & 1); + } + return count; +} + + +/// Simple bit array. +class BitArray +{ +public: + + /// Default ctor. + BitArray() {} + + /// Ctor with initial m_size. + BitArray(uint sz) + { + resize(sz); + } + + /// Get array m_size. + uint size() const { return m_size; } + + /// Clear array m_size. + void clear() { resize(0); } + + /// Set array m_size. + void resize(uint sz) + { + m_size = sz; + m_bitArray.resize( (m_size + 7) >> 3 ); + } + + /// Get bit. + bool bitAt(uint b) const + { + nvDebugCheck( b < m_size ); + return (m_bitArray[b >> 3] & (1 << (b & 7))) != 0; + } + + /// Set a bit. + void setBitAt(uint b) + { + nvDebugCheck( b < m_size ); + m_bitArray[b >> 3] |= (1 << (b & 7)); + } + + /// Clear a bit. + void clearBitAt( uint b ) + { + nvDebugCheck( b < m_size ); + m_bitArray[b >> 3] &= ~(1 << (b & 7)); + } + + /// Clear all the bits. + void clearAll() + { + memset(m_bitArray.unsecureBuffer(), 0, m_bitArray.size()); + } + + /// Set all the bits. + void setAll() + { + memset(m_bitArray.unsecureBuffer(), 0xFF, m_bitArray.size()); + } + + /// Toggle all the bits. + void toggleAll() + { + const uint byte_num = m_bitArray.size(); + for(uint b = 0; b < byte_num; b++) { + m_bitArray[b] ^= 0xFF; + } + } + + /// Get a byte of the bit array. 
+ const uint8 & byteAt(uint index) const + { + return m_bitArray[index]; + } + + /// Set the given byte of the byte array. + void setByteAt(uint index, uint8 b) + { + m_bitArray[index] = b; + } + + /// Count the number of bits set. + uint countSetBits() const + { + const uint num = m_bitArray.size(); + if( num == 0 ) { + return 0; + } + + uint count = 0; + for(uint i = 0; i < num - 1; i++) { + count += bitsSet(m_bitArray[i]); + } + count += bitsSet(m_bitArray[num-1], m_size & 0x7); + + //piDebugCheck(count + countClearBits() == m_size); + return count; + } + + /// Count the number of bits clear. + uint countClearBits() const { + + const uint num = m_bitArray.size(); + if( num == 0 ) { + return 0; + } + + uint count = 0; + for(uint i = 0; i < num - 1; i++) { + count += bitsSet(~m_bitArray[i]); + } + count += bitsSet(~m_bitArray[num-1], m_size & 0x7); + + //piDebugCheck(count + countSetBits() == m_size); + return count; + } + + friend void swap(BitArray & a, BitArray & b) + { + swap(a.m_size, b.m_size); + swap(a.m_bitArray, b.m_bitArray); + } + + +private: + + /// Number of bits stored. + uint m_size; + + /// Array of bits. 
+ Array m_bitArray; + +}; + +} // nv namespace + +#endif // _PI_CORE_BITARRAY_H_ diff --git a/src/nvcore/CMakeLists.txt b/src/nvcore/CMakeLists.txt index 78a124a..60ad1d9 100644 --- a/src/nvcore/CMakeLists.txt +++ b/src/nvcore/CMakeLists.txt @@ -1,25 +1,27 @@ PROJECT(nvcore) +ADD_SUBDIRECTORY(poshlib) SET(CORE_SRCS nvcore.h - Algorithms.h - Containers.h - Debug.h Debug.cpp - DefsGnucDarwin.h - DefsGnucLinux.h - DefsGnucWin32.h - DefsVcWin32.h - FileSystem.h FileSystem.cpp - Library.h Library.cpp - Memory.h Memory.cpp Ptr.h - RefCounted.h RefCounted.cpp - StrLib.h StrLib.cpp + BitArray.h + Memory.h + Memory.cpp + Debug.h + Debug.cpp + Containers.h + StrLib.h + StrLib.cpp Stream.h StdStream.h - TextReader.h TextReader.cpp - TextWriter.h TextWriter.cpp - Timer.h) + TextReader.h + TextReader.cpp + TextWriter.h + TextWriter.cpp + Radix.h + Radix.cpp + Library.h + Library.cpp) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/nvcore/Containers.h b/src/nvcore/Containers.h index 08c60e9..f0b63d4 100644 --- a/src/nvcore/Containers.h +++ b/src/nvcore/Containers.h @@ -19,7 +19,6 @@ Do not use memmove in insert & remove, use copy ctors instead. #include #include #include -//#include #include // memmove #include // for placement new @@ -71,10 +70,40 @@ namespace nv { // Templates + /// Return the maximum of two values. + template + inline const T & max(const T & a, const T & b) + { + //return std::max(a, b); + if( a < b ) { + return b; + } + return a; + } + + /// Return the minimum of two values. + template + inline const T & min(const T & a, const T & b) + { + //return std::min(a, b); + if( b < a ) { + return b; + } + return a; + } + + /// Clamp between two values. + template + inline const T & clamp(const T & x, const T & a, const T & b) + { + return min(max(x, a), b); + } + /// Swap two values. 
template inline void swap(T & a, T & b) { + //return std::swap(a, b); T temp = a; a = b; b = temp; @@ -105,6 +134,16 @@ namespace nv uint operator()(uint x) const { return x; } }; + /// Delete all the elements of a container. + template + void deleteAll(T & container) + { + for(typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i)) + { + delete container[i]; + } + } + /** Return the next power of two. * @see http://graphics.stanford.edu/~seander/bithacks.html @@ -115,7 +154,7 @@ namespace nv inline uint nextPowerOfTwo( uint x ) { nvDebugCheck( x != 0 ); - #if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction. + #if 1 // On modern CPUs this is as fast as using the bsr instruction. x--; x |= x >> 1; x |= x >> 2; @@ -138,6 +177,15 @@ namespace nv return (n & (n-1)) == 0; } + /// Simple iterator interface. + template + struct Iterator + { + virtual void advance(); + virtual bool isDone(); + virtual T current(); + }; + /** * Replacement for std::vector that is easier to debug and provides @@ -179,29 +227,20 @@ namespace nv } - /// Const element access. + /// Const and save vector access. const T & operator[]( uint index ) const { nvDebugCheck(index < m_size); return m_buffer[index]; } - const T & at( uint index ) const - { - nvDebugCheck(index < m_size); - return m_buffer[index]; - } - - /// Element access. + + /// Safe vector access. T & operator[] ( uint index ) { nvDebugCheck(index < m_size); return m_buffer[index]; } - T & at( uint index ) - { - nvDebugCheck(index < m_size); - return m_buffer[index]; - } + /// Get vector size. uint size() const { return m_size; } @@ -213,7 +252,7 @@ namespace nv const T * buffer() const { return m_buffer; } /// Get vector pointer. - T * mutableBuffer() { return m_buffer; } + T * unsecureBuffer() { return m_buffer; } /// Is vector empty. 
bool isEmpty() const { return m_size == 0; } @@ -294,22 +333,15 @@ namespace nv return m_buffer[0]; } - /// Return index of the - bool find(const T & element, uint * index) + /// Check if the given element is contained in the array. + bool contains(const T & e) const { for (uint i = 0; i < m_size; i++) { - if (index != NULL) *index = i; - return true; + if (m_buffer[i] == e) return true; } return false; } - - /// Check if the given element is contained in the array. - bool contains(const T & e) const - { - return find(e, NULL); - } - + /// Remove the element at the given index. This is an expensive operation! void removeAt( uint index ) { @@ -495,10 +527,9 @@ namespace nv } /// Assignment operator. - Array & operator=( const Array & a ) + void operator=( const Array & a ) { copy( a.m_buffer, a.m_size ); - return *this; } /* @@ -595,43 +626,18 @@ namespace nv template > class NVCORE_CLASS HashMap { + NV_FORBID_COPY(HashMap) public: /// Default ctor. HashMap() : entry_count(0), size_mask(-1), table(NULL) { } - // Copy ctor. - HashMap(const HashMap & map) : entry_count(0), size_mask(-1), table(NULL) - { - operator = (map); - } - /// Ctor with size hint. explicit HashMap(int size_hint) : entry_count(0), size_mask(-1), table(NULL) { setCapacity(size_hint); } /// Dtor. ~HashMap() { clear(); } - - // Assignment operator. - void operator= (const HashMap & map) - { - clear(); - - if (entry_count > 0) - { - entry_count = map.entry_count; - size_mask = map.size_mask; - - const uint size = uint(size_mask + 1); - table = (Entry *)nv::mem::malloc(sizeof(Entry) * size); - - // Copy elements using copy ctor. - for (uint i = 0; i < size; i++) - { - new (table + i) Entry(map.table[i]); - } - } - } + /// Set a new or existing value under the key, to the value. 
void set(const T& key, const U& value) diff --git a/src/nvcore/Debug.cpp b/src/nvcore/Debug.cpp index 922bce7..0babc0f 100644 --- a/src/nvcore/Debug.cpp +++ b/src/nvcore/Debug.cpp @@ -34,7 +34,7 @@ # endif #endif -#if NV_OS_DARWIN || NV_OS_FREEBSD +#if NV_OS_DARWIN # include // getpid # include # include // sysctl @@ -199,14 +199,6 @@ namespace return (void *) ucp->uc_mcontext->ss.eip; # endif # endif -# elif NV_OS_FREEBSD -# if NV_CPU_X86_64 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *)ucp->uc_mcontext.mc_rip; -# elif NV_CPU_X86 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *)ucp->uc_mcontext.mc_eip; -# endif # else # if NV_CPU_X86_64 // #define REG_RIP REG_INDEX(rip) // seems to be 16 diff --git a/src/nvcore/FileSystem.cpp b/src/nvcore/FileSystem.cpp deleted file mode 100644 index 4c50887..0000000 --- a/src/nvcore/FileSystem.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// This code is in the public domain -- castano@gmail.com - -#include "FileSystem.h" -#include - -#if NV_OS_WIN32 -#define _CRT_NONSTDC_NO_WARNINGS // _chdir is defined deprecated, but that's a bug, chdir is deprecated, _chdir is *not*. 
-//#include // PathFileExists -#include // GetFileAttributes -#include // _mkdir -#else -#include -#include -#include -#endif - -using namespace nv; - - -bool FileSystem::exists(const char * path) -{ -#if NV_OS_UNIX - return access(path, F_OK|R_OK) == 0; - //struct stat buf; - //return stat(path, &buf) == 0; -#elif NV_OS_WIN32 - // PathFileExists requires linking to shlwapi.lib - //return PathFileExists(path) != 0; - return GetFileAttributes(path) != 0xFFFFFFFF; -#else - if (FILE * fp = fopen(path, "r")) - { - fclose(fp); - return true; - } - return false; -#endif -} - -bool FileSystem::createDirectory(const char * path) -{ -#if NV_OS_WIN32 - return _mkdir(path) != -1; -#else - return mkdir(path, 0777) != -1; -#endif -} - -bool FileSystem::changeDirectory(const char * path) -{ -#if NV_OS_WIN32 - return _chdir(path) != -1; -#else - return chdir(path) != -1; -#endif -} \ No newline at end of file diff --git a/src/nvcore/FileSystem.h b/src/nvcore/FileSystem.h deleted file mode 100644 index 4d87158..0000000 --- a/src/nvcore/FileSystem.h +++ /dev/null @@ -1,23 +0,0 @@ -// This code is in the public domain -- castano@gmail.com - -#ifndef NV_CORE_FILESYSTEM_H -#define NV_CORE_FILESYSTEM_H - -#include - -namespace nv -{ - - namespace FileSystem - { - - NVCORE_API bool exists(const char * path); - NVCORE_API bool createDirectory(const char * path); - NVCORE_API bool changeDirectory(const char * path); - - } // FileSystem namespace - -} // nv namespace - - -#endif // NV_CORE_FILESYSTEM_H diff --git a/src/nvcore/Prefetch.h b/src/nvcore/Prefetch.h new file mode 100644 index 0000000..71bd0ed --- /dev/null +++ b/src/nvcore/Prefetch.h @@ -0,0 +1,31 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_CORE_PREFETCH_H +#define NV_CORE_PREFETCH_H + +#include + +// nvPrefetch +#if NV_CC_GNUC + +#define nvPrefetch(ptr) __builtin_prefetch(ptr) + +#elif NV_CC_MSVC + +#if NV_CPU_X86 +__forceinline void nvPrefetch(const void * mem) +{ + __asm mov ecx, mem + __asm 
prefetcht0 [ecx]; +// __asm prefetchnta [ecx]; +} +#endif // NV_CPU_X86 + +#else // NV_CC_MSVC + +// do nothing in other case. +#define nvPrefetch(ptr) + +#endif // NV_CC_MSVC + +#endif // NV_CORE_PREFETCH_H diff --git a/src/nvcore/Ptr.h b/src/nvcore/Ptr.h index 2cbe523..1d8d9c9 100644 --- a/src/nvcore/Ptr.h +++ b/src/nvcore/Ptr.h @@ -8,7 +8,6 @@ #include // NULL - namespace nv { @@ -30,11 +29,11 @@ class AutoPtr NV_FORBID_HEAPALLOC(); public: + /// Default ctor. + AutoPtr() : m_ptr(NULL) { } + /// Ctor. - AutoPtr(T * p = NULL) : m_ptr(p) { } - - template - AutoPtr(Q * p) : m_ptr(static_cast(p)) { } + explicit AutoPtr( T * p ) : m_ptr(p) { } /** Dtor. Deletes owned pointer. */ ~AutoPtr() { @@ -51,15 +50,6 @@ public: } } - template - void operator=( Q * p ) { - if (p != m_ptr) - { - delete m_ptr; - m_ptr = static_cast(p); - } - } - /** Member access. */ T * operator -> () const { nvDebugCheck(m_ptr != NULL); @@ -106,23 +96,125 @@ private: T * m_ptr; }; +#if 0 +/** Reference counted base class to be used with Pointer. + * + * The only requirement of the Pointer class is that the RefCounted class implements the + * addRef and release methods. + */ +class RefCounted +{ + NV_FORBID_COPY(RefCounted); +public: + + /// Ctor. + RefCounted() : m_count(0), m_weak_proxy(NULL) + { + s_total_obj_count++; + } + + /// Virtual dtor. + virtual ~RefCounted() + { + nvCheck( m_count == 0 ); + nvCheck( s_total_obj_count > 0 ); + s_total_obj_count--; + } + + + /// Increase reference count. + uint addRef() const + { + s_total_ref_count++; + m_count++; + return m_count; + } + + + /// Decrease reference count and remove when 0. + uint release() const + { + nvCheck( m_count > 0 ); + + s_total_ref_count--; + m_count--; + if( m_count == 0 ) { + releaseWeakProxy(); + delete this; + return 0; + } + return m_count; + } + + /// Get weak proxy. 
+ WeakProxy * getWeakProxy() const + { + if (m_weak_proxy == NULL) { + m_weak_proxy = new WeakProxy; + m_weak_proxy->AddRef(); + } + return m_weak_proxy; + } + + /// Release the weak proxy. + void releaseWeakProxy() const + { + if (m_weak_proxy != NULL) { + m_weak_proxy->NotifyObjectDied(); + m_weak_proxy->Release(); + m_weak_proxy = NULL; + } + } + + /** @name Debug methods: */ + //@{ + /// Get reference count. + int refCount() const + { + return m_count; + } + + /// Get total number of objects. + static int totalObjectCount() + { + return s_total_obj_count; + } + + /// Get total number of references. + static int totalReferenceCount() + { + return s_total_ref_count; + } + //@} + + +private: + + NVCORE_API static int s_total_ref_count; + NVCORE_API static int s_total_obj_count; + + mutable int m_count; + mutable WeakProxy * weak_proxy; + +}; +#endif /// Smart pointer template class. template -class SmartPtr { +class Pointer { public: // BaseClass must implement addRef() and release(). - typedef SmartPtr ThisType; + typedef Pointer ThisType; /// Default ctor. - SmartPtr() : m_ptr(NULL) + Pointer() : m_ptr(NULL) { } /** Other type assignment. */ template - SmartPtr( const SmartPtr & tc ) + Pointer( const Pointer & tc ) { m_ptr = static_cast( tc.ptr() ); if( m_ptr ) { @@ -131,7 +223,7 @@ public: } /** Copy ctor. */ - SmartPtr( const ThisType & bc ) + Pointer( const ThisType & bc ) { m_ptr = bc.ptr(); if( m_ptr ) { @@ -139,8 +231,8 @@ public: } } - /** Copy cast ctor. SmartPtr(NULL) is valid. */ - explicit SmartPtr( BaseClass * bc ) + /** Copy cast ctor. Pointer(NULL) is valid. */ + explicit Pointer( BaseClass * bc ) { m_ptr = bc; if( m_ptr ) { @@ -149,7 +241,7 @@ public: } /** Dtor. */ - ~SmartPtr() + ~Pointer() { set(NULL); } @@ -183,7 +275,7 @@ public: //@{ /** Other type assignment. 
*/ template - void operator = ( const SmartPtr & tc ) + void operator = ( const Pointer & tc ) { set( static_cast(tc.ptr()) ); } @@ -206,7 +298,7 @@ public: //@{ /** Other type equal comparation. */ template - bool operator == ( const SmartPtr & other ) const + bool operator == ( const Pointer & other ) const { return m_ptr == other.ptr(); } @@ -225,7 +317,7 @@ public: /** Other type not equal comparation. */ template - bool operator != ( const SmartPtr & other ) const + bool operator != ( const Pointer & other ) const { return m_ptr != other.ptr(); } diff --git a/src/nvcore/Radix.cpp b/src/nvcore/Radix.cpp new file mode 100644 index 0000000..713215f --- /dev/null +++ b/src/nvcore/Radix.cpp @@ -0,0 +1,429 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Contains source code from the article "Radix Sort Revisited". + * \file Radix.cpp + * \author Pierre Terdiman + * \date April, 4, 2000 + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Revisited Radix Sort. 
+ * This is my new radix routine: + * - it uses indices and doesn't recopy the values anymore, hence wasting less ram + * - it creates all the histograms in one run instead of four + * - it sorts words faster than dwords and bytes faster than words + * - it correctly sorts negative floating-point values by patching the offsets + * - it automatically takes advantage of temporal coherence + * - multiple keys support is a side effect of temporal coherence + * - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway] + * + * History: + * - 08.15.98: very first version + * - 04.04.00: recoded for the radix article + * - 12.xx.00: code lifting + * - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here) + * - 10.11.01: added local ram support + * - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting...... + * + * \class RadixSort + * \author Pierre Terdiman + * \version 1.3 + * \date August, 15, 1998 + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* +To do: + - add an offset parameter between two input values (avoid some data recopy sometimes) + - unroll ? asm ? +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Header + +#include + +#include // memset + +//using namespace IceCore; + +#define DELETEARRAY(a) { delete [] a; a = NULL; } +#define CHECKALLOC(a) + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Constructor. 
+ */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +RadixSort::RadixSort() : mCurrentSize(0), mPreviousSize(0), mIndices(NULL), mIndices2(NULL), mTotalCalls(0), mNbHits(0) +{ +#ifndef RADIX_LOCAL_RAM + // Allocate input-independent ram + mHistogram = new uint32[256*4]; + mOffset = new uint32[256]; +#endif + // Initialize indices + resetIndices(); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Destructor. + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +RadixSort::~RadixSort() +{ + // Release everything +#ifndef RADIX_LOCAL_RAM + DELETEARRAY(mOffset); + DELETEARRAY(mHistogram); +#endif + DELETEARRAY(mIndices2); + DELETEARRAY(mIndices); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Resizes the inner lists. 
+ * \param nb [in] new size (number of dwords) + * \return true if success + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool RadixSort::resize(uint32 nb) +{ + // Free previously used ram + DELETEARRAY(mIndices2); + DELETEARRAY(mIndices); + + // Get some fresh one + mIndices = new uint32[nb]; CHECKALLOC(mIndices); + mIndices2 = new uint32[nb]; CHECKALLOC(mIndices2); + mCurrentSize = nb; + + // Initialize indices so that the input buffer is read in sequential order + resetIndices(); + + return true; +} + +#define CHECK_RESIZE(n) \ + if(n!=mPreviousSize) \ + { \ + if(n>mCurrentSize) resize(n); \ + else resetIndices(); \ + mPreviousSize = n; \ + } + +#define CREATE_HISTOGRAMS(type, buffer) \ + /* Clear counters */ \ + memset(mHistogram, 0, 256*4*sizeof(uint32)); \ + \ + /* Prepare for temporal coherence */ \ + type PrevVal = (type)buffer[mIndices[0]]; \ + bool AlreadySorted = true; /* Optimism... */ \ + uint32* Indices = mIndices; \ + \ + /* Prepare to count */ \ + uint8* p = (uint8*)input; \ + uint8* pe = &p[nb*4]; \ + uint32* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \ + uint32* h1= &mHistogram[256]; /* Histogram for second pass */ \ + uint32* h2= &mHistogram[512]; /* Histogram for third pass */ \ + uint32* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \ + \ + while(p!=pe) \ + { \ + /* Read input buffer in previous sorted order */ \ + type Val = (type)buffer[*Indices++]; \ + /* Check whether already sorted or not */ \ + if(Val>24; // Radix byte, same as above. AND is useless here (uint32). + // ### cmp to be killed. Not good. Later. + if(Radix<128) mIndices2[mOffset[Radix]++] = mIndices[i]; // Number is positive, same as above + else mIndices2[--mOffset[Radix]] = mIndices[i]; // Number is negative, flip the sorting order + } + // Swap pointers for next pass. 
Valid indices - the most recent ones - are in mIndices after the swap. + uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp; + } + else + { + // The pass is useless, yet we still have to reverse the order of current list if all values are negative. + if(UniqueVal>=128) + { + for(i=0;i + + +#define RADIX_LOCAL_RAM + + +class NVCORE_API RadixSort { + NV_FORBID_COPY(RadixSort); +public: + // Constructor/Destructor + RadixSort(); + ~RadixSort(); + + // Sorting methods + RadixSort & sort(const uint32* input, uint32 nb, bool signedvalues=true); + RadixSort & sort(const float* input, uint32 nb); + + //! Access to results. mIndices is a list of indices in sorted order, i.e. in the order you may further process your data + inline uint32 * indices() const { return mIndices; } + + //! mIndices2 gets trashed on calling the sort routine, but otherwise you can recycle it the way you want. + inline uint32 * recyclable() const { return mIndices2; } + + // Stats + uint32 usedRam() const; + + //! Returns the total number of calls to the radix sorter. + inline uint32 totalCalls() const { return mTotalCalls; } + + //! Returns the number of premature exits due to temporal coherence. 
+ inline uint32 hits() const { return mNbHits; } + + + private: +#ifndef RADIX_LOCAL_RAM + uint32* mHistogram; //!< Counters for each byte + uint32* mOffset; //!< Offsets (nearly a cumulative distribution function) +#endif + uint32 mCurrentSize; //!< Current size of the indices list + uint32 mPreviousSize; //!< Size involved in previous call + uint32* mIndices; //!< Two lists, swapped each pass + uint32* mIndices2; + + // Stats + uint32 mTotalCalls; + uint32 mNbHits; + + // Internal methods + bool resize(uint32 nb); + void resetIndices(); + +}; + + +#endif // NV_CORE_RADIXSORT_H diff --git a/src/nvcore/RefCounted.cpp b/src/nvcore/RefCounted.cpp deleted file mode 100644 index 595c74c..0000000 --- a/src/nvcore/RefCounted.cpp +++ /dev/null @@ -1,9 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include "RefCounted.h" - -using namespace nv; - -int nv::RefCounted::s_total_ref_count = 0; -int nv::RefCounted::s_total_obj_count = 0; - diff --git a/src/nvcore/RefCounted.h b/src/nvcore/RefCounted.h deleted file mode 100644 index 309b2ad..0000000 --- a/src/nvcore/RefCounted.h +++ /dev/null @@ -1,114 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_CORE_REFCOUNTED_H -#define NV_CORE_REFCOUNTED_H - -#include -#include - - -namespace nv -{ - - /// Reference counted base class to be used with SmartPtr and WeakPtr. - class RefCounted - { - NV_FORBID_COPY(RefCounted); - public: - - /// Ctor. - RefCounted() : m_count(0)/*, m_weak_proxy(NULL)*/ - { - s_total_obj_count++; - } - - /// Virtual dtor. - virtual ~RefCounted() - { - nvCheck( m_count == 0 ); - nvCheck( s_total_obj_count > 0 ); - s_total_obj_count--; - } - - - /// Increase reference count. - uint addRef() const - { - s_total_ref_count++; - m_count++; - return m_count; - } - - - /// Decrease reference count and remove when 0. 
- uint release() const - { - nvCheck( m_count > 0 ); - - s_total_ref_count--; - m_count--; - if( m_count == 0 ) { - // releaseWeakProxy(); - delete this; - return 0; - } - return m_count; - } - /* - /// Get weak proxy. - WeakProxy * getWeakProxy() const - { - if (m_weak_proxy == NULL) { - m_weak_proxy = new WeakProxy; - m_weak_proxy->AddRef(); - } - return m_weak_proxy; - } - - /// Release the weak proxy. - void releaseWeakProxy() const - { - if (m_weak_proxy != NULL) { - m_weak_proxy->NotifyObjectDied(); - m_weak_proxy->Release(); - m_weak_proxy = NULL; - } - } - */ - /** @name Debug methods: */ - //@{ - /// Get reference count. - int refCount() const - { - return m_count; - } - - /// Get total number of objects. - static int totalObjectCount() - { - return s_total_obj_count; - } - - /// Get total number of references. - static int totalReferenceCount() - { - return s_total_ref_count; - } - //@} - - - private: - - NVCORE_API static int s_total_ref_count; - NVCORE_API static int s_total_obj_count; - - mutable int m_count; - // mutable WeakProxy * weak_proxy; - - }; - - -} // nv namespace - - -#endif // NV_CORE_REFCOUNTED_H diff --git a/src/nvcore/StdStream.h b/src/nvcore/StdStream.h index 31a64f4..4304612 100644 --- a/src/nvcore/StdStream.h +++ b/src/nvcore/StdStream.h @@ -1,7 +1,5 @@ -// This code is in the public domain -- castano@gmail.com - -#ifndef NV_CORE_STDSTREAM_H -#define NV_CORE_STDSTREAM_H +#ifndef NV_STDSTREAM_H +#define NV_STDSTREAM_H #include @@ -368,4 +366,4 @@ private: } // nv namespace -#endif // NV_CORE_STDSTREAM_H +#endif // NV_STDSTREAM_H diff --git a/src/nvcore/Stream.h b/src/nvcore/Stream.h index 82a9a3f..4a35120 100644 --- a/src/nvcore/Stream.h +++ b/src/nvcore/Stream.h @@ -1,7 +1,7 @@ -// This code is in the public domain -- castano@gmail.com +// This code is in the public domain -- castanyo@yahoo.es -#ifndef NV_CORE_STREAM_H -#define NV_CORE_STREAM_H +#ifndef NVCORE_STREAM_H +#define NVCORE_STREAM_H #include #include @@ -9,152 +9,152 @@ 
namespace nv { - /// Base stream class. - class NVCORE_CLASS Stream { - public: - - enum ByteOrder { - LittleEndian = false, - BigEndian = true, - }; - - /// Get the byte order of the system. - static ByteOrder getSystemByteOrder() { - #if NV_LITTLE_ENDIAN - return LittleEndian; - #else - return BigEndian; - #endif - } - - - /// Ctor. - Stream() : m_byteOrder(LittleEndian) { } - - /// Virtual destructor. - virtual ~Stream() {} - - /// Set byte order. - void setByteOrder(ByteOrder bo) { m_byteOrder = bo; } - - /// Get byte order. - ByteOrder byteOrder() const { return m_byteOrder; } - - - /// Serialize the given data. - virtual uint serialize( void * data, uint len ) = 0; - - /// Move to the given position in the archive. - virtual void seek( uint pos ) = 0; - - /// Return the current position in the archive. - virtual uint tell() const = 0; +/// Base stream class. +class NVCORE_CLASS Stream { +public: - /// Return the current size of the archive. - virtual uint size() const = 0; - - /// Determine if there has been any error. - virtual bool isError() const = 0; - - /// Clear errors. - virtual void clearError() = 0; - - /// Return true if the stream is at the end. - virtual bool isAtEnd() const = 0; - - /// Return true if the stream is seekable. - virtual bool isSeekable() const = 0; - - /// Return true if this is an input stream. - virtual bool isLoading() const = 0; - - /// Return true if this is an output stream. - virtual bool isSaving() const = 0; + enum ByteOrder { + LittleEndian = false, + BigEndian = true, + }; - - // friends - friend Stream & operator<<( Stream & s, bool & c ) { - #if NV_OS_DARWIN - nvStaticCheck(sizeof(bool) == 4); - uint8 b = c ? 1 : 0; - s.serialize( &b, 1 ); - c = (b == 1); - #else - nvStaticCheck(sizeof(bool) == 1); - s.serialize( &c, 1 ); - #endif - return s; - } - friend Stream & operator<<( Stream & s, char & c ) { - nvStaticCheck(sizeof(char) == 1); - s.serialize( &c, 1 ); - return s; + /// Get the byte order of the system. 
+ static ByteOrder getSystemByteOrder() { +# if NV_LITTLE_ENDIAN + return LittleEndian; +# else + return BigEndian; +# endif + } + + + /// Ctor. + Stream() : m_byteOrder(LittleEndian) { } + + /// Virtual destructor. + virtual ~Stream() {} + + /// Set byte order. + void setByteOrder(ByteOrder bo) { m_byteOrder = bo; } + + /// Get byte order. + ByteOrder byteOrder() const { return m_byteOrder; } + + + /// Serialize the given data. + virtual uint serialize( void * data, uint len ) = 0; + + /// Move to the given position in the archive. + virtual void seek( uint pos ) = 0; + + /// Return the current position in the archive. + virtual uint tell() const = 0; + + /// Return the current size of the archive. + virtual uint size() const = 0; + + /// Determine if there has been any error. + virtual bool isError() const = 0; + + /// Clear errors. + virtual void clearError() = 0; + + /// Return true if the stream is at the end. + virtual bool isAtEnd() const = 0; + + /// Return true if the stream is seekable. + virtual bool isSeekable() const = 0; + + /// Return true if this is an input stream. + virtual bool isLoading() const = 0; + + /// Return true if this is an output stream. + virtual bool isSaving() const = 0; + + + // friends + friend Stream & operator<<( Stream & s, bool & c ) { +# if NV_OS_DARWIN + nvStaticCheck(sizeof(bool) == 4); + uint8 b = c ? 
1 : 0; + s.serialize( &b, 1 ); + c = (b == 1); +# else + nvStaticCheck(sizeof(bool) == 1); + s.serialize( &c, 1 ); +# endif + return s; + } + friend Stream & operator<<( Stream & s, char & c ) { + nvStaticCheck(sizeof(char) == 1); + s.serialize( &c, 1 ); + return s; + } + friend Stream & operator<<( Stream & s, uint8 & c ) { + nvStaticCheck(sizeof(uint8) == 1); + s.serialize( &c, 1 ); + return s; + } + friend Stream & operator<<( Stream & s, int8 & c ) { + nvStaticCheck(sizeof(int8) == 1); + s.serialize( &c, 1 ); + return s; + } + friend Stream & operator<<( Stream & s, uint16 & c ) { + nvStaticCheck(sizeof(uint16) == 2); + return s.byteOrderSerialize( &c, 2 ); + } + friend Stream & operator<<( Stream & s, int16 & c ) { + nvStaticCheck(sizeof(int16) == 2); + return s.byteOrderSerialize( &c, 2 ); + } + friend Stream & operator<<( Stream & s, uint32 & c ) { + nvStaticCheck(sizeof(uint32) == 4); + return s.byteOrderSerialize( &c, 4 ); + } + friend Stream & operator<<( Stream & s, int32 & c ) { + nvStaticCheck(sizeof(int32) == 4); + return s.byteOrderSerialize( &c, 4 ); + } + friend Stream & operator<<( Stream & s, uint64 & c ) { + nvStaticCheck(sizeof(uint64) == 8); + return s.byteOrderSerialize( &c, 8 ); + } + friend Stream & operator<<( Stream & s, int64 & c ) { + nvStaticCheck(sizeof(int64) == 8); + return s.byteOrderSerialize( &c, 8 ); + } + friend Stream & operator<<( Stream & s, float & c ) { + nvStaticCheck(sizeof(float) == 4); + return s.byteOrderSerialize( &c, 4 ); + } + friend Stream & operator<<( Stream & s, double & c ) { + nvStaticCheck(sizeof(double) == 8); + return s.byteOrderSerialize( &c, 8 ); + } + +protected: + + /// Serialize in the stream byte order. 
+ Stream & byteOrderSerialize( void * v, uint len ) { + if( m_byteOrder == getSystemByteOrder() ) { + serialize( v, len ); } - friend Stream & operator<<( Stream & s, uint8 & c ) { - nvStaticCheck(sizeof(uint8) == 1); - s.serialize( &c, 1 ); - return s; - } - friend Stream & operator<<( Stream & s, int8 & c ) { - nvStaticCheck(sizeof(int8) == 1); - s.serialize( &c, 1 ); - return s; - } - friend Stream & operator<<( Stream & s, uint16 & c ) { - nvStaticCheck(sizeof(uint16) == 2); - return s.byteOrderSerialize( &c, 2 ); - } - friend Stream & operator<<( Stream & s, int16 & c ) { - nvStaticCheck(sizeof(int16) == 2); - return s.byteOrderSerialize( &c, 2 ); - } - friend Stream & operator<<( Stream & s, uint32 & c ) { - nvStaticCheck(sizeof(uint32) == 4); - return s.byteOrderSerialize( &c, 4 ); - } - friend Stream & operator<<( Stream & s, int32 & c ) { - nvStaticCheck(sizeof(int32) == 4); - return s.byteOrderSerialize( &c, 4 ); - } - friend Stream & operator<<( Stream & s, uint64 & c ) { - nvStaticCheck(sizeof(uint64) == 8); - return s.byteOrderSerialize( &c, 8 ); - } - friend Stream & operator<<( Stream & s, int64 & c ) { - nvStaticCheck(sizeof(int64) == 8); - return s.byteOrderSerialize( &c, 8 ); - } - friend Stream & operator<<( Stream & s, float & c ) { - nvStaticCheck(sizeof(float) == 4); - return s.byteOrderSerialize( &c, 4 ); - } - friend Stream & operator<<( Stream & s, double & c ) { - nvStaticCheck(sizeof(double) == 8); - return s.byteOrderSerialize( &c, 8 ); - } - - protected: - - /// Serialize in the stream byte order. 
- Stream & byteOrderSerialize( void * v, uint len ) { - if( m_byteOrder == getSystemByteOrder() ) { - serialize( v, len ); - } - else { - for( uint i = len; i > 0; i-- ) { - serialize( (uint8 *)v + i - 1, 1 ); - } + else { + for( uint i = len; i > 0; i-- ) { + serialize( (uint8 *)v + i - 1, 1 ); } - return *this; } + return *this; + } - private: +private: - ByteOrder m_byteOrder; + ByteOrder m_byteOrder; - }; +}; } // nv namespace -#endif // NV_CORE_STREAM_H +#endif // NV_STREAM_H diff --git a/src/nvcore/TextReader.cpp b/src/nvcore/TextReader.cpp index 711cba7..8eb7461 100644 --- a/src/nvcore/TextReader.cpp +++ b/src/nvcore/TextReader.cpp @@ -1,6 +1,6 @@ -// This code is in the public domain -- castano@gmail.com +// This code is in the public domain -- castanyo@yahoo.es -#include "TextReader.h" +#include using namespace nv; @@ -48,7 +48,7 @@ const char * TextReader::readToEnd() m_text.reserve(size + 1); m_text.resize(size); - m_stream->serialize(m_text.mutableBuffer(), size); + m_stream->serialize(m_text.unsecureBuffer(), size); m_text.pushBack('\0'); return m_text.buffer(); diff --git a/src/nvcore/TextReader.h b/src/nvcore/TextReader.h index 513ec0f..b3d6d37 100644 --- a/src/nvcore/TextReader.h +++ b/src/nvcore/TextReader.h @@ -1,10 +1,11 @@ -// This code is in the public domain -- castano@gmail.com +// This code is in the public domain -- castanyo@yahoo.es -#ifndef NV_CORE_TEXTREADER_H -#define NV_CORE_TEXTREADER_H +#ifndef NVCORE_TEXTREADER_H +#define NVCORE_TEXTREADER_H -#include +#include #include +#include namespace nv { @@ -34,4 +35,4 @@ private: } // nv namespace -#endif // NV_CORE_TEXTREADER_H +#endif // NVCORE_TEXTREADER_H diff --git a/src/nvcore/TextWriter.cpp b/src/nvcore/TextWriter.cpp index 8078d0c..f5e1783 100644 --- a/src/nvcore/TextWriter.cpp +++ b/src/nvcore/TextWriter.cpp @@ -1,6 +1,6 @@ -// This code is in the public domain -- castano@gmail.com +// This code is in the public domain -- castanyo@yahoo.es -#include "TextWriter.h" +#include using 
namespace nv; diff --git a/src/nvcore/TextWriter.h b/src/nvcore/TextWriter.h index 9d90002..155373c 100644 --- a/src/nvcore/TextWriter.h +++ b/src/nvcore/TextWriter.h @@ -1,10 +1,11 @@ -// This code is in the public domain -- castano@gmail.com +// This code is in the public domain -- castanyo@yahoo.es -#ifndef NV_CORE_TEXTWRITER_H -#define NV_CORE_TEXTWRITER_H +#ifndef NVCORE_TEXTWRITER_H +#define NVCORE_TEXTWRITER_H -#include +#include #include +#include namespace nv { diff --git a/src/nvcore/Timer.h b/src/nvcore/Timer.h deleted file mode 100644 index d6cb030..0000000 --- a/src/nvcore/Timer.h +++ /dev/null @@ -1,60 +0,0 @@ -// This code is in the public domain -- castano@gmail.com - -#ifndef NV_CORE_TIMER_H -#define NV_CORE_TIMER_H - -#include - -#if 1 - -#include //clock - -class NVCORE_CLASS Timer -{ -public: - Timer() {} - - void start() { m_start = clock(); } - void stop() { m_stop = clock(); } - - float elapsed() const { return float(m_stop - m_start) / CLOCKS_PER_SEC; } - -private: - clock_t m_start; - clock_t m_stop; -}; - -#else - -#define WINDOWS_LEAN_AND_MEAN -#define VC_EXTRALEAN -#define NOMINMAX -#include - -class NVCORE_CLASS Timer -{ -public: - Timer() { - // get the tick frequency from the OS - QueryPerformanceFrequency((LARGE_INTEGER*) &m_frequency); - } - - void start() { QueryPerformanceCounter((LARGE_INTEGER*) &m_start); } - void stop() { QueryPerformanceCounter((LARGE_INTEGER*) &m_stop); } - - int elapsed() const { - return (int)1000 * ((double)m_stop.QuadPart - (double)m_start.QuadPart) / (double)m_frequency.QuadPart; - } - -private: - LARGE_INTEGER m_frequency; - LARGE_INTEGER m_start; - LARGE_INTEGER m_stop; - -}; - -#endif // 0 - - - -#endif // NV_CORE_TIMER_H diff --git a/src/nvcore/Tokenizer.cpp b/src/nvcore/Tokenizer.cpp new file mode 100644 index 0000000..b67e00f --- /dev/null +++ b/src/nvcore/Tokenizer.cpp @@ -0,0 +1,229 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include +#include + +#include // vsscanf 
+#include // va_list +#include // atof, atoi + +#if NV_CC_MSVC +#if 0 // This doesn't work on MSVC for x64 +/* vsscanf for Win32 + * Written 5/2003 by + * This code is in the Public Domain + */ + +#include // alloca +//#include + +static int vsscanf(const char * buffer, const char * format, va_list argPtr) +{ + // Get an upper bound for the # of args + size_t count = 0; + const char *p = format; + while(1) { + char c = *(p++); + if(c==0) break; + if(c=='%' && (p[0]!='*' && p[0]!='%')) ++count; + } + + // Make a local stack + size_t stackSize = (2+count)*sizeof(void*); + void **newStack = (void**)alloca(stackSize); + + // Fill local stack the way sscanf likes it + newStack[0] = (void*)buffer; + newStack[1] = (void*)format; + memcpy(newStack+2, argPtr, count*sizeof(void*)); + + // @@ Use: CALL DWORD PTR [sscanf] + + // Warp into system sscanf with new stack + int result; + void *savedESP; + __asm + { + mov savedESP, esp + mov esp, newStack +#if _MSC_VER >= 1400 + call DWORD PTR [sscanf_s] +#else + call DWORD PTR [sscanf] +#endif + mov esp, savedESP + mov result, eax + } + return result; +} +#endif +#endif + +using namespace nv; + +Token::Token() : + m_str(""), m_len(0) +{ +} + +Token::Token(const Token & token) : + m_str(token.m_str), m_len(token.m_len) +{ +} + +Token::Token(const char * str, int len) : + m_str(str), m_len(len) +{ +} + +bool Token::operator==(const char * str) const +{ + return strncmp(m_str, str, m_len) == 0; +} +bool Token::operator!=(const char * str) const +{ + return strncmp(m_str, str, m_len) != 0; +} + +bool Token::isNull() +{ + return m_len != 0; +} + +float Token::toFloat() const +{ + return float(atof(m_str)); +} + +int Token::toInt() const +{ + return atoi(m_str); +} + +uint Token::toUnsignedInt() const +{ + // @@ TBD + return uint(atoi(m_str)); +} + +String Token::toString() const +{ + return String(m_str, m_len); +} + +bool Token::parse(const char * format, int count, ...) 
const +{ + va_list arg; + va_start(arg, count); + + int readCount = vsscanf(m_str, format, arg); + + va_end(arg); + + return readCount == count; +} + + +Tokenizer::Tokenizer(Stream * stream) : + m_reader(stream), m_lineNumber(0), m_columnNumber(0), m_delimiters("{}()="), m_spaces(" \t") +{ +} + +bool Tokenizer::nextLine(bool skipEmptyLines /*= true*/) +{ + do { + if (!readLine()) { + return false; + } + } + while (!readToken() && skipEmptyLines); + + return true; +} + +bool Tokenizer::nextToken(bool skipEndOfLine /*= false*/) +{ + if (!readToken()) { + if (!skipEndOfLine) { + return false; + } + else { + return nextLine(true); + } + } + return true; +} + +bool Tokenizer::readToken() +{ + skipSpaces(); + + const char * begin = m_line + m_columnNumber; + + if (*begin == '\0') { + return false; + } + + char c = readChar(); + if (isDelimiter(c)) { + m_token = Token(begin, 1); + return true; + } + + // @@ Add support for quoted tokens "", '' + + int len = 0; + while (!isDelimiter(c) && !isSpace(c) && c != '\0') { + c = readChar(); + len++; + } + m_columnNumber--; + + m_token = Token(begin, len); + + return true; +} + +char Tokenizer::readChar() +{ + return m_line[m_columnNumber++]; +} + +bool Tokenizer::readLine() +{ + m_lineNumber++; + m_columnNumber = 0; + m_line = m_reader.readLine(); + return m_line != NULL; +} + +void Tokenizer::skipSpaces() +{ + while (isSpace(readChar())) {} + m_columnNumber--; +} + +bool Tokenizer::isSpace(char c) +{ + uint i = 0; + while (m_spaces[i] != '\0') { + if (c == m_spaces[i]) { + return true; + } + i++; + } + return false; +} + +bool Tokenizer::isDelimiter(char c) +{ + uint i = 0; + while (m_delimiters[i] != '\0') { + if (c == m_delimiters[i]) { + return true; + } + i++; + } + return false; +} + diff --git a/src/nvcore/Tokenizer.h b/src/nvcore/Tokenizer.h new file mode 100644 index 0000000..48579c8 --- /dev/null +++ b/src/nvcore/Tokenizer.h @@ -0,0 +1,99 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef 
NV_CORE_TOKENIZER_H +#define NV_CORE_TOKENIZER_H + +#include +#include +#include +#include + +namespace nv +{ + /// A token produced by the Tokenizer. + class NVCORE_CLASS Token + { + public: + Token(); + Token(const Token & token); + Token(const char * str, int len); + + bool operator==(const char * str) const; + bool operator!=(const char * str) const; + + bool isNull(); + + float toFloat() const; + int toInt() const; + uint toUnsignedInt() const; + String toString() const; + + bool parse(const char * format, int count, ...) const __attribute__((format (scanf, 2, 4))); + + private: + const char * m_str; + int m_len; + }; + + /// Exception thrown by the tokenizer. + class TokenizerException + { + public: + TokenizerException(int line, int column) : m_line(line), m_column(column) {} + + int line() const { return m_line; } + int column() const { return m_column; } + + private: + int m_line; + int m_column; + }; + + // @@ Use enums instead of bools for clarity! + //enum SkipEmptyLines { skipEmptyLines, noSkipEmptyLines }; + //enum SkipEndOfLine { skipEndOfLine, noSkipEndOfLine }; + + /// A simple stream tokenizer. 
+ class NVCORE_CLASS Tokenizer + { + public: + Tokenizer(Stream * stream); + + bool nextLine(bool skipEmptyLines = true); + bool nextToken(bool skipEndOfLine = false); + + const Token & token() const { return m_token; } + + int lineNumber() const { return m_lineNumber; } + int columnNumber() const { return m_columnNumber; } + + void setDelimiters(const char * str) { m_delimiters = str; } + const char * delimiters() const { return m_delimiters; } + + void setSpaces(const char * str) { m_spaces = str; } + const char * spaces() const { return m_spaces; } + + private: + char readChar(); + bool readLine(); + bool readToken(); + void skipSpaces(); + bool isSpace(char c); + bool isDelimiter(char c); + + private: + TextReader m_reader; + const char * m_line; + Token m_token; + + int m_lineNumber; + int m_columnNumber; + + const char * m_delimiters; + const char * m_spaces; + }; + +} // nv namespace + + +#endif // NV_CORE_TOKENIZER_H diff --git a/src/nvcore/nvcore.h b/src/nvcore/nvcore.h index 92a34b0..469f6ad 100644 --- a/src/nvcore/nvcore.h +++ b/src/nvcore/nvcore.h @@ -22,7 +22,7 @@ // Platform definitions -#include +#include "poshlib/posh.h" // OS: // NV_OS_WIN32 @@ -38,9 +38,6 @@ #if defined POSH_OS_LINUX # define NV_OS_LINUX 1 # define NV_OS_UNIX 1 -#elif defined POSH_OS_FREEBSD -# define NV_OS_FREEBSD 1 -# define NV_OS_UNIX 1 #elif defined POSH_OS_CYGWIN32 # define NV_OS_CYGWIN 1 #elif defined POSH_OS_MINGW @@ -129,13 +126,6 @@ #define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2 #define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3) #define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3 -#define NV_STRING2(x) #x -#define NV_STRING(x) NV_STRING2(x) -#if NV_CC_GNUC -#define NV_FILE_LINE __FILE__ ":" NV_STRING(__LINE__) ": " -#else -#define NV_FILE_LINE __FILE__ "(" NV_STRING(__LINE__) ") : " -#endif // Startup initialization macro. 
#define NV_AT_STARTUP(some_code) \ @@ -168,7 +158,7 @@ #elif NV_CC_GNUC # if NV_OS_LINUX # include "DefsGnucLinux.h" -# elif NV_OS_DARWIN || NV_OS_FREEBSD +# elif NV_OS_DARWIN # include "DefsGnucDarwin.h" # elif NV_OS_MINGW # include "DefsGnucWin32.h" diff --git a/extern/poshlib/CMakeLists.txt b/src/nvcore/poshlib/CMakeLists.txt similarity index 100% rename from extern/poshlib/CMakeLists.txt rename to src/nvcore/poshlib/CMakeLists.txt diff --git a/extern/poshlib/posh.c b/src/nvcore/poshlib/posh.c similarity index 100% rename from extern/poshlib/posh.c rename to src/nvcore/poshlib/posh.c diff --git a/extern/poshlib/posh.h b/src/nvcore/poshlib/posh.h similarity index 99% rename from extern/poshlib/posh.h rename to src/nvcore/poshlib/posh.h index 787419c..3a7c381 100644 --- a/extern/poshlib/posh.h +++ b/src/nvcore/poshlib/posh.h @@ -293,11 +293,6 @@ Metrowerks: # define POSH_OS_STRING "Linux" #endif -#if defined __FreeBSD__ -# define POSH_OS_FREEBSD 1 -# define POSH_OS_STRING "FreeBSD" -#endif - #if defined __CYGWIN32__ # define POSH_OS_CYGWIN32 1 # define POSH_OS_STRING "Cygwin" diff --git a/src/nvimage/BlockDXT.cpp b/src/nvimage/BlockDXT.cpp index 25fd64d..6e185b5 100644 --- a/src/nvimage/BlockDXT.cpp +++ b/src/nvimage/BlockDXT.cpp @@ -21,13 +21,10 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#include "BlockDXT.h" - -#include - #include -#include // swap +#include "ColorBlock.h" +#include "BlockDXT.h" using namespace nv; @@ -39,9 +36,9 @@ using namespace nv; uint BlockDXT1::evaluatePalette(Color32 color_array[4]) const { // Does bit expansion before interpolation. - color_array[0].r = (col0.r << 3) | (col0.r >> 2); - color_array[0].g = (col0.g << 2) | (col0.g >> 4); color_array[0].b = (col0.b << 3) | (col0.b >> 2); + color_array[0].g = (col0.g << 2) | (col0.g >> 4); + color_array[0].r = (col0.r << 3) | (col0.r >> 2); color_array[0].a = 0xFF; // @@ Same as above, but faster? 
@@ -93,51 +90,6 @@ uint BlockDXT1::evaluatePalette(Color32 color_array[4]) const } } - -uint BlockDXT1::evaluatePaletteNV5x(Color32 color_array[4]) const -{ - // Does bit expansion before interpolation. - color_array[0].r = (3 * col0.r * 22) / 8; - color_array[0].g = (col0.g << 2) | (col0.g >> 4); - color_array[0].b = (3 * col0.b * 22) / 8; - color_array[0].a = 0xFF; - - color_array[1].r = (3 * col1.r * 22) / 8; - color_array[1].g = (col1.g << 2) | (col1.g >> 4); - color_array[1].b = (3 * col1.b * 22) / 8; - color_array[1].a = 0xFF; - - if( col0.u > col1.u ) { - // Four-color block: derive the other two colors. - color_array[2].r = (2 * col0.r + col1.r) * 22 / 8; - color_array[2].g = (256 * color_array[0].g + (color_array[1].g - color_array[0].g)/4 + 128 + (color_array[1].g - color_array[0].g) * 80) / 256; - color_array[2].b = (2 * col0.b + col1.b) * 22 / 8; - color_array[2].a = 0xFF; - - color_array[3].r = (2 * col1.r + col0.r) * 22 / 8; - color_array[3].g = (256 * color_array[1].g + (color_array[0].g - color_array[1].g)/4 + 128 + (color_array[0].g - color_array[1].g) * 80) / 256; - color_array[3].b = (2 * col1.b + col0.b) * 22 / 8; - - color_array[3].a = 0xFF; - return 4; - } - else { - // Three-color block: derive the other color. - color_array[2].r = (col0.r + col1.r) * 33 / 8; - color_array[2].g = (256 * color_array[0].g + (color_array[1].g - color_array[0].g)/4 + 128 + (color_array[1].g - color_array[0].g) * 128) / 256; - color_array[2].b = (col0.b + col1.b) * 33 / 8; - color_array[2].a = 0xFF; - - // Set all components to 0 to match DXT specs. - color_array[3].r = 0x00; // color_array[2].r; - color_array[3].g = 0x00; // color_array[2].g; - color_array[3].b = 0x00; // color_array[2].b; - color_array[3].a = 0x00; - - return 3; - } -} - // Evaluate palette assuming 3 color block. void BlockDXT1::evaluatePalette3(Color32 color_array[4]) const { @@ -190,6 +142,95 @@ void BlockDXT1::evaluatePalette4(Color32 color_array[4]) const } +/* Jason Dorie's code. 
+// ---------------------------------------------------------------------------- +// Build palette for a 3 color + traparent black block +// ---------------------------------------------------------------------------- +void DXTCGen::BuildCodes3(cbVector *pVects, cbVector &v1, cbVector &v2) +{ + //pVects[0] = v1; + //pVects[2] = v2; + //pVects[1][0] = v1[0]; + //pVects[1][1] = (BYTE)( ((long)v1[1] + (long)v2[1]) / 2 ); + //pVects[1][2] = (BYTE)( ((long)v1[2] + (long)v2[2]) / 2 ); + //pVects[1][3] = (BYTE)( ((long)v1[3] + (long)v2[3]) / 2 ); + + __asm { + mov ecx, dword ptr pVects + mov eax, dword ptr v1 + mov ebx, dword ptr v2 + + movd mm0, [eax] + movd mm1, [ebx] + pxor mm2, mm2 + nop + + movd [ecx], mm0 + movd [ecx+8], mm1 + + punpcklbw mm0, mm2 + punpcklbw mm1, mm2 + + paddw mm0, mm1 + psrlw mm0, 1 + + packuswb mm0, mm0 + movd [ecx+4], mm0 + } + // *(long *)&pVects[1] = r1; +} + +__int64 ScaleOneThird = 0x5500550055005500; + +// ---------------------------------------------------------------------------- +// Build palette for a 4 color block +// ---------------------------------------------------------------------------- +void DXTCGen::BuildCodes4(cbVector *pVects, cbVector &v1, cbVector &v2) +{ +// pVects[0] = v1; +// pVects[3] = v2; +// +// pVects[1][0] = v1[0]; +// pVects[1][1] = (BYTE)( ((long)v1[1] * 2 + (long)v2[1]) / 3 ); +// pVects[1][2] = (BYTE)( ((long)v1[2] * 2 + (long)v2[2]) / 3 ); +// pVects[1][3] = (BYTE)( ((long)v1[3] * 2 + (long)v2[3]) / 3 ); +// +// pVects[2][0] = v1[0]; +// pVects[2][1] = (BYTE)( ((long)v2[1] * 2 + (long)v1[1]) / 3 ); +// pVects[2][2] = (BYTE)( ((long)v2[2] * 2 + (long)v1[2]) / 3 ); +// pVects[2][3] = (BYTE)( ((long)v2[3] * 2 + (long)v1[3]) / 3 ); + + __asm { + mov ecx, dword ptr pVects + mov eax, dword ptr v1 + mov ebx, dword ptr v2 + + movd mm0, [eax] + movd mm1, [ebx] + + pxor mm2, mm2 + movd [ecx], mm0 + movd [ecx+12], mm1 + + punpcklbw mm0, mm2 + punpcklbw mm1, mm2 + movq mm3, mm0 // mm3 = v0 + + paddw mm0, mm1 // mm0 = v0 
+ v1 + paddw mm3, mm3 // mm3 = v0*2 + + paddw mm0, mm1 // mm0 = v0 + v1*2 + paddw mm1, mm3 // mm1 = v0*2 + v1 + + pmulhw mm0, ScaleOneThird + pmulhw mm1, ScaleOneThird + packuswb mm1, mm0 + + movq [ecx+4], mm1 + } +} +*/ + void BlockDXT1::decodeBlock(ColorBlock * block) const { nvDebugCheck(block != NULL); @@ -207,24 +248,6 @@ void BlockDXT1::decodeBlock(ColorBlock * block) const } } -void BlockDXT1::decodeBlockNV5x(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - // Decode color block. - Color32 color_array[4]; - evaluatePaletteNV5x(color_array); - - // Write color block. - for( uint j = 0; j < 4; j++ ) { - for( uint i = 0; i < 4; i++ ) { - uint idx = (row[j] >> (2 * i)) & 3; - block->color(i, j) = color_array[idx]; - } - } -} - - void BlockDXT1::setIndices(int * idx) { indices = 0; @@ -263,14 +286,6 @@ void BlockDXT3::decodeBlock(ColorBlock * block) const alpha.decodeBlock(block); } -void BlockDXT3::decodeBlockNV5x(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - color.decodeBlockNV5x(block); - alpha.decodeBlock(block); -} - void AlphaBlockDXT3::decodeBlock(ColorBlock * block) const { nvDebugCheck(block != NULL); @@ -451,17 +466,7 @@ void BlockDXT5::decodeBlock(ColorBlock * block) const // Decode alpha. alpha.decodeBlock(block); -} -void BlockDXT5::decodeBlockNV5x(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - // Decode color. - color.decodeBlockNV5x(block); - - // Decode alpha. - alpha.decodeBlock(block); } /// Flip DXT5 block vertically. 
diff --git a/src/nvimage/BlockDXT.h b/src/nvimage/BlockDXT.h index 1603072..5a45c40 100644 --- a/src/nvimage/BlockDXT.h +++ b/src/nvimage/BlockDXT.h @@ -47,13 +47,11 @@ namespace nv bool isFourColorMode() const; uint evaluatePalette(Color32 color_array[4]) const; - uint evaluatePaletteNV5x(Color32 color_array[4]) const; - + uint evaluatePaletteFast(Color32 color_array[4]) const; void evaluatePalette3(Color32 color_array[4]) const; void evaluatePalette4(Color32 color_array[4]) const; void decodeBlock(ColorBlock * block) const; - void decodeBlockNV5x(ColorBlock * block) const; void setIndices(int * idx); @@ -107,7 +105,6 @@ namespace nv BlockDXT1 color; void decodeBlock(ColorBlock * block) const; - void decodeBlockNV5x(ColorBlock * block) const; void flip4(); void flip2(); @@ -163,7 +160,6 @@ namespace nv BlockDXT1 color; void decodeBlock(ColorBlock * block) const; - void decodeBlockNV5x(ColorBlock * block) const; void flip4(); void flip2(); diff --git a/src/nvimage/CMakeLists.txt b/src/nvimage/CMakeLists.txt index 83b87fb..3f66b00 100644 --- a/src/nvimage/CMakeLists.txt +++ b/src/nvimage/CMakeLists.txt @@ -14,16 +14,18 @@ SET(IMAGE_SRCS ColorBlock.cpp BlockDXT.h BlockDXT.cpp + HoleFilling.h + HoleFilling.cpp DirectDrawSurface.h DirectDrawSurface.cpp Quantize.h Quantize.cpp NormalMap.h NormalMap.cpp + NormalMipmap.h + NormalMipmap.cpp PsdFile.h - TgaFile.h - ColorSpace.h - ColorSpace.cpp) + TgaFile.h) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) @@ -47,11 +49,6 @@ IF(OPENEXR_FOUND) INCLUDE_DIRECTORIES(${OPENEXR_INCLUDE_PATHS}) ENDIF(OPENEXR_FOUND) -IF(FREEIMAGE_FOUND) - SET(LIBS ${LIBS} ${FREEIMAGE_LIBRARIES}) - INCLUDE_DIRECTORIES(${FREEIMAGE_INCLUDE_PATH}) -ENDIF(FREEIMAGE_FOUND) - # targets ADD_DEFINITIONS(-DNVIMAGE_EXPORTS) diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp index 16cd922..fac8859 100644 --- a/src/nvimage/ColorBlock.cpp +++ b/src/nvimage/ColorBlock.cpp @@ -1,6 +1,5 @@ // This code is in the public domain -- castanyo@yahoo.es 
-#include // swap #include #include #include @@ -58,9 +57,11 @@ void ColorBlock::init(const Image * img, uint x, uint y) const uint bw = min(img->width() - x, 4U); const uint bh = min(img->height() - y, 4U); - nvDebugCheck(bw != 0 && bh != 0); - static const int remainder[] = { + nvDebugCheck(bw != 0); + nvDebugCheck(bh != 0); + + static int remainder[] = { 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 0, @@ -81,116 +82,40 @@ void ColorBlock::init(const Image * img, uint x, uint y) } } -void ColorBlock::init(uint w, uint h, uint * data, uint x, uint y) -{ - nvDebugCheck(data != NULL); - - const uint bw = min(w - x, 4U); - const uint bh = min(h - y, 4U); - nvDebugCheck(bw != 0 && bh != 0); - - // Blocks that are smaller than 4x4 are handled by repeating the pixels. - // @@ Thats only correct when block size is 1, 2 or 4, but not with 3. :( - for (uint i = 0; i < 4; i++) +void ColorBlock::swizzleDXT5n() +{ + for(int i = 0; i < 16; i++) { - const int by = i % bh; - - for (uint e = 0; e < 4; e++) - { - const int bx = e % bw; - const uint idx = (y + by) * w + x + bx; - - color(e, i).u = data[idx]; - } + Color32 c = m_color[i]; + m_color[i] = Color32(0xFF, c.g, 0, c.r); } } -void ColorBlock::init(uint w, uint h, float * data, uint x, uint y) +void ColorBlock::splatX() { - nvDebugCheck(data != NULL); - - const uint bw = min(w - x, 4U); - const uint bh = min(h - y, 4U); - nvDebugCheck(bw != 0 && bh != 0); - - // Blocks that are smaller than 4x4 are handled by repeating the pixels. - // @@ Thats only correct when block size is 1, 2 or 4, but not with 3. 
:( - - for (uint i = 0; i < 4; i++) + for(int i = 0; i < 16; i++) { - const uint by = i % bh; - - for (uint e = 0; e < 4; e++) - { - const uint bx = e % bw; - const uint idx = ((y + by) * w + x + bx) * 4; - - Color32 & c = color(e, i); - c.r = uint8(255 * clamp(data[idx + 0], 0.0f, 1.0f)); - c.g = uint8(255 * clamp(data[idx + 1], 0.0f, 1.0f)); - c.b = uint8(255 * clamp(data[idx + 2], 0.0f, 1.0f)); - c.a = uint8(255 * clamp(data[idx + 3], 0.0f, 1.0f)); - } + uint8 x = m_color[i].r; + m_color[i] = Color32(x, x, x, x); } } -static inline uint8 component(Color32 c, uint i) -{ - if (i == 0) return c.r; - if (i == 1) return c.g; - if (i == 2) return c.b; - if (i == 3) return c.a; - if (i == 4) return 0xFF; - return 0; -} - -void ColorBlock::swizzle(uint x, uint y, uint z, uint w) +void ColorBlock::splatY() { - for (int i = 0; i < 16; i++) + for(int i = 0; i < 16; i++) { - Color32 c = m_color[i]; - m_color[i].r = component(c, x); - m_color[i].g = component(c, y); - m_color[i].b = component(c, z); - m_color[i].a = component(c, w); + uint8 y = m_color[i].g; + m_color[i] = Color32(y, y, y, y); } } - /// Returns true if the block has a single color. bool ColorBlock::isSingleColor() const { - Color32 mask(0xFF, 0xFF, 0xFF, 0x00); - uint u = m_color[0].u & mask.u; - - for (int i = 1; i < 16; i++) - { - if (u != (m_color[i].u & mask.u)) - { - return false; - } - } - - return true; -} - -/// Returns true if the block has a single color, ignoring transparent pixels. 
-bool ColorBlock::isSingleColorNoAlpha() const -{ - Color32 c; - int i; - for(i = 0; i < 16; i++) - { - if (m_color[i].a != 0) c = m_color[i]; - } - - Color32 mask(0xFF, 0xFF, 0xFF, 0x00); - uint u = c.u & mask.u; - - for(; i < 16; i++) + for(int i = 1; i < 16; i++) { - if (u != (m_color[i].u & mask.u)) + if (m_color[0] != m_color[i]) { return false; } diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h index 0588d92..00f9c8e 100644 --- a/src/nvimage/ColorBlock.h +++ b/src/nvimage/ColorBlock.h @@ -18,13 +18,12 @@ namespace nv ColorBlock(const Image * img, uint x, uint y); void init(const Image * img, uint x, uint y); - void init(uint w, uint h, uint * data, uint x, uint y); - void init(uint w, uint h, float * data, uint x, uint y); - void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 + void swizzleDXT5n(); + void splatX(); + void splatY(); bool isSingleColor() const; - bool isSingleColorNoAlpha() const; uint countUniqueColors() const; Color32 averageColor() const; bool hasAlpha() const; diff --git a/src/nvimage/ColorSpace.cpp b/src/nvimage/ColorSpace.cpp deleted file mode 100644 index f6ac4ce..0000000 --- a/src/nvimage/ColorSpace.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// This code is in the public domain -- jim@tilander.org - -#include - -#include -#include - -#include "ColorSpace.h" - -namespace nv -{ - void ColorSpace::RGBtoYCoCg_R(Image* img) - { - const uint w = img->width(); - const uint h = img->height(); - - for( uint y=0; y < h; y++ ) - { - for( uint x=0; x < w; x++ ) - { - Color32 pixel = img->pixel(x, y); - - const int r = pixel.r; - const int g = pixel.g; - const int b = pixel.b; - - const int Co = r - b; - const int t = b + Co/2; - const int Cg = g - t; - const int Y = t + Cg/2; - - // Just saturate the chroma here (we loose out of one bit in each channel) - // this just means that we won't have as high dynamic range. Perhaps a better option - // is to loose the least significant bit instead? 
- pixel.r = clamp(Co + 128, 0, 255); - pixel.g = clamp(Cg + 128, 0, 255); - pixel.b = 0; - pixel.a = Y; - } - } - } - - void ColorSpace::YCoCg_RtoRGB(Image* img) - { - const uint w = img->width(); - const uint h = img->height(); - - for( uint y=0; y < h; y++ ) - { - for( uint x=0; x < w; x++ ) - { - Color32 pixel = img->pixel(x, y); - - const int Co = (int)pixel.r - 128; - const int Cg = (int)pixel.g - 128; - const int Y = pixel.a; - - const int t = Y - Cg/2; - const int g = Cg + t; - const int b = t - Co/2; - const int r = b + Co; - - pixel.r = r; - pixel.g = g; - pixel.b = b; - pixel.a = 1; - } - } - } -} diff --git a/src/nvimage/ColorSpace.h b/src/nvimage/ColorSpace.h deleted file mode 100644 index 8c35760..0000000 --- a/src/nvimage/ColorSpace.h +++ /dev/null @@ -1,21 +0,0 @@ -// This code is in the public domain -- jim@tilander.org - -#ifndef NV_IMAGE_COLORSPACE_H -#define NV_IMAGE_COLORSPACE_H - -namespace nv -{ - class Image; - - // Defines simple mappings between different color spaces and encodes them in the - // input image. - namespace ColorSpace - { - void RGBtoYCoCg_R(Image* img); - void YCoCg_RtoRGB(Image* img); - } -} - - - -#endif diff --git a/src/nvimage/ConeMap.cpp b/src/nvimage/ConeMap.cpp new file mode 100644 index 0000000..ef7bb85 --- /dev/null +++ b/src/nvimage/ConeMap.cpp @@ -0,0 +1,122 @@ +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include + +#include + +#include +#include +#include +#include + +using namespace nv; + + +static float processPixel(const FloatImage * img, uint x, uint y) +{ + nvDebugCheck(img != NULL); + + const uint w = img->width(); + const uint h = img->height(); + + float d = img->pixel(x, y, 0); + + float fx0 = (float) x / w; + float fy0 = (float) y / h; + + float best_ratio = INF; + uint best_x = w; + uint best_y = h; + + for (uint yy = 0; yy < h; yy++) + { + for (uint xx = 0; xx < w; xx++) + { + float ch = d - img->pixel(xx, yy, 0); + + if (ch > 0) + { + float dx = float(xx - x); + float dy = float(yy - y); + + float ratio = (dx * dx + dy * dy) / ch; + + if (ratio < best_ratio) + { + best_x = xx; + best_y = yy; + } + } + } + } + + if (best_x != w) + { + nvDebugCheck(best_y !=h); + + float dx = float(best_x - x) / w; + float dy = float(best_y - y) / h; + + float cw = sqrtf(dx*dx + dy*dy); + float ch = d - img->pixel(xx, yy, 0); + + return min(1, sqrtf(cw / ch)); + } + + return 1; +} + + +// Create cone map using the given kernels. 
+FloatImage * createConeMap(const Image * img, Vector4::Arg heightWeights) +{ + nvCheck(img != NULL); + + const uint w = img->width(); + const uint h = img->height(); + + AutoPtr fimage(new FloatImage()); + //fimage->allocate(2, w, h); + fimage->allocate(4, w, h); + + // Compute height and store in red channel: + float * heightChannel = fimage->channel(0); + for(uint i = 0; i < w*h; i++) + { + Vector4 color = toVector4(img->pixel(i)); + heightChannel[i] = dot(color, heightWeights); + } + + // Compute cones: + for(uint y = 0; y < h; y++) + { + for(uint x = 0; x < w; x++) + { + processPixel(fimage, x, y); + } + } + + return fimage.release(); +} + diff --git a/src/nvtt/tests/imperativeapi.cpp b/src/nvimage/ConeMap.h similarity index 58% rename from src/nvtt/tests/imperativeapi.cpp rename to src/nvimage/ConeMap.h index 4a50c74..0c79533 100644 --- a/src/nvtt/tests/imperativeapi.cpp +++ b/src/nvimage/ConeMap.h @@ -1,58 +1,39 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include - -#include - - -int main(int argc, char *argv[]) -{ - nvtt::CompressionOptions compressionOptions; - compressionOptions.setFormat(nvtt::Format_BC1); - - nvtt::OutputOptions outputOptions; - outputOptions.setFileName("output.dds"); - - nvtt::Context context; - nvtt::TexImage image = context.createTexImage(); - - image.load("kodim01.png"); - - context.outputHeader(image, image.countMipmaps(), compressionOptions, outputOptions); - - float gamma = 2.2; - image.toLinear(gamma); - - while (image.buildNextMipmap(nvtt::MipmapFilter_Box)) - { - nvtt::TexImage tmpImage = image; - tmpImage.toGamma(gamma); - - context.compress(tmpImage, compressionOptions, outputOptions); - // tmpImage.compress(compressionOptions, outputOptions); - } - - return EXIT_SUCCESS; -} - +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NV_IMAGE_CONEMAP_H +#define NV_IMAGE_CONEMAP_H + +#include +#include + +namespace nv +{ + class Image; + class FloatImage; + + FloatImage * createConeMap(const Image * img, Vector4::Arg heightWeights); + +} // nv namespace + +#endif // NV_IMAGE_CONEMAP_H diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp index 49bac37..42e71c3 100644 --- a/src/nvimage/DirectDrawSurface.cpp +++ b/src/nvimage/DirectDrawSurface.cpp @@ -21,16 +21,16 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. +#include +#include // max +#include + #include #include #include #include #include -#include -#include // max -#include - #include // memset @@ -406,14 +406,10 @@ namespace nv s << pf.flags; s << pf.fourcc; s << pf.bitcount; - s.serialize(&pf.rmask, sizeof(pf.rmask)); - s.serialize(&pf.gmask, sizeof(pf.gmask)); - s.serialize(&pf.bmask, sizeof(pf.bmask)); - s.serialize(&pf.amask, sizeof(pf.amask)); - // s << pf.rmask; - // s << pf.gmask; - // s << pf.bmask; - // s << pf.amask; + s << pf.rmask; + s << pf.gmask; + s << pf.bmask; + s << pf.amask; return s; } @@ -449,9 +445,7 @@ namespace nv s << header.pitch; s << header.depth; s << header.mipmapcount; - for (int i = 0; i < 11; i++) { - s << header.reserved[i]; - } + s.serialize(header.reserved, 11 * sizeof(uint)); s << header.pf; s << header.caps; s << header.notused; @@ -538,7 +532,7 @@ DDSHeader::DDSHeader() // Store version information on the reserved header attributes. 
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T'); - this->reserved[10] = (2 << 16) | (1 << 8) | (0); // major.minor.revision + this->reserved[10] = (2 << 16) | (0 << 8) | (7); // major.minor.revision this->pf.size = 32; this->pf.flags = 0; @@ -576,7 +570,7 @@ void DDSHeader::setHeight(uint h) void DDSHeader::setDepth(uint d) { this->flags |= DDSD_DEPTH; - this->depth = d; + this->height = d; } void DDSHeader::setMipmapCount(uint count) @@ -605,7 +599,6 @@ void DDSHeader::setMipmapCount(uint count) void DDSHeader::setTexture2D() { this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; - this->header10.arraySize = 1; } void DDSHeader::setTexture3D() @@ -613,7 +606,6 @@ void DDSHeader::setTexture3D() this->caps.caps2 = DDSCAPS2_VOLUME; this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE3D; - this->header10.arraySize = 1; } void DDSHeader::setTextureCube() @@ -644,33 +636,22 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3) // set fourcc pixel format. this->pf.flags = DDPF_FOURCC; this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3); - - this->pf.bitcount = 0; - this->pf.rmask = 0; - this->pf.gmask = 0; - this->pf.bmask = 0; - this->pf.amask = 0; -} - -void DDSHeader::setFormatCode(uint32 code) -{ - // set fourcc pixel format. - this->pf.flags = DDPF_FOURCC; - this->pf.fourcc = code; - this->pf.bitcount = 0; + if (this->pf.fourcc == FOURCC_ATI2) + { + this->pf.bitcount = FOURCC_A2XY; + } + else + { + this->pf.bitcount = 0; + } + this->pf.rmask = 0; this->pf.gmask = 0; this->pf.bmask = 0; this->pf.amask = 0; } -void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3) -{ - this->pf.bitcount = MAKEFOURCC(c0, c1, c2, c3); -} - - void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) { // Make sure the masks are correct. 
@@ -681,17 +662,10 @@ void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask nvCheck((gmask & amask) == 0); nvCheck((bmask & amask) == 0); - if (rmask != 0 || gmask != 0 || bmask != 0) - { - this->pf.flags = DDPF_RGB; - - if (amask != 0) { - this->pf.flags |= DDPF_ALPHAPIXELS; - } - } - else if (amask != 0) - { - this->pf.flags |= DDPF_ALPHA; + this->pf.flags = DDPF_RGB; + + if (amask != 0) { + this->pf.flags |= DDPF_ALPHAPIXELS; } if (bitcount == 0) @@ -733,12 +707,6 @@ void DDSHeader::setNormalFlag(bool b) else this->pf.flags &= ~DDPF_NORMAL; } -void DDSHeader::setHasAlphaFlag(bool b) -{ - if (b) this->pf.flags |= DDPF_ALPHAPIXELS; - else this->pf.flags &= ~DDPF_ALPHAPIXELS; -} - void DDSHeader::swapBytes() { this->fourcc = POSH_LittleU32(this->fourcc); @@ -791,15 +759,6 @@ DirectDrawSurface::DirectDrawSurface(const char * name) : stream(new StdInputStr } } -DirectDrawSurface::DirectDrawSurface(Stream * s) : stream(s) -{ - if (!stream->isError()) - { - (*stream) << header; - } -} - - DirectDrawSurface::~DirectDrawSurface() { delete stream; @@ -839,16 +798,6 @@ bool DirectDrawSurface::isSupported() const if (header.hasDX10Header()) { - if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) - { - return true; - } - - return false; } else { @@ -892,41 +841,6 @@ bool DirectDrawSurface::isSupported() const return true; } -bool DirectDrawSurface::hasAlpha() const -{ - if (header.hasDX10Header()) - { -#pragma message(NV_FILE_LINE "TODO: Update hasAlpha to handle all DX10 formats.") - return - header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM; - } - else - { - if (header.pf.flags & DDPF_RGB) - { - return 
header.pf.amask != 0; - } - else if (header.pf.flags & DDPF_FOURCC) - { - if (header.pf.fourcc == FOURCC_RXGB || - header.pf.fourcc == FOURCC_ATI1 || - header.pf.fourcc == FOURCC_ATI2 || - header.pf.flags & DDPF_NORMAL) - { - return false; - } - else - { - // @@ Here we could check the ALPHA_PIXELS flag, but nobody sets it. - return true; - } - } - - return false; - } -} uint DirectDrawSurface::mipmapCount() const { @@ -1005,13 +919,6 @@ void DirectDrawSurface::setNormalFlag(bool b) header.setNormalFlag(b); } -void DirectDrawSurface::setHasAlphaFlag(bool b) -{ - nvDebugCheck(isValid()); - header.setHasAlphaFlag(b); -} - - void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap) { nvDebugCheck(isValid()); @@ -1029,32 +936,15 @@ void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap) } img->allocate(w, h); - - if (hasAlpha()) - { - img->setFormat(Image::Format_ARGB); - } - else + + if (header.pf.flags & DDPF_RGB) { - img->setFormat(Image::Format_RGB); + readLinearImage(img); } - - if (header.hasDX10Header()) + else if (header.pf.flags & DDPF_FOURCC) { - // So far only block formats supported. readBlockImage(img); } - else - { - if (header.pf.flags & DDPF_RGB) - { - readLinearImage(img); - } - else if (header.pf.flags & DDPF_FOURCC) - { - readBlockImage(img); - } - } } void DirectDrawSurface::readLinearImage(Image * img) @@ -1079,7 +969,16 @@ void DirectDrawSurface::readLinearImage(Image * img) uint byteCount = (header.pf.bitcount + 7) / 8; -#pragma message(NV_FILE_LINE "TODO: Support floating point linear images and other FOURCC codes.") + // set image format: RGB or ARGB + // alpha channel exists if and only if the alpha mask is non-zero + if (header.pf.amask == 0) + { + img->setFormat(Image::Format_RGB); + } + else + { + img->setFormat(Image::Format_ARGB); + } // Read linear RGB images. 
for (uint y = 0; y < h; y++) @@ -1105,6 +1004,19 @@ void DirectDrawSurface::readBlockImage(Image * img) nvDebugCheck(stream != NULL); nvDebugCheck(img != NULL); + // set image format: RGB or ARGB + if (header.pf.fourcc == FOURCC_RXGB || + header.pf.fourcc == FOURCC_ATI1 || + header.pf.fourcc == FOURCC_ATI2 || + header.pf.flags & DDPF_NORMAL) + { + img->setFormat(Image::Format_RGB); + } + else + { + img->setFormat(Image::Format_ARGB); + } + const uint w = img->width(); const uint h = img->height(); @@ -1149,33 +1061,20 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba) nvDebugCheck(stream != NULL); nvDebugCheck(rgba != NULL); - uint fourcc = header.pf.fourcc; - - // Map DX10 block formats to fourcc codes. - if (header.hasDX10Header()) - { - if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM) fourcc = FOURCC_DXT1; - if (header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM) fourcc = FOURCC_DXT3; - if (header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM) fourcc = FOURCC_DXT5; - if (header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM) fourcc = FOURCC_ATI1; - if (header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) fourcc = FOURCC_ATI2; - } - - - if (fourcc == FOURCC_DXT1) + if (header.pf.fourcc == FOURCC_DXT1) { BlockDXT1 block; *stream << block; block.decodeBlock(rgba); } - else if (fourcc == FOURCC_DXT2 || + else if (header.pf.fourcc == FOURCC_DXT2 || header.pf.fourcc == FOURCC_DXT3) { BlockDXT3 block; *stream << block; block.decodeBlock(rgba); } - else if (fourcc == FOURCC_DXT4 || + else if (header.pf.fourcc == FOURCC_DXT4 || header.pf.fourcc == FOURCC_DXT5 || header.pf.fourcc == FOURCC_RXGB) { @@ -1183,7 +1082,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba) *stream << block; block.decodeBlock(rgba); - if (fourcc == FOURCC_RXGB) + if (header.pf.fourcc == FOURCC_RXGB) { // Swap R & A. 
for (int i = 0; i < 16; i++) @@ -1195,13 +1094,13 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba) } } } - else if (fourcc == FOURCC_ATI1) + else if (header.pf.fourcc == FOURCC_ATI1) { BlockATI1 block; *stream << block; block.decodeBlock(rgba); } - else if (fourcc == FOURCC_ATI2) + else if (header.pf.fourcc == FOURCC_ATI2) { BlockATI2 block; *stream << block; @@ -1211,7 +1110,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba) // If normal flag set, convert to normal. if (header.pf.flags & DDPF_NORMAL) { - if (fourcc == FOURCC_ATI2) + if (header.pf.fourcc == FOURCC_ATI2) { for (int i = 0; i < 16; i++) { @@ -1219,7 +1118,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba) c = buildNormal(c.r, c.g); } } - else if (fourcc == FOURCC_DXT5) + else if (header.pf.fourcc == FOURCC_DXT5) { for (int i = 0; i < 16; i++) { @@ -1245,27 +1144,6 @@ uint DirectDrawSurface::blockSize() const case FOURCC_RXGB: case FOURCC_ATI2: return 16; - case FOURCC_DX10: - switch(header.header10.dxgiFormat) - { - case DXGI_FORMAT_BC1_TYPELESS: - case DXGI_FORMAT_BC1_UNORM: - case DXGI_FORMAT_BC1_UNORM_SRGB: - case DXGI_FORMAT_BC4_TYPELESS: - case DXGI_FORMAT_BC4_UNORM: - case DXGI_FORMAT_BC4_SNORM: - return 8; - case DXGI_FORMAT_BC2_TYPELESS: - case DXGI_FORMAT_BC2_UNORM: - case DXGI_FORMAT_BC2_UNORM_SRGB: - case DXGI_FORMAT_BC3_TYPELESS: - case DXGI_FORMAT_BC3_UNORM: - case DXGI_FORMAT_BC3_UNORM_SRGB: - case DXGI_FORMAT_BC5_TYPELESS: - case DXGI_FORMAT_BC5_UNORM: - case DXGI_FORMAT_BC5_SNORM: - return 16; - }; }; // Not a block image. 
diff --git a/src/nvimage/DirectDrawSurface.h b/src/nvimage/DirectDrawSurface.h index fb19ccc..6ea8c4b 100644 --- a/src/nvimage/DirectDrawSurface.h +++ b/src/nvimage/DirectDrawSurface.h @@ -93,12 +93,9 @@ namespace nv void setLinearSize(uint size); void setPitch(uint pitch); void setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3); - void setFormatCode(uint code); - void setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3); void setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); void setDX10Format(uint format); void setNormalFlag(bool b); - void setHasAlphaFlag(bool b); void swapBytes(); @@ -113,13 +110,10 @@ namespace nv { public: DirectDrawSurface(const char * file); - DirectDrawSurface(Stream * stream); ~DirectDrawSurface(); bool isValid() const; bool isSupported() const; - - bool hasAlpha() const; uint mipmapCount() const; uint width() const; @@ -131,7 +125,6 @@ namespace nv bool isTextureCube() const; void setNormalFlag(bool b); - void setHasAlphaFlag(bool b); void mipmap(Image * img, uint f, uint m); // void mipmap(FloatImage * img, uint f, uint m); diff --git a/src/nvimage/FloatImage.cpp b/src/nvimage/FloatImage.cpp index 77d54e6..90818ca 100644 --- a/src/nvimage/FloatImage.cpp +++ b/src/nvimage/FloatImage.cpp @@ -187,12 +187,12 @@ void FloatImage::normalize(uint base_component) void FloatImage::packNormals(uint base_component) { - scaleBias(base_component, 3, 0.5f, 0.5f); + scaleBias(base_component, 3, 0.5f, 1.0f); } void FloatImage::expandNormals(uint base_component) { - scaleBias(base_component, 3, 2.0f, -1.0f); + scaleBias(base_component, 3, 2, -0.5); } void FloatImage::scaleBias(uint base_component, uint num, float scale, float bias) @@ -203,7 +203,7 @@ void FloatImage::scaleBias(uint base_component, uint num, float scale, float bia float * ptr = this->channel(base_component + c); for(uint i = 0; i < size; i++) { - ptr[i] = scale * ptr[i] + bias; + ptr[i] = scale * (ptr[i] + bias); } } } @@ -242,57 +242,6 @@ void 
FloatImage::exponentiate(uint base_component, uint num, float power) } } -/// Apply linear transform. -void FloatImage::transform(uint base_component, const Matrix & m, Vector4::Arg offset) -{ - nvCheck(base_component + 4 <= m_componentNum); - - const uint size = m_width * m_height; - - float * r = this->channel(base_component + 0); - float * g = this->channel(base_component + 1); - float * b = this->channel(base_component + 2); - float * a = this->channel(base_component + 3); - - for (uint i = 0; i < size; i++) - { - Vector4 color = nv::transform(m, Vector4(*r, *g, *b, *a)) + offset; - - *r++ = color.x(); - *g++ = color.y(); - *b++ = color.z(); - *a++ = color.w(); - } -} - -void FloatImage::swizzle(uint base_component, uint r, uint g, uint b, uint a) -{ - nvCheck(base_component + 4 <= m_componentNum); - nvCheck(r < 7 && g < 7 && b < 7 && a < 7); - - const uint size = m_width * m_height; - - float consts[] = { 1.0f, 0.0f, -1.0f }; - float * c[7]; - c[0] = this->channel(base_component + 0); - c[1] = this->channel(base_component + 1); - c[2] = this->channel(base_component + 2); - c[3] = this->channel(base_component + 3); - c[4] = consts; - c[5] = consts + 1; - c[6] = consts + 2; - - for (uint i = 0; i < size; i++) - { - float tmp[4] = { *c[r], *c[g], *c[b], *c[a] }; - - *c[0]++ = tmp[0]; - *c[1]++ = tmp[1]; - *c[2]++ = tmp[2]; - *c[3]++ = tmp[3]; - } -} - float FloatImage::sampleNearest(const float x, const float y, const int c, const WrapMode wm) const { if( wm == WrapMode_Clamp ) return sampleNearestClamp(x, y, c); @@ -643,7 +592,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode float * dst_channel = dst_image->channel(c); for (uint x = 0; x < w; x++) { - tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer()); + tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer()); for (uint y = 0; y < h; y++) { dst_channel[y * w + x] = tmp_column[y]; @@ -664,7 +613,7 @@ FloatImage * 
FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode float * tmp_channel = tmp_image->channel(c); for (uint x = 0; x < w; x++) { - tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer()); + tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer()); for (uint y = 0; y < h; y++) { tmp_channel[y * w + x] = tmp_column[y]; @@ -716,7 +665,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode float * dst_channel = dst_image->channel(c); for (uint x = 0; x < w; x++) { - tmp_image->applyKernelVertical(ykernel, x, c, alpha, wm, tmp_column.mutableBuffer()); + tmp_image->applyKernelVertical(ykernel, x, c, alpha, wm, tmp_column.unsecureBuffer()); for (uint y = 0; y < h; y++) { dst_channel[y * w + x] = tmp_column[y]; diff --git a/src/nvimage/FloatImage.h b/src/nvimage/FloatImage.h index d58feb8..96d1630 100644 --- a/src/nvimage/FloatImage.h +++ b/src/nvimage/FloatImage.h @@ -8,7 +8,7 @@ #include #include -#include // clamp +#include // clamp #include // abs @@ -68,9 +68,7 @@ public: NVIMAGE_API void toGamma(uint base_component, uint num, float gamma = 2.2f); NVIMAGE_API void exponentiate(uint base_component, uint num, float power); - NVIMAGE_API void transform(uint base_component, const Matrix & m, const Vector4 & offset); - NVIMAGE_API void swizzle(uint base_component, uint r, uint g, uint b, uint a); - + NVIMAGE_API FloatImage * fastDownSample() const; NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const; NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const; diff --git a/src/nvimage/HoleFilling.cpp b/src/nvimage/HoleFilling.cpp new file mode 100644 index 0000000..863dc16 --- /dev/null +++ b/src/nvimage/HoleFilling.cpp @@ -0,0 +1,753 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include +#include + +#include + +#include +#include + +using namespace nv; + + +// This is a variation of Sapiro's inpainting method. 
+void nv::fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap) +{ + nvCheck(img != NULL); + nvCheck(bmap != NULL); + + const int w = img->width(); + const int h = img->height(); + const int count = img->componentNum(); + + nvCheck(bmap->width() == uint(w)); + nvCheck(bmap->height() == uint(h)); + + AutoPtr newbmap(new BitMap(w, h)); + + for(int p = 0; p < passCount; p++) + { + for(int c = 0; c < count; c++) + { + float * channel = img->channel(c); + + for(int y = 0; y < h; y++) { + for(int x = 0; x < w; x++) { + + if (bmap->bitAt(x, y)) { + // Not a hole. + newbmap->setBitAt(x, y); + continue; + } + + const bool west = bmap->bitAt(img->indexClamp(x-1, y)); + const bool east = bmap->bitAt(img->indexClamp(x+1, y)); + const bool north = bmap->bitAt(img->indexClamp(x, y-1)); + const bool south = bmap->bitAt(img->indexClamp(x, y+1)); + const bool northwest = bmap->bitAt(img->indexClamp(x-1, y-1)); + const bool northeast = bmap->bitAt(img->indexClamp(x+1, y-1)); + const bool southwest = bmap->bitAt(img->indexClamp(x-1, y+1)); + const bool southeast = bmap->bitAt(img->indexClamp(x+1, y+1)); + + int num = west + east + north + south + northwest + northeast + southwest + southeast; + + if (num != 0) { + + float average = 0.0f; + if (num == 3 && west && northwest && southwest) { + average = channel[img->indexClamp(x-1, y)]; + } + else if (num == 3 && east && northeast && southeast) { + average = channel[img->indexClamp(x+1, y)]; + } + else if (num == 3 && north && northwest && northeast) { + average = channel[img->indexClamp(x, y-1)]; + } + else if (num == 3 && south && southwest && southeast) { + average = channel[img->indexClamp(x, y+1)]; + } + else { + float total = 0.0f; + if (west) { average += 1 * channel[img->indexClamp(x-1, y)]; total += 1; } + if (east) { average += 1 * channel[img->indexClamp(x+1, y)]; total += 1; } + if (north) { average += 1 * channel[img->indexClamp(x, y-1)]; total += 1; } + if (south) { average += 1 * channel[img->indexClamp(x, 
y+1)]; total += 1; } + + if (northwest) { average += channel[img->indexClamp(x-1, y-1)]; ++total; } + if (northeast) { average += channel[img->indexClamp(x+1, y-1)]; ++total; } + if (southwest) { average += channel[img->indexClamp(x-1, y+1)]; ++total; } + if (southeast) { average += channel[img->indexClamp(x+1, y+1)]; ++total; } + + average /= total; + } + + channel[img->indexClamp(x, y)] = average; + newbmap->setBitAt(x, y); + } + } + } + } + + // Update the bit mask. + swap(*newbmap, *bmap); + } +} + + +namespace { + + struct Neighbor { + uint16 x; + uint16 y; + uint32 d; + }; + + // Compute euclidean squared distance. + static uint dist( uint16 ax, uint16 ay, uint16 bx, uint16 by ) { + int dx = bx - ax; + int dy = by - ay; + return uint(dx*dx + dy*dy); + } + + // Check neighbour, this is the core of the EDT algorithm. + static void checkNeighbour( int x, int y, Neighbor * e, const Neighbor & n ) { + nvDebugCheck(e != NULL); + + uint d = dist( x, y, n.x, n.y ); + if( d < e->d ) { + e->x = n.x; + e->y = n.y; + e->d = d; + } + } + +} // namespace + +// Voronoi filling using EDT-4 +void nv::fillVoronoi(FloatImage * img, const BitMap * bmap) +{ + nvCheck(img != NULL); + + const int w = img->width(); + const int h = img->height(); + const int count = img->componentNum(); + + nvCheck(bmap->width() == uint(w)); + nvCheck(bmap->height() == uint(h)); + + Array edm; + edm.resize(w * h); + + int x, y; + int x0, x1, y0, y1; + + // Init edm. + for( y = 0; y < h; y++ ) { + for( x = 0; x < w; x++ ) { + if( bmap->bitAt(x, y) ) { + edm[y * w + x].x = x; + edm[y * w + x].y = y; + edm[y * w + x].d = 0; + } + else { + edm[y * w + x].x = w; + edm[y * w + x].y = h; + edm[y * w + x].d = w*w + h*h; + } + } + } + + // First pass. + for( y = 0; y < h; y++ ) { + for( x = 0; x < w; x++ ) { + x0 = clamp(x-1, 0, w-1); // @@ Wrap? 
+ x1 = clamp(x+1, 0, w-1); + y0 = clamp(y-1, 0, h-1); + + Neighbor & e = edm[y * w + x]; + checkNeighbour(x, y, &e, edm[y0 * w + x0]); + checkNeighbour(x, y, &e, edm[y0 * w + x]); + checkNeighbour(x, y, &e, edm[y0 * w + x1]); + checkNeighbour(x, y, &e, edm[y * w + x0]); + } + + for( x = w-1; x >= 0; x-- ) { + x1 = clamp(x+1, 0, w-1); + + Neighbor & e = edm[y * w + x]; + checkNeighbour(x, y, &e, edm[y * w + x1]); + } + } + + // Third pass. + for( y = h-1; y >= 0; y-- ) { + for( x = w-1; x >= 0; x-- ) { + x0 = clamp(x-1, 0, w-1); + x1 = clamp(x+1, 0, w-1); + y1 = clamp(y+1, 0, h-1); + + Neighbor & e = edm[y * w + x]; + checkNeighbour(x, y, &e, edm[y * w + x1]); + checkNeighbour(x, y, &e, edm[y1 * w + x0]); + checkNeighbour(x, y, &e, edm[y1 * w + x]); + checkNeighbour(x, y, &e, edm[y1 * w + x1]); + } + + for( x = 0; x < w; x++ ) { + x0 = clamp(x-1, 0, w-1); + + Neighbor & e = edm[y * w + x]; + checkNeighbour(x, y, &e, edm[y * w + x0]); + } + } + + // Fill empty holes. + for( y = 0; y < h; y++ ) { + for( x = 0; x < w; x++ ) { + const int sx = edm[y * w + x].x; + const int sy = edm[y * w + x].y; + nvDebugCheck(sx < w && sy < h); + + if( sx != x || sy != y ) { + for(int c = 0; c < count; c++ ) { + img->setPixel(img->pixel(sx, sy, c), x, y, c); + } + } + } + } + +} + + +void nv::fillBlur(FloatImage * img, const BitMap * bmap) +{ + nvCheck(img != NULL); + + // @@ Apply a 3x3 kernel. +} + + +static bool downsample(const FloatImage * src, const BitMap * srcMask, const FloatImage ** _dst, const BitMap ** _dstMask) +{ + const uint w = src->width(); + const uint h = src->height(); + const uint count = src->componentNum(); + + // count holes in srcMask, return false if fully filled. + uint holes = 0; + for(uint y = 0; y < h; y++) { + for(uint x = 0; x < w; x++) { + holes += srcMask->bitAt(x, y) == 0; + } + } + if (holes == 0 || (w == 2 || h == 2)) { + // Stop when no holes or when the texture is very small. 
+ return false; + } + + // Apply box filter to image and mask and return true. + const uint nw = w / 2; + const uint nh = h / 2; + + FloatImage * dst = new FloatImage(); + dst->allocate(count, nw, nh); + BitMap * dstMask = new BitMap(nw, nh); + + for(uint c = 0; c < count; c++) { + for(uint y = 0; y < nh; y++) { + for(uint x = 0; x < nw; x++) { + + const uint x0 = 2 * x + 0; + const uint x1 = 2 * x + 1; + const uint y0 = 2 * y + 0; + const uint y1 = 2 * y + 1; + + const float f0 = src->pixel(x0, y0, c); + const float f1 = src->pixel(x1, y0, c); + const float f2 = src->pixel(x0, y1, c); + const float f3 = src->pixel(x1, y1, c); + + const bool b0 = srcMask->bitAt(x0, y0); + const bool b1 = srcMask->bitAt(x1, y0); + const bool b2 = srcMask->bitAt(x0, y1); + const bool b3 = srcMask->bitAt(x1, y1); + + if (b0 || b1 || b2 || b3) { + // Set bit mask. + dstMask->setBitAt(x, y); + + // Set pixel. + float value = 0.0f; + int total = 0; + if (b0) { value += f0; total++; } + if (b1) { value += f1; total++; } + if (b2) { value += f2; total++; } + if (b3) { value += f3; total++; } + dst->setPixel(value / total, x, y, c); + } + } + } + } + + *_dst = dst; + *_dstMask = dstMask; + + return true; +} + +// This is the filter used in the Lumigraph paper. +void nv::fillPullPush(FloatImage * img, const BitMap * bmap) +{ + nvCheck(img != NULL); + + const uint count = img->componentNum(); + const uint w = img->width(); + const uint h = img->height(); + const uint num = log2(max(w,h)); + + // Build mipmap chain. + Array mipmaps(num); + Array mipmapMasks(num); + + mipmaps.append(img); + mipmapMasks.append(bmap); + + const FloatImage * current; + const BitMap * currentMask; + + // Compute mipmap chain. + while(downsample(mipmaps.back(), mipmapMasks.back(), ¤t, ¤tMask)) + { + mipmaps.append(current); + mipmapMasks.append(currentMask); + } + + // Sample mipmaps until non-hole is found. 
+ for(uint y = 0; y < h; y++) { + for(uint x = 0; x < w; x++) { + + int sx = x; + int sy = y; + //float sx = x; + //float sy = y; + + const uint levelCount = mipmaps.count(); + for (uint l = 0; l < levelCount; l++) + { + //const float fx = sx / mipmaps[l]->width(); + //const float fy = sy / mipmaps[l]->height(); + + if (mipmapMasks[l]->bitAt(sx, sy)) + { + // Sample mipmaps[l](sx, sy) and copy to img(x, y) + for(uint c = 0; c < count; c++) { + //img->setPixel(mipmaps[l]->linear_clamp(fx, fy, c), x, y, c); + img->setPixel(mipmaps[l]->pixel(sx, sy, c), x, y, c); + } + break; + } + + sx /= 2; + sy /= 2; + } + } + } + + // Don't delete the original image and mask. + mipmaps[0] = NULL; + mipmapMasks[0] = NULL; + + // Delete the mipmaps. + deleteAll(mipmaps); + deleteAll(mipmapMasks); +} + + + +/* + +This Code is from Charles Bloom: + +DoPixelSeamFix +10-20-02 + +Looks in the 5x5 local neighborhood (LocalPixels) of the desired pixel to fill. +It tries to build a quadratic model of the neighborhood surface to use in +extrapolating. You need 5 pixels to establish a 2d quadratic curve. + +This is really just a nice generic way to extrapolate pixels. It also happens +to work great for seam-fixing. + +Note that I'm working on normals, but I treat them just as 3 scalars and normalize +at the end. To be more correct, I would work on the surface of a sphere, but that +just seems like way too much work. 
+ +*/ + +struct LocalPixels +{ + // 5x5 neighborhood + // the center is at result + // index [y][x] + bool fill[5][5]; + float data[5][5]; + + mutable float result; + mutable float weight; + + bool Quad3SubH(float * pQ, int row) const + { + const bool * pFill = fill[row]; + const float * pDat = data[row]; + + if ( pFill[1] && pFill[2] && pFill[3] ) + { + // good row + *pQ = pDat[1] - 2.f * pDat[2] + pDat[3]; + return true; + } + else if ( pFill[0] && pFill[1] && pFill[2] ) + { + // good row + *pQ = pDat[0] - 2.f * pDat[1] + pDat[2]; + return true; + } + else if ( pFill[2] && pFill[3] && pFill[4] ) + { + // good row + *pQ = pDat[2] - 2.f * pDat[3] + pDat[4]; + return true; + } + return false; + } + + // improve result with a horizontal quad in row 1 and/or + bool Quad3SubV(float * pQ, int col) const + { + if ( fill[1][col] && fill[2][col] && fill[3][col] ) + { + // good row + *pQ = data[1][col] - 2.f * data[2][col] + data[3][col]; + return true; + } + else if ( fill[0][col] && fill[1][col] && fill[2][col] ) + { + // good row + *pQ = data[0][col] - 2.f * data[1][col] + data[2][col]; + return true; + } + else if ( fill[2][col] && fill[3][col] && fill[4][col] ) + { + // good row + *pQ = data[2][col] - 2.f * data[3][col] + data[4][col]; + return true; + } + return false; + } + + bool Quad3H(float * pQ) const + { + if (!Quad3SubH(pQ,1)) + { + return Quad3SubH(pQ,3); + } + float q = 0.0f; // initializer not needed, just make it shut up + if (Quad3SubH(&q, 3)) + { + // got q and pQ + *pQ = (*pQ+q)*0.5f; + } + return true; + } + + bool Quad3V(float * pQ) const + { + if (!Quad3SubV(pQ, 1)) + { + return Quad3SubV(pQ, 3); + } + float q = 0.0f; // initializer not needed, just make it shut up + if (Quad3SubV(&q, 3)) + { + // got q and pQ + *pQ = (*pQ + q) * 0.5f; + } + return true; + } + // Quad returns ([0]+[2] - 2.f*[1]) + // a common want is [1] - ([0]+[2])*0.5f ; + // so use -0.5f*Quad + + bool tryQuads() const + { + bool res = false; + + // look for a pair that straddles 
the middle: + if ( fill[2][1] && fill[2][3] ) + { + // got horizontal straddle + float q; + if ( Quad3H(&q) ) + { + result += (data[2][1] + data[2][3] - q) * 0.5f; + weight += 1.f; + res = true; + } + } + if ( fill[1][2] && fill[3][2] ) + { + // got vertical straddle + float q; + if ( Quad3V(&q) ) + { + result += (data[1][2] + data[3][2] - q) * 0.5f; + weight += 1.f; + res = true; + } + } + + // look for pairs that lead into the middle : + if ( fill[2][0] && fill[2][1] ) + { + // got left-side pair + float q; + if ( Quad3H(&q) ) + { + result += data[2][1]*2.f - data[2][0] + q; + weight += 1.f; + res = true; + } + } + if ( fill[2][3] && fill[2][4] ) + { + // got right-side pair + float q; + if ( Quad3H(&q) ) + { + result += data[2][3]*2.f - data[2][4] + q; + weight += 1.f; + res = true; + } + } + if ( fill[0][2] && fill[1][2] ) + { + // got left-side pair + float q; + if ( Quad3V(&q) ) + { + result += data[1][2]*2.f - data[0][2] + q; + weight += 1.f; + res = true; + } + } + if ( fill[3][2] && fill[4][2] ) + { + // got right-side pair + float q; + if ( Quad3V(&q) ) + { + result += data[3][2]*2.f - data[4][2] + q; + weight += 1.f; + res = true; + } + } + return res; + } + + bool tryPlanar() const + { + // four cases : + const int indices[] = + { + 2,1, 1,2, 1,1, + 2,1, 3,2, 3,1, + 2,3, 1,2, 1,3, + 2,3, 3,2, 3,3 + }; + bool res = false; + for (int i = 0; i < 4; i++) + { + const int * I = indices + i*6; + if (!fill[ I[0] ][ I[1] ]) + continue; + if (!fill[ I[2] ][ I[3] ]) + continue; + if (!fill[ I[4] ][ I[5] ]) + continue; + + result += data[ I[0] ][ I[1] ] + data[ I[2] ][ I[3] ] - data[ I[4] ][ I[5] ]; + weight += 1.0f; + res = true; + } + return res; + } + + bool tryTwos() const + { + bool res = false; + + if (fill[2][1] && fill[2][3]) + { + result += (data[2][1] + data[2][3]) * 0.5f; + weight += 1.0f; + res = true; + } + if (fill[1][2] && fill[3][2]) + { + result += (data[1][2] + data[3][2]) * 0.5f; + weight += 1.0f; + res = true; + } + + // four side-rotates : + 
const int indices[] = + { + 2,1, 2,0, + 2,3, 2,4, + 1,2, 0,2, + 3,2, 4,2, + }; + for (int i = 0; i < 4; i++) + { + const int * I = indices + i*4; + if (!fill[ I[0] ][ I[1] ]) + continue; + if (!fill[ I[2] ][ I[3] ]) + continue; + + result += data[ I[0] ][ I[1] ]*2.0f - data[ I[2] ][ I[3] ]; + weight += 1.0f; + res = true; + } + + return res; + } + + bool doLocalPixelFill() const + { + result = 0.0f; + weight = 0.0f; + + if (tryQuads()) { + return true; + } + + if (tryPlanar()) { + return true; + } + + return tryTwos(); + } + +}; // struct LocalPixels + + + +// This is a quadratic extrapolation filter from Charles Bloom (DoPixelSeamFix). Used with his permission. +void nv::fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/) +{ + nvCheck(passCount > 0); + nvCheck(img != NULL); + nvCheck(bmap != NULL); + + const int w = img->width(); + const int h = img->height(); + const int count = img->componentNum(); + + nvCheck(bmap->width() == uint(w)); + nvCheck(bmap->height() == uint(h)); + + AutoPtr newbmap( new BitMap(w, h) ); + + float * coverageChannel = NULL; + if (coverageIndex != -1) + { + coverageChannel = img->channel(coverageIndex); + } + + int firstChannel = -1; + + for (int p = 0; p < passCount; p++) + { + for (int c = 0; c < count; c++) + { + if (c == coverageIndex) continue; + if (firstChannel == -1) firstChannel = c; + + float * channel = img->channel(c); + + for (int yb = 0; yb < h; yb++) { + for (int xb = 0; xb < w; xb++) { + + if (bmap->bitAt(xb, yb)) { + // Not a hole. 
+ newbmap->setBitAt(xb, yb); + continue; + } + + int numFill = 0; + + LocalPixels lp; + for (int ny = 0; ny < 5; ny++) + { + int y = (yb + ny - 2); + if ( y < 0 || y >= h ) + { + // out of range + for(int i = 0; i < 5; i++) + { + lp.fill[ny][i] = false; + } + continue; + } + + for (int nx = 0; nx < 5; nx++) + { + int x = (xb + nx - 2); + if (x < 0 || x >= w) + { + lp.fill[ny][nx] = false; + } + else + { + int idx = img->index(x, y); + if (!bmap->bitAt(idx)) + { + lp.fill[ny][nx] = false; + } + else + { + lp.fill[ny][nx] = true; + lp.data[ny][nx] = channel[idx]; + numFill++; + } + } + } + } + + // need at least 3 to do anything decent + if (numFill < 2) + continue; + + nvDebugCheck(lp.fill[2][2] == false); + + if (lp.doLocalPixelFill()) + { + const int idx = img->index(xb, yb); + channel[idx] = lp.result / lp.weight; + + if (c == firstChannel) + { + //coverageChannel[idx] /= lp.weight; // @@ Not sure what this was for, coverageChannel[idx] is always zero. + newbmap->setBitAt(xb, yb); + } + } + } + } + } + + // Update the bit mask. + swap(*newbmap, *bmap); + } +} diff --git a/src/nvimage/HoleFilling.h b/src/nvimage/HoleFilling.h new file mode 100644 index 0000000..b437e87 --- /dev/null +++ b/src/nvimage/HoleFilling.h @@ -0,0 +1,96 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_IMAGE_HOLEFILLING_H +#define NV_IMAGE_HOLEFILLING_H + +#include +#include + +namespace nv +{ + class FloatImage; + + /// Bit mask. 
+ class BitMap + { + public: + BitMap(uint w, uint h) : + m_width(w), m_height(h), m_bitArray(w*h) + { + } + + const uint width() const { return m_width; } + const uint height() const { return m_height; } + + bool bitAt(uint x, uint y) const + { + nvDebugCheck(x < m_width && y < m_height); + return m_bitArray.bitAt(y * m_width + x); + } + bool bitAt(uint idx) const + { + return m_bitArray.bitAt(idx); + } + + void setBitAt(uint x, uint y) + { + nvDebugCheck(x < m_width && y < m_height); + m_bitArray.setBitAt(y * m_width + x); + } + void setBitAt(uint idx) + { + m_bitArray.setBitAt(idx); + } + + void clearBitAt(uint x, uint y) + { + nvDebugCheck(x < m_width && y < m_height); + m_bitArray.clearBitAt(y * m_width + x); + } + void clearBitAt(uint idx) + { + m_bitArray.clearBitAt(idx); + } + + void clearAll() + { + m_bitArray.clearAll(); + } + + void setAll() + { + m_bitArray.setAll(); + } + + void toggleAll() + { + m_bitArray.toggleAll(); + } + + friend void swap(BitMap & a, BitMap & b) + { + nvCheck(a.m_width == b.m_width); + nvCheck(a.m_height == b.m_height); + //swap(const_cast(a.m_width), const_cast(b.m_width)); + //swap(const_cast(a.m_height), const_cast(b.m_height)); + swap(a.m_bitArray, b.m_bitArray); + } + + private: + + const uint m_width; + const uint m_height; + BitArray m_bitArray; + + }; + + NVIMAGE_API void fillVoronoi(FloatImage * img, const BitMap * bmap); + NVIMAGE_API void fillBlur(FloatImage * img, const BitMap * bmap); + NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap * bmap); + + NVIMAGE_API void fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap); + NVIMAGE_API void fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1); + +} // nv namespace + +#endif // NV_IMAGE_HOLEFILLING_H diff --git a/src/nvimage/Image.cpp b/src/nvimage/Image.cpp index 6676a13..2307d5c 100644 --- a/src/nvimage/Image.cpp +++ b/src/nvimage/Image.cpp @@ -1,13 +1,12 @@ // This code is in the public domain -- 
castanyo@yahoo.es -#include -#include +#include +#include #include -#include -#include -#include // swap +#include +#include using namespace nv; @@ -41,7 +40,7 @@ void Image::allocate(uint w, uint h) { m_width = w; m_height = h; - m_data = (Color32 *)nv::mem::realloc(m_data, w * h * sizeof(Color32)); + m_data = (Color32 *)realloc(m_data, w * h * sizeof(Color32)); } bool Image::load(const char * name) diff --git a/src/nvimage/ImageIO.cpp b/src/nvimage/ImageIO.cpp index cd59f3e..0b24600 100644 --- a/src/nvimage/ImageIO.cpp +++ b/src/nvimage/ImageIO.cpp @@ -1,23 +1,21 @@ // This code is in the public domain -- castanyo@yahoo.es +#include +#include +#include +#include +//#include // @@ Disable temporarily +#include + +#include + #include "ImageIO.h" #include "Image.h" #include "FloatImage.h" #include "TgaFile.h" #include "PsdFile.h" -#include - -#include -#include -#include -#include - // Extern -#if defined(HAVE_FREEIMAGE) -# include -#else - #if defined(HAVE_JPEG) extern "C" { # include @@ -42,58 +40,24 @@ extern "C" { # include #endif -#endif // defined(HAVE_FREEIMAGE) - using namespace nv; -namespace nv -{ - namespace ImageIO - { - #if defined(HAVE_FREEIMAGE) - - static Image * loadFreeImage(FREE_IMAGE_FORMAT fif, Stream & s); - static FloatImage * loadFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s); - - static bool saveFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const Image * img, const ImageMetaData * tags); - static bool saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const FloatImage * img, uint base_component, uint num_components); - - #else // defined(HAVE_FREEIMAGE) - - struct Color555 { - uint16 b : 5; - uint16 g : 5; - uint16 r : 5; - }; - - static Image * loadTGA(Stream & s); - static bool saveTGA(Stream & s, const Image * img); - - static Image * loadPSD(Stream & s); +namespace { - #if defined(HAVE_PNG) - static Image * loadPNG(Stream & s); - static bool savePNG(Stream & s, const Image * img, const ImageMetaData * tags); - #endif + // Array of 
image load plugins. +// static HashMap s_plugin_load_map; - #if defined(HAVE_JPEG) - static Image * loadJPG(Stream & s); - #endif - - #if defined(HAVE_TIFF) - static FloatImage * loadFloatTIFF(const char * fileName, Stream & s); - static bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); - #endif - - #if defined(HAVE_OPENEXR) - static FloatImage * loadFloatEXR(const char * fileName, Stream & s); - static bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); - #endif - - #endif // defined(HAVE_FREEIMAGE) + // Array of image save plugins. +// static HashMap s_plugin_save_map; + + struct Color555 { + uint16 b : 5; + uint16 g : 5; + uint16 r : 5; + }; + +} // namespace - } // ImageIO namespace -} // nv namespace Image * nv::ImageIO::load(const char * fileName) { @@ -114,15 +78,9 @@ Image * nv::ImageIO::load(const char * fileName, Stream & s) nvDebugCheck(s.isLoading()); const char * extension = Path::extension(fileName); - -#if defined(HAVE_FREEIMAGE) - FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); - if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { - return loadFreeImage(fif, s); - } -#else // defined(HAVE_FREEIMAGE) + if (strCaseCmp(extension, ".tga") == 0) { - return loadTGA(s); + return ImageIO::loadTGA(s); } #if defined(HAVE_JPEG) if (strCaseCmp(extension, ".jpg") == 0 || strCaseCmp(extension, ".jpeg") == 0) { @@ -137,39 +95,26 @@ Image * nv::ImageIO::load(const char * fileName, Stream & s) if (strCaseCmp(extension, ".psd") == 0) { return loadPSD(s); } -#endif // defined(HAVE_FREEIMAGE) - + // @@ use image plugins? 
return NULL; } -bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const ImageMetaData * tags/*=NULL*/) +bool nv::ImageIO::save(const char * fileName, Stream & s, Image * img) { nvDebugCheck(fileName != NULL); nvDebugCheck(s.isSaving()); nvDebugCheck(img != NULL); -#if defined(HAVE_FREEIMAGE) - FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); - if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { - return saveFreeImage(fif, s, img, tags); - } -#else const char * extension = Path::extension(fileName); if (strCaseCmp(extension, ".tga") == 0) { - return saveTGA(s, img); - } -#if defined(HAVE_PNG) - if (strCaseCmp(extension, ".png") == 0) { - return savePNG(s, img, tags); + return ImageIO::saveTGA(s, img); } -#endif -#endif return false; } -bool nv::ImageIO::save(const char * fileName, const Image * img, const ImageMetaData * tags/*=NULL*/) +bool nv::ImageIO::save(const char * fileName, Image * img) { nvDebugCheck(fileName != NULL); nvDebugCheck(img != NULL); @@ -180,7 +125,7 @@ bool nv::ImageIO::save(const char * fileName, const Image * img, const ImageMeta return false; } - return ImageIO::save(fileName, stream, img, tags); + return ImageIO::save(fileName, stream, img); } FloatImage * nv::ImageIO::loadFloat(const char * fileName) @@ -201,14 +146,7 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s) nvDebugCheck(fileName != NULL); const char * extension = Path::extension(fileName); - -#if defined(HAVE_FREEIMAGE) - FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); - if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { - return loadFloatFreeImage(fif, s); - } -#else // defined(HAVE_FREEIMAGE) -#pragma message(NV_FILE_LINE "TODO: Load TIFF and EXR files from stream.") + #if defined(HAVE_TIFF) if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) { return loadFloatTIFF(fileName, s); @@ -219,385 +157,59 @@ FloatImage * nv::ImageIO::loadFloat(const char 
* fileName, Stream & s) return loadFloatEXR(fileName, s); } #endif -#endif // defined(HAVE_FREEIMAGE) - - return NULL; -} -bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage * fimage, uint baseComponent, uint componentCount) -{ - if (componentCount == 0) - { - componentCount = fimage->componentNum() - baseComponent; - } - if (baseComponent + componentCount < fimage->componentNum()) - { - return false; - } - -#if defined(HAVE_FREEIMAGE) - FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); - if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { - return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount); +/* // @@ Disable temporarily + if (strCaseCmp(extension, ".pfm") == 0) { + return loadFloatPFM(fileName, s); } -#else // defined(HAVE_FREEIMAGE) - //if (componentCount == 3 || componentCount == 4) - if (componentCount <= 4) - { - AutoPtr image(fimage->createImage(baseComponent, componentCount)); - nvCheck(image != NULL); - - if (componentCount == 1) - { - Color32 * c = image->pixels(); - const uint count = image->width() * image->height(); - for (uint i = 0; i < count; i++) - { - c[i].b = c[i].g = c[i].r; - } - } - - if (componentCount == 4) - { - image->setFormat(Image::Format_ARGB); - } - - return ImageIO::save(fileName, image.ptr()); - } -#endif // defined(HAVE_FREEIMAGE) +*/ - return false; + return NULL; } -bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount) + +bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) { const char * extension = Path::extension(fileName); -#if !defined(HAVE_FREEIMAGE) #if defined(HAVE_OPENEXR) - if (strCaseCmp(extension, ".exr") == 0) { - return saveFloatEXR(fileName, fimage, baseComponent, componentCount); - } -#endif -#if defined(HAVE_TIFF) - if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) { - return 
saveFloatTIFF(fileName, fimage, baseComponent, componentCount); - } -#endif -#endif // defined(HAVE_FREEIMAGE) - - StdInputStream stream(fileName); - - if (stream.isError()) { - return false; - } - - return saveFloat(fileName, stream, fimage, baseComponent, componentCount); -} - -#if defined(HAVE_FREEIMAGE) - -static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle) -{ - Stream * s = (Stream *) handle; - s->serialize(buffer, size * count); - return count; -} - -static unsigned DLL_CALLCONV WriteProc(void *buffer, unsigned size, unsigned count, fi_handle handle) -{ - Stream * s = (Stream *) handle; - s->serialize(buffer, size * count); - return count; -} - -static int DLL_CALLCONV SeekProc(fi_handle handle, long offset, int origin) -{ - Stream * s = (Stream *) handle; - - switch(origin) { - case SEEK_SET : - s->seek(offset); - break; - case SEEK_CUR : - s->seek(s->tell() + offset); - break; - default : - return 1; - } - - return 0; -} - -static long DLL_CALLCONV TellProc(fi_handle handle) -{ - Stream * s = (Stream *) handle; - return s->tell(); -} - - -Image * nv::ImageIO::loadFreeImage(FREE_IMAGE_FORMAT fif, Stream & s) -{ - nvCheck(!s.isError()); - - FreeImageIO io; - io.read_proc = ReadProc; - io.write_proc = NULL; - io.seek_proc = SeekProc; - io.tell_proc = TellProc; - - FIBITMAP * bitmap = FreeImage_LoadFromHandle(fif, &io, (fi_handle)&s, 0); - - if (bitmap == NULL) - { - return NULL; - } - - const int w = FreeImage_GetWidth(bitmap); - const int h = FreeImage_GetHeight(bitmap); - - if (FreeImage_GetImageType(bitmap) == FIT_BITMAP) - { - if (FreeImage_GetBPP(bitmap) != 32) - { - FIBITMAP * tmp = FreeImage_ConvertTo32Bits(bitmap); - FreeImage_Unload(bitmap); - bitmap = tmp; - } - } - else + if (strCaseCmp(extension, ".exr") == 0) { - // @@ Use tone mapping? 
- FIBITMAP * tmp = FreeImage_ConvertToType(bitmap, FIT_BITMAP, true); - FreeImage_Unload(bitmap); - bitmap = tmp; - } - - - Image * image = new Image(); - image->allocate(w, h); - - // Copy the image over to our internal format, FreeImage has the scanlines bottom to top though. - for (int y=0; y < h; y++) - { - const void * src = FreeImage_GetScanLine(bitmap, h - y - 1); - void * dst = image->scanline(y); - - memcpy(dst, src, 4 * w); - } - - FreeImage_Unload(bitmap); - - return image; -} - -FloatImage * nv::ImageIO::loadFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s) -{ - nvCheck(!s.isError()); - - FreeImageIO io; - io.read_proc = ReadProc; - io.write_proc = NULL; - io.seek_proc = SeekProc; - io.tell_proc = TellProc; - - FIBITMAP * bitmap = FreeImage_LoadFromHandle(fif, &io, (fi_handle)&s, 0); - - if (bitmap == NULL) - { - return NULL; - } - - const int w = FreeImage_GetWidth(bitmap); - const int h = FreeImage_GetHeight(bitmap); - - FREE_IMAGE_TYPE fit = FreeImage_GetImageType(bitmap); - - FloatImage * floatImage = new FloatImage(); - - switch (fit) - { - case FIT_FLOAT: - floatImage->allocate(1, w, h); - - for (int y=0; y < h; y++) - { - const float * src = (const float *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - float * dst = floatImage->scanline(y, 0); - - for (int x=0; x < w; x++) - { - dst[x] = src[x]; - } - } - break; - case FIT_UINT16: - floatImage->allocate(1, w, h); - - for (int y=0; y < h; y++) - { - const uint16 * src = (const uint16 *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - float * dst = floatImage->scanline(y, 0); - - for (int x=0; x < w; x++) - { - dst[x] = float(src[x]) / 65535; - } - } - break; - case FIT_COMPLEX: - floatImage->allocate(2, w, h); - - for (int y=0; y < h; y++) - { - const FICOMPLEX * src = (const FICOMPLEX *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - - float * dst_real = floatImage->scanline(y, 0); - float * dst_imag = floatImage->scanline(y, 1); - - for (int x=0; x < w; x++) - { - dst_real[x] = (float)src[x].r; - 
dst_imag[x] = (float)src[x].i; - } - } - break; - case FIT_RGBF: - floatImage->allocate(3, w, h); - - for (int y=0; y < h; y++) - { - const FIRGBF * src = (const FIRGBF *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - - float * dst_red = floatImage->scanline(y, 0); - float * dst_green = floatImage->scanline(y, 1); - float * dst_blue = floatImage->scanline(y, 2); - - for (int x=0; x < w; x++) - { - dst_red[x] = src[x].red; - dst_green[x] = src[x].green; - dst_blue[x] = src[x].blue; - } - } - break; - case FIT_RGBAF: - floatImage->allocate(4, w, h); - - for (int y=0; y < h; y++) - { - const FIRGBAF * src = (const FIRGBAF *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - - float * dst_red = floatImage->scanline(y, 0); - float * dst_green = floatImage->scanline(y, 1); - float * dst_blue = floatImage->scanline(y, 2); - float * dst_alpha = floatImage->scanline(y, 3); - - for (int x=0; x < w; x++) - { - dst_red[x] = src[x].red; - dst_green[x] = src[x].green; - dst_blue[x] = src[x].blue; - dst_alpha[x] = src[x].alpha; - } - } - break; - default: - delete floatImage; - floatImage = NULL; - } - - FreeImage_Unload(bitmap); - - return floatImage; -} - -bool nv::ImageIO::saveFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const Image * img, const ImageMetaData * tags) -{ - nvCheck(!s.isError()); - - FreeImageIO io; - io.read_proc = NULL; - io.write_proc = WriteProc; - io.seek_proc = SeekProc; - io.tell_proc = TellProc; - - const uint w = img->width(); - const uint h = img->height(); - - FIBITMAP * bitmap = FreeImage_Allocate(w, h, 32); - - for (uint i = 0; i < h; i++) - { - uint8 * scanline = FreeImage_GetScanLine(bitmap, i); - memcpy(scanline, img->scanline(h - i - 1), w * sizeof(Color32)); + return ImageIO::saveFloatEXR(fileName, fimage, base_component, num_components); } +#endif - if (tags != NULL) +#if defined(HAVE_TIFF) + if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) { -#pragma message(NV_FILE_LINE "TODO: Save image metadata") - 
//FreeImage_SetMetadata( + return ImageIO::saveFloatTIFF(fileName, fimage, base_component, num_components); } +#endif - bool result = FreeImage_SaveToHandle(fif, bitmap, &io, (fi_handle)&s, 0) != 0; - - FreeImage_Unload(bitmap); - - return result; -} - -bool nv::ImageIO::saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const FloatImage * img, uint baseComponent, uint componentCount) -{ - nvCheck(!s.isError()); - - FreeImageIO io; - io.read_proc = NULL; - io.write_proc = WriteProc; - io.seek_proc = SeekProc; - io.tell_proc = TellProc; - - const uint w = img->width(); - const uint h = img->height(); - - FREE_IMAGE_TYPE type; - if (componentCount == 1) - { - type = FIT_FLOAT; - } - else if (componentCount == 3) +/* // @@ Disable Temporarily + if (strCaseCmp(extension, ".pfm") == 0) { - type = FIT_RGBF; +// return ImageIO::saveFloatPFM(fileName, fimage, base_component, num_components); } - else if (componentCount == 4) - { - type = FIT_RGBAF; - } - - FIBITMAP * bitmap = FreeImage_AllocateT(type, w, h); +*/ - for (uint y = 0; y < h; y++) + if (num_components == 3 || num_components == 4) { - float * scanline = (float *)FreeImage_GetScanLine(bitmap, y); + AutoPtr image(fimage->createImage(base_component, num_components)); + nvCheck(image != NULL); - for (uint x = 0; x < w; x++) + if (num_components == 4) { - for (uint c = 0; c < componentCount; c++) - { - scanline[x * componentCount + c] = img->pixel(x, y, baseComponent + c); - } + image->setFormat(Image::Format_ARGB); } - } - bool result = FreeImage_SaveToHandle(fif, bitmap, &io, (fi_handle)&s, 0) != 0; - - FreeImage_Unload(bitmap); + return ImageIO::save(fileName, image.ptr()); + } - return result; + return false; } -#else // defined(HAVE_FREEIMAGE) - /// Load TGA image. Image * nv::ImageIO::loadTGA(Stream & s) { @@ -620,7 +232,7 @@ Image * nv::ImageIO::loadTGA(Stream & s) // no break is intended! 
case TGA_TYPE_INDEXED: if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) { - nvDebug( "*** loadTGA: Error, only 24bit paletted images are supported.\n" ); + nvDebug( "*** ImageIO::loadTGA: Error, only 24bit paletted images are supported.\n" ); return false; } pal = true; @@ -641,7 +253,7 @@ Image * nv::ImageIO::loadTGA(Stream & s) break; default: - nvDebug( "*** loadTGA: Error, unsupported image type.\n" ); + nvDebug( "*** ImageIO::loadTGA: Error, unsupported image type.\n" ); return false; } @@ -990,10 +602,10 @@ Image * nv::ImageIO::loadPSD(Stream & s) static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length) { nvDebugCheck(png_ptr != NULL); - + Stream * s = (Stream *)png_ptr->io_ptr; s->serialize(data, (int)length); - + if (s->isError()) { png_error(png_ptr, "Read Error"); } @@ -1003,7 +615,7 @@ static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t lengt Image * nv::ImageIO::loadPNG(Stream & s) { nvCheck(!s.isError()); - + // Set up a read buffer and check the library version png_structp png_ptr; png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); @@ -1126,7 +738,7 @@ Image * nv::ImageIO::loadPNG(Stream & s) Color32 c = img->pixel(i); img->pixel(i) = Color32(c.b, c.g, c.r, c.a); } - + // Compute alpha channel if needed. 
/*if( img->flags & PI_IU_BUMPMAP || img->flags & PI_IU_ALPHAMAP ) { if( img->flags & PI_IF_HAS_COLOR && !(img->flags & PI_IF_HAS_ALPHA)) { @@ -1137,100 +749,6 @@ Image * nv::ImageIO::loadPNG(Stream & s) return img.release(); } -static void user_write_data(png_structp png_ptr, png_bytep data, png_size_t length) -{ - nvDebugCheck(png_ptr != NULL); - - Stream * s = (Stream *)png_ptr->io_ptr; - s->serialize(data, (int)length); - - if (s->isError()) { - png_error(png_ptr, "Write Error"); - } -} - -static void user_write_flush(png_structp png_ptr) { } - -bool nv::ImageIO::savePNG(Stream & s, const Image * img, const ImageMetaData * tags/*=NULL*/) -{ - nvCheck(!s.isError()); - nvCheck(img != NULL); - nvCheck(img->pixels() != NULL); - - // Set up a write buffer and check the library version - png_structp png_ptr; - png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - if (png_ptr == NULL) { - return false; - } - - // Allocate/initialize a memory block for the image information - png_infop info_ptr = png_create_info_struct(png_ptr); - if (info_ptr == NULL) { - png_destroy_write_struct(&png_ptr, NULL); - return false; - } - - // Set up the error handling - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_write_struct(&png_ptr, &info_ptr); - return false; - } - - // Set up the I/O functions. 
- png_set_write_fn(png_ptr, (void*)&s, user_write_data, user_write_flush); - - // Set image header information - int color_type = PNG_COLOR_TYPE_RGB; - switch(img->format()) - { - case Image::Format_RGB: color_type = PNG_COLOR_TYPE_RGB; break; - case Image::Format_ARGB: color_type = PNG_COLOR_TYPE_RGBA; break; - } - png_set_IHDR(png_ptr, info_ptr, img->width(), img->height(), - 8, color_type, PNG_INTERLACE_NONE, - PNG_COMPRESSION_TYPE_DEFAULT, - PNG_FILTER_TYPE_DEFAULT); - - // Set image data - png_bytep * row_data = new png_bytep[sizeof(png_byte) * img->height()]; - for (uint i = 0; i < img->height(); i++) { - row_data[i] = (png_byte*)img->scanline (i); - } - png_set_rows(png_ptr, info_ptr, row_data); - - png_text * text = NULL; - if (tags != NULL && tags->tagMap.count() > 0) - { - text = new png_text[tags->tagMap.count()]; - memset(text, 0, tags->tagMap.count() * sizeof(png_text)); - int n = 0; - foreach (i, tags->tagMap) - { - text[n].compression = PNG_TEXT_COMPRESSION_NONE; - text[n].key = const_cast (tags->tagMap[i].key.str()); - text[n].text = const_cast (tags->tagMap[i].value.str()); - n++; - } - png_set_text(png_ptr, info_ptr, text, tags->tagMap.count()); - } - - png_write_png(png_ptr, info_ptr, - // component order is BGR(A) - PNG_TRANSFORM_BGR - // Strip alpha byte for RGB images - | (img->format() == Image::Format_RGB ? PNG_TRANSFORM_STRIP_FILLER : 0), - NULL); - - // Finish things up - png_destroy_write_struct(&png_ptr, &info_ptr); - - delete [] row_data; - delete [] text; - - return true; -} - #endif // defined(HAVE_PNG) #if defined(HAVE_JPEG) @@ -1272,12 +790,12 @@ static void term_source (j_decompress_ptr /*cinfo*/){ Image * nv::ImageIO::loadJPG(Stream & s) { nvCheck(!s.isError()); - + // Read the entire file. 
Array byte_array; byte_array.resize(s.size()); - s.serialize(byte_array.mutableBuffer(), s.size()); - + s.serialize(byte_array.unsecureBuffer(), s.size()); + jpeg_decompress_struct cinfo; jpeg_error_mgr jerr; @@ -1359,7 +877,7 @@ static toff_t tiffSeekProc(thandle_t h, toff_t offset, int whence) { Stream * s = (Stream *)h; nvDebugCheck(s != NULL); - + if (!s->isSeekable()) { return (toff_t)-1; @@ -1408,16 +926,16 @@ static void tiffUnmapFileProc(thandle_t, tdata_t, toff_t) FloatImage * nv::ImageIO::loadFloatTIFF(const char * fileName, Stream & s) { nvCheck(!s.isError()); - + TIFF * tif = TIFFOpen(fileName, "r"); //TIFF * tif = TIFFClientOpen(fileName, "r", &s, tiffReadWriteProc, tiffReadWriteProc, tiffSeekProc, tiffCloseProc, tiffSizeProc, tiffMapFileProc, tiffUnmapFileProc); - + if (!tif) { nvDebug("Can't open '%s' for reading\n", fileName); return NULL; } - + ::uint16 spp, bpp, format; ::uint32 width, height; TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height); @@ -1425,28 +943,28 @@ FloatImage * nv::ImageIO::loadFloatTIFF(const char * fileName, Stream & s) TIFFGetField(tif, TIFFTAG_BITSPERSAMPLE, &bpp); TIFFGetField(tif, TIFFTAG_SAMPLESPERPIXEL, &spp); TIFFGetField(tif, TIFFTAG_SAMPLEFORMAT, &format); - + if (bpp != 8 && bpp != 16 && bpp != 32) { nvDebug("Can't load '%s', only 1 sample per pixel supported\n", fileName); TIFFClose(tif); return NULL; } - + AutoPtr fimage(new FloatImage()); fimage->allocate(spp, width, height); - + int linesize = TIFFScanlineSize(tif); tdata_t buf = (::uint8 *)nv::mem::malloc(linesize); - - for (uint y = 0; y < height; y++) + + for (uint y = 0; y < height; y++) { TIFFReadScanline(tif, buf, y, 0); - for (uint c=0; cscanline(y, c); - for(uint x = 0; x < width; x++) + for(uint x = 0; x < width; x++) { if (bpp == 8) { @@ -1474,9 +992,9 @@ FloatImage * nv::ImageIO::loadFloatTIFF(const char * fileName, Stream & s) } nv::mem::free(buf); - + TIFFClose(tif); - + return fimage.release(); } @@ -1485,7 +1003,7 @@ bool 
nv::ImageIO::saveFloatTIFF(const char * fileName, const FloatImage * fimage nvCheck(fileName != NULL); nvCheck(fimage != NULL); nvCheck(base_component + num_components <= fimage->componentNum()); - + const int iW = fimage->width(); const int iH = fimage->height(); const int iC = num_components; @@ -1504,8 +1022,8 @@ bool nv::ImageIO::saveFloatTIFF(const char * fileName, const FloatImage * fimage TIFFSetField(image, TIFFTAG_SAMPLESPERPIXEL, iC); TIFFSetField(image, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP); TIFFSetField(image, TIFFTAG_BITSPERSAMPLE, 32); - - uint32 rowsperstrip = TIFFDefaultStripSize(image, (uint32)-1); + + uint32 rowsperstrip = TIFFDefaultStripSize(image, (uint32)-1); TIFFSetField(image, TIFFTAG_ROWSPERSTRIP, rowsperstrip); TIFFSetField(image, TIFFTAG_COMPRESSION, COMPRESSION_PACKBITS); @@ -1520,7 +1038,7 @@ bool nv::ImageIO::saveFloatTIFF(const char * fileName, const FloatImage * fimage float * scanline = new float[iW * iC]; for (int y = 0; y < iH; y++) { - for (int c = 0; c < iC; c++) + for (int c = 0; c < iC; c++) { const float * src = fimage->scanline(y, base_component + c); for (int x = 0; x < iW; x++) scanline[x * iC + c] = src[x]; @@ -1551,56 +1069,38 @@ namespace { nvDebugCheck(s.isLoading()); } - + virtual bool read(char c[], int n) { m_stream.serialize(c, n); - + if (m_stream.isError()) { throw Iex::InputExc("I/O error."); } - + return m_stream.isAtEnd(); } - + virtual Imf::Int64 tellg() { return m_stream.tell(); } - + virtual void seekg(Imf::Int64 pos) { - nvDebugCheck(pos >= 0 && pos < UINT_MAX); - m_stream.seek((uint)pos); + m_stream.seek(pos); } - + virtual void clear() { m_stream.clearError(); } - + private: Stream & m_stream; }; - static int channelIndexFromName(const char* name) - { - char c = tolower(name[0]); - switch (c) - { - default: - case 'r': - return 0; - case 'g': - return 1; - case 'b': - return 2; - case 'a': - return 3; - } - } - } // namespace FloatImage * nv::ImageIO::loadFloatEXR(const char * fileName, Stream & s) 
@@ -1617,31 +1117,30 @@ FloatImage * nv::ImageIO::loadFloatEXR(const char * fileName, Stream & s) int height = box.max.x - box.min.y + 1; const Imf::ChannelList & channels = inputFile.header().channels(); - + // Count channels. uint channelCount= 0; for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it) { channelCount++; } - + // Allocate FloatImage. AutoPtr fimage(new FloatImage()); fimage->allocate(channelCount, width, height); - + // Describe image's layout with a framebuffer. Imf::FrameBuffer frameBuffer; uint i = 0; for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it, ++i) { - int channelIndex = channelIndexFromName(it.name()); - frameBuffer.insert(it.name(), Imf::Slice(Imf::FLOAT, (char *)fimage->channel(channelIndex), sizeof(float), sizeof(float) * width)); + frameBuffer.insert(it.name(), Imf::Slice(Imf::FLOAT, (char *)fimage->channel(i), sizeof(float), sizeof(float) * width)); } - + // Read it. inputFile.setFrameBuffer (frameBuffer); inputFile.readPixels (box.min.y, box.max.y); - + return fimage.release(); } @@ -1651,34 +1150,360 @@ bool nv::ImageIO::saveFloatEXR(const char * fileName, const FloatImage * fimage, nvCheck(fimage != NULL); nvCheck(base_component + num_components <= fimage->componentNum()); nvCheck(num_components > 0 && num_components <= 4); - + const int w = fimage->width(); const int h = fimage->height(); - + const char * channelNames[] = {"R", "G", "B", "A"}; - - Imf::Header header (w, h); - + + Imf::Header header (w, h); + for (uint c = 0; c < num_components; c++) { header.channels().insert(channelNames[c], Imf::Channel(Imf::FLOAT)); } - - Imf::OutputFile file(fileName, header); - Imf::FrameBuffer frameBuffer; - + + Imf::OutputFile file(fileName, header); + Imf::FrameBuffer frameBuffer; + for (uint c = 0; c < num_components; c++) { char * channel = (char *) fimage->channel(base_component + c); frameBuffer.insert(channelNames[c], Imf::Slice(Imf::FLOAT, channel, 
sizeof(float), sizeof(float) * w)); } - + file.setFrameBuffer(frameBuffer); file.writePixels(h); - + return true; } #endif // defined(HAVE_OPENEXR) -#endif // defined(HAVE_FREEIMAGE) +#if 0 // @@ Disable temporarily. + +FloatImage * nv::ImageIO::loadFloatPFM(const char * fileName, Stream & s) +{ + nvCheck(s.isLoading()); + nvCheck(!s.isError()); + + Tokenizer parser(&s); + + parser.nextToken(); + + bool grayscale; + if (parser.token() == "PF") + { + grayscale = false; + } + else if (parser.token() == "Pf") + { + grayscale = true; + } + else + { + // Invalid file. + return NULL; + } + + parser.nextLine(); + + int width = parser.token().toInt(); parser.nextToken(); + int height = parser.token().toInt(); + + parser.nextLine(); + + float scaleFactor = parser.token().toFloat(); + + if (scaleFactor >= 0) + { + s.setByteOrder(Stream::BigEndian); + } + else + { + s.setByteOrder(Stream::LittleEndian); + } + scaleFactor = fabsf(scaleFactor); + + // Allocate image. + AutoPtr fimage(new FloatImage()); + + if (grayscale) + { + fimage->allocate(1, width, height); + + float * channel = fimage->channel(0); + + for (int i = 0; i < width * height; i++) + { + s << channel[i]; + } + } + else + { + fimage->allocate(3, width, height); + + float * rchannel = fimage->channel(0); + float * gchannel = fimage->channel(1); + float * bchannel = fimage->channel(2); + + for (int i = 0; i < width * height; i++) + { + s << rchannel[i] << gchannel[i] << bchannel[i]; + } + } + + return fimage.release(); +} + +bool nv::ImageIO::saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) +{ + nvCheck(fileName != NULL); + nvCheck(fimage != NULL); + nvCheck(fimage->componentNum() <= base_component + num_components); + nvCheck(num_components == 1 || num_components == 3); + + StdOutputStream stream(fileName); + TextWriter writer(&stream); + + if (num_components == 1) writer.write("Pf\n"); + else /*if (num_components == 3)*/ writer.write("PF\n"); + + int w = 
fimage->width(); + int h = fimage->height(); + writer.write("%d %d\n", w, h); + writer.write("%f\n", -1.0f); // little endian with 1.0 scale. + + if (num_components == 1) + { + float * channel = const_cast(fimage->channel(0)); + + for (int i = 0; i < w * h; i++) + { + stream << channel[i]; + } + } + else + { + float * rchannel = const_cast(fimage->channel(0)); + float * gchannel = const_cast(fimage->channel(1)); + float * bchannel = const_cast(fimage->channel(2)); + + for (int i = 0; i < w * h; i++) + { + stream << rchannel[i] << gchannel[i] << bchannel[i]; + } + } + + return true; +} + +#endif + +#if 0 + +/** Save PNG*/ +static bool SavePNG(const PiImage * img, const char * name) { + nvCheck( img != NULL ); + nvCheck( img->mem != NULL ); + + if( piStrCmp(piExtension(name), ".png" ) != 0 ) { + return false; + } + + if( img->flags & PI_IT_CUBEMAP ) { + nvDebug("*** Cannot save cubemaps as PNG."); + return false; + } + if( img->flags & PI_IT_DDS ) { + nvDebug("*** Cannot save DDS surface as PNG."); + return false; + } + + nvDebug( "--- Saving '%s'.\n", name ); + + PiAutoPtr ar( PiFileSystem::CreateFileWriter( name ) ); + if( ar == NULL ) { + nvDebug( "*** SavePNG: Error, cannot save file '%s'.\n", name ); + return false; + } + +/* +public class PNGEnc { + + public static function encode(img:BitmapData):ByteArray { + // Create output byte array + var png:ByteArray = new ByteArray(); + // Write PNG signature + png.writeUnsignedInt(0x89504e47); + png.writeUnsignedInt(0x0D0A1A0A); + // Build IHDR chunk + var IHDR:ByteArray = new ByteArray(); + IHDR.writeInt(img.width); + IHDR.writeInt(img.height); + IHDR.writeUnsignedInt(0x08060000); // 32bit RGBA + IHDR.writeByte(0); + writeChunk(png,0x49484452,IHDR); + // Build IDAT chunk + var IDAT:ByteArray= new ByteArray(); + for(var i:int=0;i < img.height;i++) { + // no filter + IDAT.writeByte(0); + var p:uint; + if ( !img.transparent ) { + for(var j:int=0;j < img.width;j++) { + p = img.getPixel(j,i); + IDAT.writeUnsignedInt( + 
uint(((p&0xFFFFFF) << 8)|0xFF)); + } + } else { + for(var j:int=0;j < img.width;j++) { + p = img.getPixel32(j,i); + IDAT.writeUnsignedInt( + uint(((p&0xFFFFFF) << 8)| + (shr(p,24)))); + } + } + } + IDAT.compress(); + writeChunk(png,0x49444154,IDAT); + // Build IEND chunk + writeChunk(png,0x49454E44,null); + // return PNG + return png; + } + + private static var crcTable:Array; + private static var crcTableComputed:Boolean = false; + + private static function writeChunk(png:ByteArray, + type:uint, data:ByteArray) { + if (!crcTableComputed) { + crcTableComputed = true; + crcTable = []; + for (var n:uint = 0; n < 256; n++) { + var c:uint = n; + for (var k:uint = 0; k < 8; k++) { + if (c & 1) { + c = uint(uint(0xedb88320) ^ + uint(c >>> 1)); + } else { + c = uint(c >>> 1); + } + } + crcTable[n] = c; + } + } + var len:uint = 0; + if (data != null) { + len = data.length; + } + png.writeUnsignedInt(len); + var p:uint = png.position; + png.writeUnsignedInt(type); + if ( data != null ) { + png.writeBytes(data); + } + var e:uint = png.position; + png.position = p; + var c:uint = 0xffffffff; + for (var i:int = 0; i < (e-p); i++) { + c = uint(crcTable[ + (c ^ png.readUnsignedByte()) & + uint(0xff)] ^ uint(c >>> 8)); + } + c = uint(c^uint(0xffffffff)); + png.position = e; + png.writeUnsignedInt(c); + } +} +*/ +} + +#endif // 0 + +#if 0 + + +namespace ImageIO { + + /** Init ImageIO plugins. */ + void InitPlugins() { + // AddInputPlugin( "", LoadANY ); + AddInputPlugin( "tga", LoadTGA ); +#if HAVE_PNG + AddInputPlugin( "png", LoadPNG ); +#endif +#if HAVE_JPEG + AddInputPlugin( "jpg", LoadJPG ); +#endif + AddInputPlugin( "dds", LoadDDS ); + + AddOutputPlugin( "tga", SaveTGA ); + } + + /** Reset ImageIO plugins. */ + void ResetPlugins() { + s_plugin_load_map.Clear(); + s_plugin_save_map.Clear(); + } + + /** Add an input plugin. */ + void AddInputPlugin( const char * ext, ImageInput_Plugin plugin ) { + s_plugin_load_map.Add(ext, plugin); + } + + /** Add an output plugin. 
*/ + void AddOutputPlugin( const char * ext, ImageOutput_Plugin plugin ) { + s_plugin_save_map.Add(ext, plugin); + } + + + bool Load(PiImage * img, const char * name, PiStream & stream) { + + // Get name extension. + const char * extension = piExtension(name); + + // Skip the dot. + if( *extension == '.' ) { + extension++; + } + + // Lookup plugin in the map. + ImageInput_Plugin plugin = NULL; + if( s_plugin_load_map.Get(extension, &plugin) ) { + return plugin(img, stream); + } + + /*foreach(i, s_plugin_load_map) { + nvDebug("%s %s %d\n", s_plugin_load_map[i].key.GetStr(), extension, 0 == strcmp(extension, s_plugin_load_map[i].key)); + } + + nvDebug("No plugin found for '%s' %d.\n", extension, s_plugin_load_map.Size());*/ + + return false; + } + + bool Save(const PiImage * img, const char * name, PiStream & stream) { + + // Get name extension. + const char * extension = piExtension(name); + + // Skip the dot. + if( *extension == '.' ) { + extension++; + } + + // Lookup plugin in the map. 
+ ImageOutput_Plugin plugin = NULL; + if( s_plugin_save_map.Get(extension, &plugin) ) { + return plugin(img, stream); + } + + return false; + } + +} // ImageIO + +#endif // 0 + diff --git a/src/nvimage/ImageIO.h b/src/nvimage/ImageIO.h index dfe735d..0902a5d 100644 --- a/src/nvimage/ImageIO.h +++ b/src/nvimage/ImageIO.h @@ -5,9 +5,6 @@ #include -#include - - namespace nv { class Image; @@ -16,22 +13,43 @@ namespace nv namespace ImageIO { - struct ImageMetaData - { - HashMap tagMap; - }; - NVIMAGE_API Image * load(const char * fileName); NVIMAGE_API Image * load(const char * fileName, Stream & s); NVIMAGE_API FloatImage * loadFloat(const char * fileName); NVIMAGE_API FloatImage * loadFloat(const char * fileName, Stream & s); - NVIMAGE_API bool save(const char * fileName, const Image * img, const ImageMetaData * tags=NULL); - NVIMAGE_API bool save(const char * fileName, Stream & s, const Image * img, const ImageMetaData * tags=NULL); + NVIMAGE_API bool save(const char * fileName, Stream & s, Image * img); + NVIMAGE_API bool save(const char * fileName, Image * img); + NVIMAGE_API bool saveFloat(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); + + NVIMAGE_API Image * loadTGA(Stream & s); + NVIMAGE_API bool saveTGA(Stream & s, const Image * img); + + NVIMAGE_API Image * loadPSD(Stream & s); + +#if defined(HAVE_PNG) + NVIMAGE_API Image * loadPNG(Stream & s); +#endif + +#if defined(HAVE_JPEG) + NVIMAGE_API Image * loadJPG(Stream & s); +#endif + +#if defined(HAVE_TIFF) + NVIMAGE_API FloatImage * loadFloatTIFF(const char * fileName, Stream & s); + + NVIMAGE_API bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); +#endif + +#if defined(HAVE_OPENEXR) + NVIMAGE_API FloatImage * loadFloatEXR(const char * fileName, Stream & s); + + NVIMAGE_API bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); +#endif - NVIMAGE_API 
bool saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount); - NVIMAGE_API bool saveFloat(const char * fileName, Stream & s, const FloatImage * fimage, uint baseComponent, uint componentCount); + // NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s); + // NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); } // ImageIO namespace diff --git a/src/nvimage/NormalMap.cpp b/src/nvimage/NormalMap.cpp index 06e0c00..2ece574 100644 --- a/src/nvimage/NormalMap.cpp +++ b/src/nvimage/NormalMap.cpp @@ -36,9 +36,9 @@ using namespace nv; // Create normal map using the given kernels. static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, const Kernel2 * kdu, const Kernel2 * kdv) { - nvDebugCheck(kdu != NULL); - nvDebugCheck(kdv != NULL); - nvDebugCheck(img != NULL); + nvCheck(kdu != NULL); + nvCheck(kdv != NULL); + nvCheck(img != NULL); const uint w = img->width(); const uint h = img->height(); @@ -75,54 +75,10 @@ static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, } -// Create normal map using the given kernels. -static FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, const Kernel2 * kdu, const Kernel2 * kdv) -{ - nvDebugCheck(kdu != NULL); - nvDebugCheck(kdv != NULL); - nvDebugCheck(img != NULL); - -#pragma message(NV_FILE_LINE "FIXME: Height scale parameter should go away. 
It should be a sensible value that produces good results when the heightmap is in the [0, 1] range.") - const float heightScale = 1.0f / 16.0f; - - const uint w = img->width(); - const uint h = img->height(); - - AutoPtr img_out(new FloatImage()); - img_out->allocate(4, w, h); - - for (uint y = 0; y < h; y++) - { - for (uint x = 0; x < w; x++) - { - const float du = img->applyKernel(kdu, x, y, 3, wm); - const float dv = img->applyKernel(kdv, x, y, 3, wm); - - Vector3 n = normalize(Vector3(du, dv, heightScale)); - - img_out->setPixel(n.x(), x, y, 0); - img_out->setPixel(n.y(), x, y, 1); - img_out->setPixel(n.z(), x, y, 2); - } - } - - // Copy alpha channel. - for (uint y = 0; y < h; y++) - { - for (uint x = 0; x < w; x++) - { - img_out->setPixel(img->pixel(x, y, 3), x, y, 3); - } - } - - return img_out.release(); -} - - /// Create normal map using the given filter. FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter /*= Sobel3x3*/) { - nvDebugCheck(img != NULL); + nvCheck(img != NULL); // Init the kernels. Kernel2 * kdu = NULL; @@ -159,7 +115,7 @@ FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vec /// Create normal map combining multiple sobel filters. 
FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights) { - nvDebugCheck(img != NULL); + nvCheck(img != NULL); Kernel2 * kdu = NULL; Kernel2 * kdv = NULL; @@ -174,32 +130,10 @@ FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vec return ::createNormalMap(img, wm, heightWeights, kdu, kdv); } - -FloatImage * nv::createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights) -{ - nvDebugCheck(img != NULL); - - Kernel2 * kdu = NULL; - Kernel2 * kdv = NULL; - - kdu = new Kernel2(9); - kdu->initBlendedSobel(filterWeights); - kdu->normalize(); - - kdv = new Kernel2(*kdu); - kdv->transpose(); - - return ::createNormalMap(img, wm, kdu, kdv); -} - - /// Normalize the given image in place. void nv::normalizeNormalMap(FloatImage * img) { - nvDebugCheck(img != NULL); - -#pragma message(NV_FILE_LINE "TODO: Pack and expand normals explicitly") - + nvCheck(img != NULL); img->expandNormals(0); img->normalize(0); img->packNormals(0); diff --git a/src/nvimage/NormalMap.h b/src/nvimage/NormalMap.h index 48b2fbe..670ead4 100644 --- a/src/nvimage/NormalMap.h +++ b/src/nvimage/NormalMap.h @@ -41,11 +41,9 @@ namespace nv NormalMapFilter_Sobel9x9, // very large }; - // @@ These two functions should be deprecated: FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); - FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); - FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights); + FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); void normalizeNormalMap(FloatImage * img); diff --git a/src/nvimage/NormalMipmap.cpp b/src/nvimage/NormalMipmap.cpp new file mode 
100644 index 0000000..253c6d4 --- /dev/null +++ b/src/nvimage/NormalMipmap.cpp @@ -0,0 +1,98 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include + +#include +#include + +#include +#include + +using namespace nv; + +FloatImage * nv::createNormalMipmapMap(const FloatImage * img) +{ + nvDebugCheck(img != NULL); + + uint w = img->width(); + uint h = img->height(); + + uint hw = w / 2; + uint hh = h / 2; + + FloatImage dotImg; + dotImg.allocate(1, w, h); + + FloatImage shImg; + shImg.allocate(9, hw, hh); + + SampleDistribution distribution(256); + const uint sampleCount = distribution.sampleCount(); + + for (uint d = 0; d < sampleCount; d++) + { + const float * xChannel = img->channel(0); + const float * yChannel = img->channel(1); + const float * zChannel = img->channel(2); + + Vector3 dir = distribution.sampleDir(d); + + Sh2 basis; + basis.eval(dir); + + for(uint i = 0; i < w*h; i++) + { + Vector3 normal(xChannel[i], yChannel[i], zChannel[i]); + normal = normalizeSafe(normal, Vector3(zero), 0.0f); + + dotImg.setPixel(dot(dir, normal), d); + } + + // @@ It would be nice to have a fastDownSample that took an existing image as an argument, to avoid allocations. + AutoPtr dotMip(dotImg.fastDownSample()); + + for(uint p = 0; p < hw*hh; p++) + { + float f = dotMip->pixel(p); + + // Project irradiance to sh basis and accumulate. + for (uint i = 0; i < 9; i++) + { + float & sum = shImg.channel(i)[p]; + sum += f * basis.elemAt(i); + } + } + } + + + + FloatImage * normalMipmap = new FloatImage; + normalMipmap->allocate(4, hw, hh); + + // Precompute the clamped cosine radiance transfer. + Sh2 prt; + prt.cosineTransfer(); + + // Allocate outside the loop. + Sh2 sh; + + for(uint p = 0; p < hw*hh; p++) + { + for (uint i = 0; i < 9; i++) + { + sh.elemAt(i) = shImg.channel(i)[p]; + } + + // Convolve sh irradiance by radiance transfer. + sh *= prt; + + // Now sh(0) is the ambient occlusion. + // and sh(1) is the normal direction. 
+ + // Should we use SVD to fit only the normals to the SH? + + } + + return normalMipmap; +} + diff --git a/src/nvimage/NormalMipmap.h b/src/nvimage/NormalMipmap.h new file mode 100644 index 0000000..fc36727 --- /dev/null +++ b/src/nvimage/NormalMipmap.h @@ -0,0 +1,17 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_IMAGE_NORMALMIPMAP_H +#define NV_IMAGE_NORMALMIPMAP_H + +#include + + +namespace nv +{ + class FloatImage; + + FloatImage * createNormalMipmapMap(const FloatImage * img); + +} // nv namespace + +#endif // NV_IMAGE_NORMALMIPMAP_H diff --git a/src/nvimage/TiledImage.cpp b/src/nvimage/TiledImage.cpp deleted file mode 100644 index ae3bf6d..0000000 --- a/src/nvimage/TiledImage.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// This code is in the public domain -- castano@gmail.com - -#include "TiledImage.h" - -#include - - -using namespace nv; - -namespace -{ - // MRU helpers: - // ... - - -} - - -bool Tile::load(const char * name) -{ - StdInputStream stream(name); - - if (stream.isError()) { - return false; - } - - uint header; - stream << header; - - if (header == 'NVTC') { - return false; - } - - uint count; - stream << count; - - if (count != w*h) { - return false; - } - - const uint size = count * sizeof(float); - - return stream.serialize(data, size) == size; -} - - -bool Tile::unload(const char * name) -{ - StdOutputStream stream(name); - - if (stream.isError()) { - return false; - } - - uint header = 'NVTC'; - uint count = w * h; - const uint size = w * h * sizeof(float); - - stream << header << count; - - return stream.serialize(data, size) == size; -} - - - - - -TiledImage::TiledImage() -{ -} - -void TiledImage::allocate(uint c, uint w, uint h, uint pageCount) -{ - // Allocate page map: - const uint pw = ((w + TILE_SIZE - 1) / TILE_SIZE); - const uint ph = ((h + TILE_SIZE - 1) / TILE_SIZE); - const uint size = c * pw * ph; - m_pageMap.resize(size); - - m_residentArray.resize(pageCount, ~0); -} - -void TiledImage::prefetch(uint c, 
uint x, uint y) -{ -} - -void TiledImage::prefetch(uint c, uint x, uint y, uint w, uint h) -{ -} - -void TiledImage::loadPage(uint x, uint y) -{ - const uint pw = ((w + TILE_SIZE - 1) / TILE_SIZE); - const uint ph = ((h + TILE_SIZE - 1) / TILE_SIZE); - - nvDebugCheck(x < pw); - nvDebugCheck(y < ph); - - -} - - diff --git a/src/nvimage/TiledImage.h b/src/nvimage/TiledImage.h deleted file mode 100644 index 96a2745..0000000 --- a/src/nvimage/TiledImage.h +++ /dev/null @@ -1,152 +0,0 @@ -// This code is in the public domain -- castano@gmail.com - -#ifndef NV_IMAGE_TILEDIMAGE_H -#define NV_IMAGE_TILEDIMAGE_H - -#include -#include - -#include - -// For simplicity the tile size is fixed at compile time. -#define TILE_SIZE 256 - -// 256 * 256 * 4 = 2^(8+8+2) = 2^18 = 256 KB -// 512 * 512 * 4 = 2^(9+9+2) = 2^20 = 1 MB - - -namespace nv -{ -#if 0 - struct ImageConcept - { - float pixel(uint x, uint y) const; - }; - - enum WrapMode { - WrapMode_Clamp, - WrapMode_Repeat, - WrapMode_Mirror - }; - - template - class Sampler - { - // ... 
- }; -#endif - - - class Tile - { - Tile(uint x, uint y, uint w, uint h) : xoffset(x), yoffset(y), w(w), h(h) - { - data = new float[w*h]; - } - ~Tile() - { - delete [] data; - } - - uint size() const - { - return w * h * sizeof(float); - } - - float pixel(uint x, uint y) const - { - x -= xoffset; - y -= yoffset; - - nvDebugCheck (x < w); - nvDebugCheck (y < h); - - return data[y * w + x]; - } - - bool load(const char * name); - void unload(const char * name); - - - uint xoffset, yoffset; - uint w, h; - float * data; - }; - - - class TiledImage - { - public: - - TiledImage(); - - void allocate(uint c, uint w, uint h, uint pageCount); - - uint componentCount() const { return m_componentCount; } - uint width() const { return m_width; } - uint height() const { return m_height; } - uint pageCount() const { return m_residentArray.count(); } - - void prefetch(uint c, uint x, uint y); - void prefetch(uint c, uint x, uint y, uint w, uint h); - - float pixel(uint c, uint x, uint y); - - private: - Tile * tileAt(uint c, uint x, uint y); - Tile * tileAt(uint idx); - - uint loadPage(uint x, uint y); - void unloadPage(Tile *); - - uint addAndReplace(uint newPage); - - private: - uint m_componentCount; - uint m_width; - uint m_height; - - struct Page { - Page() : tile(NULL) {} - - String tmpFileName; - Tile * tile; - }; - - mutable Array m_pageMap; - mutable Array m_residentArray; // MRU - }; - - inline float TiledImage::pixel(uint c, uint x, uint y) - { - nvDebugCheck (c < m_componentCount); - nvDebugCheck (x < m_width); - nvDebugCheck (y < m_height); - - uint px = x / TILE_SIZE; - uint py = y / TILE_SIZE; - - Tile * tile = tileAt(c, px, py); - - if (tile == NULL) { - tile = loadPage(c, px, py); - } - - return tile->pixel(x, y); - } - - inline Tile * TiledImage::tileAt(uint c, uint x, uint y) - { - uint idx = (c * h + y) * w + x; - return tileAt(idx); - } - inline Tile * TiledImage::tileAt(uint idx) - { - return m_pageMap[idx].tile; - } - -} // nv namespace - - - -#endif // 
NV_IMAGE_TILEDIMAGE_H diff --git a/src/nvmath/Basis.cpp b/src/nvmath/Basis.cpp new file mode 100644 index 0000000..085e25b --- /dev/null +++ b/src/nvmath/Basis.cpp @@ -0,0 +1,173 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include + +using namespace nv; + + +/// Normalize basis vectors. +void Basis::normalize(float epsilon /*= NV_EPSILON*/) +{ + normal = ::normalize(normal, epsilon); + tangent = ::normalize(tangent, epsilon); + bitangent = ::normalize(bitangent, epsilon); +} + + +/// Gram-Schmidt orthogonalization. +/// @note Works only if the vectors are close to orthogonal. +void Basis::orthonormalize(float epsilon /*= NV_EPSILON*/) +{ + // N' = |N| + // T' = |T - (N' dot T) N'| + // B' = |B - (N' dot B) N' - (T' dot B) T'| + + normal = ::normalize(normal, epsilon); + + tangent -= normal * dot(normal, tangent); + tangent = ::normalize(tangent, epsilon); + + bitangent -= normal * dot(normal, bitangent); + bitangent -= tangent * dot(tangent, bitangent); + bitangent = ::normalize(bitangent, epsilon); +} + + +/// Robust orthonormalization. +/// Returns an orthonormal basis even when the original is degenerate. 
+void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/) +{ + if (length(normal) < epsilon) + { + normal = cross(tangent, bitangent); + + if (length(normal) < epsilon) + { + tangent = Vector3(1, 0, 0); + bitangent = Vector3(0, 1, 0); + normal = Vector3(0, 0, 1); + return; + } + } + normal = ::normalize(normal, epsilon); + + tangent -= normal * dot(normal, tangent); + bitangent -= normal * dot(normal, bitangent); + + if (length(tangent) < epsilon) + { + if (length(bitangent) < epsilon) + { + buildFrameForDirection(normal); + } + else + { + tangent = cross(bitangent, normal); + nvCheck(isNormalized(tangent, epsilon)); + } + } + else + { + tangent = ::normalize(tangent, epsilon); + bitangent -= tangent * dot(tangent, bitangent); + + if (length(bitangent) < epsilon) + { + bitangent = cross(tangent, normal); + nvCheck(isNormalized(bitangent)); + } + else + { + tangent = ::normalize(tangent, epsilon); + } + } + + // Check vector lengths. + nvCheck(isNormalized(normal, epsilon)); + nvCheck(isNormalized(tangent, epsilon)); + nvCheck(isNormalized(bitangent, epsilon)); + + // Check vector angles. + nvCheck(equal(dot(normal, tangent), 0.0f, epsilon)); + nvCheck(equal(dot(normal, bitangent), 0.0f, epsilon)); + nvCheck(equal(dot(tangent, bitangent), 0.0f, epsilon)); + + // Check vector orientation. + const float det = dot(cross(normal, tangent), bitangent); + nvCheck(equal(det, 1.0f, epsilon) || equal(det, -1.0f, epsilon)); +} + + +/// Build an arbitrary frame for the given direction. +void Basis::buildFrameForDirection(Vector3::Arg d) +{ + nvCheck(isNormalized(d)); + normal = d; + + // Choose minimum axis. 
+ if (fabsf(normal.x()) < fabsf(normal.y()) && fabsf(normal.x()) < fabsf(normal.z())) + { + tangent = Vector3(1, 0, 0); + } + else if (fabsf(normal.y()) < fabsf(normal.z())) + { + tangent = Vector3(0, 1, 0); + } + else + { + tangent = Vector3(0, 0, 1); + } + + // Ortogonalize + tangent -= normal * dot(normal, tangent); + tangent = ::normalize(tangent); + + bitangent = cross(normal, tangent); +} + + + +/* +/// Transform by this basis. (From this basis to object space). +Vector3 Basis::transform(Vector3::Arg v) const +{ + Vector3 o = tangent * v.x(); + o += bitangent * v.y(); + o += normal * v.z(); + return o; +} + +/// Transform by the transpose. (From object space to this basis). +Vector3 Basis::transformT(Vector3::Arg v) +{ + return Vector3(dot(tangent, v), dot(bitangent, v), dot(normal, v)); +} + +/// Transform by the inverse. (From object space to this basis). +/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned. +Vector3 Basis::transformI(Vector3::Arg v) const +{ + const float det = determinant(); + nvCheck(!equalf(det, 0.0f)); + + const float idet = 1.0f / det; + + // Rows of the inverse matrix. 
+ Vector3 r0, r1, r2; + r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet; + r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet; + r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet; + + r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet; + r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet; + r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet; + + r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet; + r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet; + r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet; + + return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)); +} +*/ + + diff --git a/src/nvmath/Basis.h b/src/nvmath/Basis.h new file mode 100644 index 0000000..7adde57 --- /dev/null +++ b/src/nvmath/Basis.h @@ -0,0 +1,78 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_BASIS_H +#define NV_MATH_BASIS_H + +#include +#include +#include + +namespace nv +{ + + /// Basis class to compute tangent space basis, ortogonalizations and to + /// transform vectors from one space to another. + struct Basis + { + /// Create a null basis. + Basis() : tangent(0, 0, 0), bitangent(0, 0, 0), normal(0, 0, 0) {} + + /// Create a basis given three vectors. + Basis(Vector3::Arg n, Vector3::Arg t, Vector3::Arg b) : tangent(t), bitangent(b), normal(n) {} + + /// Create a basis with the given tangent vectors and the handness. + Basis(Vector3::Arg n, Vector3::Arg t, float sign) + { + build(n, t, sign); + } + + NVMATH_API void normalize(float epsilon = NV_EPSILON); + NVMATH_API void orthonormalize(float epsilon = NV_EPSILON); + NVMATH_API void robustOrthonormalize(float epsilon = NV_EPSILON); + NVMATH_API void buildFrameForDirection(Vector3::Arg d); + + /// Calculate the determinant [ F G N ] to obtain the handness of the basis. 
+ float handness() const + { + return determinant() > 0.0f ? 1.0f : -1.0f; + } + + /// Build a basis from 2 vectors and a handness flag. + void build(Vector3::Arg n, Vector3::Arg t, float sign) + { + normal = n; + tangent = t; + bitangent = sign * cross(t, n); + } + + /// Compute the determinant of this basis. + float determinant() const + { + return + tangent.x() * bitangent.y() * normal.z() - tangent.z() * bitangent.y() * normal.x() + + tangent.y() * bitangent.z() * normal.x() - tangent.y() * bitangent.x() * normal.z() + + tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y(); + } + + /* + // Get transform matrix for this basis. + NVMATH_API Matrix matrix() const; + + // Transform by this basis. (From this basis to object space). + NVMATH_API Vector3 transform(Vector3::Arg v) const; + + // Transform by the transpose. (From object space to this basis). + NVMATH_API Vector3 transformT(Vector3::Arg v); + + // Transform by the inverse. (From object space to this basis). + NVMATH_API Vector3 transformI(Vector3::Arg v) const; + */ + + Vector3 tangent; + Vector3 bitangent; + Vector3 normal; + }; + +} // nv namespace + +#endif // NV_MATH_BASIS_H diff --git a/src/nvmath/Box.h b/src/nvmath/Box.h index ed88a1b..212432d 100644 --- a/src/nvmath/Box.h +++ b/src/nvmath/Box.h @@ -9,7 +9,6 @@ namespace nv { -class Stream; /// Axis Aligned Bounding Box. class Box @@ -28,13 +27,11 @@ public: // Cast operators. operator const float * () const { return reinterpret_cast(this); } - // Min corner of the box. - Vector3 minCorner() const { return m_mins; } - Vector3 & minCorner() { return m_mins; } + /// Min corner of the box. + Vector3 mins() const { return m_mins; } - // Max corner of the box. - Vector3 maxCorner() const { return m_maxs; } - Vector3 & maxCorner() { return m_maxs; } + /// Max corner of the box. + Vector3 maxs() const { return m_maxs; } /// Clear the bounds. 
void clearBounds() @@ -129,8 +126,6 @@ public: m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z(); } - friend Stream & operator<< (Stream & s, Box & box); - private: Vector3 m_mins; diff --git a/src/nvmath/CMakeLists.txt b/src/nvmath/CMakeLists.txt index ff94666..7ea4a80 100644 --- a/src/nvmath/CMakeLists.txt +++ b/src/nvmath/CMakeLists.txt @@ -4,11 +4,14 @@ SET(MATH_SRCS nvmath.h Vector.h Matrix.h - Plane.h Plane.cpp + Quaternion.h Box.h Color.h - Half.h Half.cpp - Fitting.h Fitting.cpp) + Montecarlo.h Montecarlo.cpp + Random.h Random.cpp + SphericalHarmonic.h SphericalHarmonic.cpp + Basis.h Basis.cpp + Triangle.h Triangle.cpp TriBox.cpp) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/nvmath/Fitting.cpp b/src/nvmath/Fitting.cpp deleted file mode 100644 index 77e8af4..0000000 --- a/src/nvmath/Fitting.cpp +++ /dev/null @@ -1,247 +0,0 @@ -// This code is in the public domain -- icastano@gmail.com - -#include "Fitting.h" - -#include // max -#include // swap - -#include // FLT_MAX - -using namespace nv; - -// @@ Move to EigenSolver.h -static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix) -{ - if (matrix[0] == 0 || matrix[3] == 0 || matrix[5] == 0) - { - return Vector3(zero); - } - - const int NUM = 8; - - Vector3 v(1, 1, 1); - for (int i = 0; i < NUM; i++) - { - float x = v.x() * matrix[0] + v.y() * matrix[1] + v.z() * matrix[2]; - float y = v.x() * matrix[1] + v.y() * matrix[3] + v.z() * matrix[4]; - float z = v.x() * matrix[2] + v.y() * matrix[4] + v.z() * matrix[5]; - - float norm = max(max(x, y), z); - - v = Vector3(x, y, z) / norm; - } - - return v; -} - - -Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points) -{ - Vector3 centroid(zero); - - for (int i = 0; i < n; i++) - { - centroid += points[i]; - } - centroid /= float(n); - - return centroid; -} - -Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) -{ - 
Vector3 centroid(zero); - float total = 0.0f; - - for (int i = 0; i < n; i++) - { - total += weights[i]; - centroid += weights[i]*points[i]; - } - centroid /= total; - - return centroid; -} - - -Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance) -{ - // compute the centroid - Vector3 centroid = computeCentroid(n, points); - - // compute covariance matrix - for (int i = 0; i < 6; i++) - { - covariance[i] = 0.0f; - } - - for (int i = 0; i < n; i++) - { - Vector3 v = points[i] - centroid; - - covariance[0] += v.x() * v.x(); - covariance[1] += v.x() * v.y(); - covariance[2] += v.x() * v.z(); - covariance[3] += v.y() * v.y(); - covariance[4] += v.y() * v.z(); - covariance[5] += v.z() * v.z(); - } - - return centroid; -} - -Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance) -{ - // compute the centroid - Vector3 centroid = computeCentroid(n, points, weights, metric); - - // compute covariance matrix - for (int i = 0; i < 6; i++) - { - covariance[i] = 0.0f; - } - - for (int i = 0; i < n; i++) - { - Vector3 a = (points[i] - centroid) * metric; - Vector3 b = weights[i]*a; - - covariance[0] += a.x()*b.x(); - covariance[1] += a.x()*b.y(); - covariance[2] += a.x()*b.z(); - covariance[3] += a.y()*b.y(); - covariance[4] += a.y()*b.z(); - covariance[5] += a.z()*b.z(); - } - - return centroid; -} - -Vector3 nv::Fit::computePrincipalComponent(int n, const Vector3 *__restrict points) -{ - float matrix[6]; - computeCovariance(n, points, matrix); - - return firstEigenVector_PowerMethod(matrix); -} - -Vector3 nv::Fit::computePrincipalComponent(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) -{ - float matrix[6]; - computeCovariance(n, points, weights, metric, matrix); - - return firstEigenVector_PowerMethod(matrix); -} - - -Plane nv::Fit::bestPlane(int n, const Vector3 
*__restrict points) -{ - // compute the centroid and covariance - float matrix[6]; - Vector3 centroid = computeCovariance(n, points, matrix); - - if (matrix[0] == 0 || matrix[3] == 0 || matrix[5] == 0) - { - // If no plane defined, then return a horizontal plane. - return Plane(Vector3(0, 0, 1), centroid); - } - -#pragma message(NV_FILE_LINE "TODO: need to write an eigensolver!") - - // - Numerical Recipes in C is a good reference. Householder transforms followed by QL decomposition seems to be the best approach. - // - The one from magic-tools is now LGPL. For the 3D case it uses a cubic root solver, which is not very accurate. - // - Charles' Galaxy3 contains an implementation of the tridiagonalization method, but is under BPL. - - //EigenSolver3 solver(matrix); - - return Plane(); -} - - -int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster) -{ - // Compute principal component. - float matrix[6]; - Vector3 centroid = computeCovariance(n, points, weights, metric, matrix); - Vector3 principal = firstEigenVector_PowerMethod(matrix); - - // Pick initial solution. - int mini, maxi; - mini = maxi = 0; - - float mindps, maxdps; - mindps = maxdps = dot(points[0] - centroid, principal); - - for (int i = 1; i < n; ++i) - { - float dps = dot(points[i] - centroid, principal); - - if (dps < mindps) { - mindps = dps; - mini = i; - } - else { - maxdps = dps; - maxi = i; - } - } - - cluster[0] = centroid + mindps * principal; - cluster[1] = centroid + maxdps * principal; - cluster[2] = (2 * cluster[0] + cluster[1]) / 3; - cluster[3] = (2 * cluster[1] + cluster[0]) / 3; - - // Now we have to iteratively refine the clusters. - while (true) - { - Vector3 newCluster[4] = { Vector3(zero), Vector3(zero), Vector3(zero), Vector3(zero) }; - float total[4] = {0, 0, 0, 0}; - - for (int i = 0; i < n; ++i) - { - // Find nearest cluster. 
- int nearest = 0; - float mindist = FLT_MAX; - for (int j = 0; j < 4; j++) - { - float dist = length_squared((cluster[j] - points[i]) * metric); - if (dist < mindist) - { - mindist = dist; - nearest = j; - } - } - - newCluster[nearest] += weights[i] * points[i]; - total[nearest] += weights[i]; - } - - for (int j = 0; j < 4; j++) - { - if (total[j] != 0) - newCluster[j] /= total[j]; - } - - if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) && - equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3])) - { - return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0); - } - - cluster[0] = newCluster[0]; - cluster[1] = newCluster[1]; - cluster[2] = newCluster[2]; - cluster[3] = newCluster[3]; - - // Sort clusters by weight. - for (int i = 0; i < 4; i++) - { - for (int j = i; j > 0 && total[j] > total[j - 1]; j--) - { - swap( total[j], total[j - 1] ); - swap( cluster[j], cluster[j - 1] ); - } - } - } -} - diff --git a/src/nvmath/Fitting.h b/src/nvmath/Fitting.h deleted file mode 100644 index ec0d84b..0000000 --- a/src/nvmath/Fitting.h +++ /dev/null @@ -1,31 +0,0 @@ -// This code is in the public domain -- icastano@gmail.com - -#ifndef NV_MATH_FITTING_H -#define NV_MATH_FITTING_H - -#include -#include -#include - -namespace nv -{ - namespace Fit - { - Vector3 computeCentroid(int n, const Vector3 * points); - Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, Vector3::Arg metric); - - Vector3 computeCovariance(int n, const Vector3 * points, float * covariance); - Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, Vector3::Arg metric, float * covariance); - - Vector3 computePrincipalComponent(int n, const Vector3 * points); - Vector3 computePrincipalComponent(int n, const Vector3 * points, const float * weights, Vector3::Arg metric); - - Plane bestPlane(int n, const Vector3 * points); - - // Returns number of clusters [1-4]. 
- int compute4Means(int n, const Vector3 * points, const float * weights, Vector3::Arg metric, Vector3 * cluster); - } - -} // nv namespace - -#endif // NV_MATH_FITTING_H diff --git a/src/nvmath/Half.cpp b/src/nvmath/Half.cpp deleted file mode 100644 index da5b013..0000000 --- a/src/nvmath/Half.cpp +++ /dev/null @@ -1,563 +0,0 @@ -// Branch-free implementation of half-precision (16 bit) floating point -// Copyright 2006 Mike Acton -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE -// -// Half-precision floating point format -// ------------------------------------ -// -// | Field | Last | First | Note -// |----------|------|-------|---------- -// | Sign | 15 | 15 | -// | Exponent | 14 | 10 | Bias = 15 -// | Mantissa | 9 | 0 | -// -// Compiling -// --------- -// -// Preferred compile flags for GCC: -// -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing -// -// This file is a C99 source file, intended to be compiled with a C99 -// compliant compiler. However, for the moment it remains combatible -// with C++98. Therefore if you are using a compiler that poorly implements -// C standards (e.g. MSVC), it may be compiled as C++. This is not -// guaranteed for future versions. -// -// Features -// -------- -// -// * QNaN + = QNaN -// * + +INF = +INF -// * - -INF = -INF -// * INF - INF = SNaN -// * Denormalized values -// * Difference of ZEROs is always +ZERO -// * Sum round with guard + round + sticky bit (grs) -// * And of course... 
no branching -// -// Precision of Sum -// ---------------- -// -// (SUM) uint16 z = half_add( x, y ); -// (DIFFERENCE) uint16 z = half_add( x, -y ); -// -// Will have exactly (0 ulps difference) the same result as: -// (For 32 bit IEEE 784 floating point and same rounding mode) -// -// union FLOAT_32 -// { -// float f32; -// uint32 u32; -// }; -// -// union FLOAT_32 fx = { .u32 = half_to_float( x ) }; -// union FLOAT_32 fy = { .u32 = half_to_float( y ) }; -// union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32 }; -// uint16 z = float_to_half( fz ); -// - -#include "Half.h" -#include - -// Load immediate -static inline uint32 _uint32_li( uint32 a ) -{ - return (a); -} - -// Decrement -static inline uint32 _uint32_dec( uint32 a ) -{ - return (a - 1); -} - -// Complement -static inline uint32 _uint32_not( uint32 a ) -{ - return (~a); -} - -// Negate -static inline uint32 _uint32_neg( uint32 a ) -{ -#if NV_CC_MSVC - // prevent msvc warning. - return ~a + 1; -#else - return (-a); -#endif -} - -// Extend sign -static inline uint32 _uint32_ext( uint32 a ) -{ - return (((int32)a)>>31); -} - -// And -static inline uint32 _uint32_and( uint32 a, uint32 b ) -{ - return (a & b); -} - -// And with Complement -static inline uint32 _uint32_andc( uint32 a, uint32 b ) -{ - return (a & ~b); -} - -// Or -static inline uint32 _uint32_or( uint32 a, uint32 b ) -{ - return (a | b); -} - -// Shift Right Logical -static inline uint32 _uint32_srl( uint32 a, int sa ) -{ - return (a >> sa); -} - -// Shift Left Logical -static inline uint32 _uint32_sll( uint32 a, int sa ) -{ - return (a << sa); -} - -// Add -static inline uint32 _uint32_add( uint32 a, uint32 b ) -{ - return (a + b); -} - -// Subtract -static inline uint32 _uint32_sub( uint32 a, uint32 b ) -{ - return (a - b); -} - -// Select on Sign bit -static inline uint32 _uint32_sels( uint32 test, uint32 a, uint32 b ) -{ - const uint32 mask = _uint32_ext( test ); - const uint32 sel_a = _uint32_and( a, mask ); - const uint32 sel_b = _uint32_andc( 
b, mask ); - const uint32 result = _uint32_or( sel_a, sel_b ); - - return (result); -} - -// Load Immediate -static inline uint16 _uint16_li( uint16 a ) -{ - return (a); -} - -// Extend sign -static inline uint16 _uint16_ext( uint16 a ) -{ - return (((int16)a)>>15); -} - -// Negate -static inline uint16 _uint16_neg( uint16 a ) -{ - return (-a); -} - -// Complement -static inline uint16 _uint16_not( uint16 a ) -{ - return (~a); -} - -// Decrement -static inline uint16 _uint16_dec( uint16 a ) -{ - return (a - 1); -} - -// Shift Left Logical -static inline uint16 _uint16_sll( uint16 a, int sa ) -{ - return (a << sa); -} - -// Shift Right Logical -static inline uint16 _uint16_srl( uint16 a, int sa ) -{ - return (a >> sa); -} - -// Add -static inline uint16 _uint16_add( uint16 a, uint16 b ) -{ - return (a + b); -} - -// Subtract -static inline uint16 _uint16_sub( uint16 a, uint16 b ) -{ - return (a - b); -} - -// And -static inline uint16 _uint16_and( uint16 a, uint16 b ) -{ - return (a & b); -} - -// Or -static inline uint16 _uint16_or( uint16 a, uint16 b ) -{ - return (a | b); -} - -// Exclusive Or -static inline uint16 _uint16_xor( uint16 a, uint16 b ) -{ - return (a ^ b); -} - -// And with Complement -static inline uint16 _uint16_andc( uint16 a, uint16 b ) -{ - return (a & ~b); -} - -// And then Shift Right Logical -static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa ) -{ - return ((a & b) >> sa); -} - -// Shift Right Logical then Mask -static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask ) -{ - return ((a >> sa) & mask); -} - -// Add then Mask -static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask ) -{ - return ((a + b) & mask); -} - - -// Select on Sign bit -static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b ) -{ - const uint16 mask = _uint16_ext( test ); - const uint16 sel_a = _uint16_and( a, mask ); - const uint16 sel_b = _uint16_andc( b, mask ); - const uint16 result = _uint16_or( sel_a, sel_b ); - - return 
(result); -} - -// Count Leading Zeros -static inline uint32 _uint32_cntlz( uint32 x ) -{ -#ifdef __GNUC__ - /* On PowerPC, this will map to insn: cntlzw */ - /* On Pentium, this will map to insn: clz */ - uint32 nlz = __builtin_clz( x ); - return (nlz); -#else - const uint32 x0 = _uint32_srl( x, 1 ); - const uint32 x1 = _uint32_or( x, x0 ); - const uint32 x2 = _uint32_srl( x1, 2 ); - const uint32 x3 = _uint32_or( x1, x2 ); - const uint32 x4 = _uint32_srl( x3, 4 ); - const uint32 x5 = _uint32_or( x3, x4 ); - const uint32 x6 = _uint32_srl( x5, 8 ); - const uint32 x7 = _uint32_or( x5, x6 ); - const uint32 x8 = _uint32_srl( x7, 16 ); - const uint32 x9 = _uint32_or( x7, x8 ); - const uint32 xA = _uint32_not( x9 ); - const uint32 xB = _uint32_srl( xA, 1 ); - const uint32 xC = _uint32_and( xB, 0x55555555 ); - const uint32 xD = _uint32_sub( xA, xC ); - const uint32 xE = _uint32_and( xD, 0x33333333 ); - const uint32 xF = _uint32_srl( xD, 2 ); - const uint32 x10 = _uint32_and( xF, 0x33333333 ); - const uint32 x11 = _uint32_add( xE, x10 ); - const uint32 x12 = _uint32_srl( x11, 4 ); - const uint32 x13 = _uint32_add( x11, x12 ); - const uint32 x14 = _uint32_and( x13, 0x0f0f0f0f ); - const uint32 x15 = _uint32_srl( x14, 8 ); - const uint32 x16 = _uint32_add( x14, x15 ); - const uint32 x17 = _uint32_srl( x16, 16 ); - const uint32 x18 = _uint32_add( x16, x17 ); - const uint32 x19 = _uint32_and( x18, 0x0000003f ); - return ( x19 ); -#endif -} - -// Count Leading Zeros -static inline uint16 _uint16_cntlz( uint16 x ) -{ -#ifdef __GNUC__ - /* On PowerPC, this will map to insn: cntlzw */ - /* On Pentium, this will map to insn: clz */ - uint32 x32 = _uint32_sll( x, 16 ); - uint16 nlz = (uint16)__builtin_clz( x32 ); - return (nlz); -#else - const uint16 x0 = _uint16_srl( x, 1 ); - const uint16 x1 = _uint16_or( x, x0 ); - const uint16 x2 = _uint16_srl( x1, 2 ); - const uint16 x3 = _uint16_or( x1, x2 ); - const uint16 x4 = _uint16_srl( x3, 4 ); - const uint16 x5 = _uint16_or( x3, x4 ); - 
const uint16 x6 = _uint16_srl( x5, 8 ); - const uint16 x7 = _uint16_or( x5, x6 ); - const uint16 x8 = _uint16_not( x7 ); - const uint16 x9 = _uint16_srlm( x8, 1, 0x5555 ); - const uint16 xA = _uint16_sub( x8, x9 ); - const uint16 xB = _uint16_and( xA, 0x3333 ); - const uint16 xC = _uint16_srlm( xA, 2, 0x3333 ); - const uint16 xD = _uint16_add( xB, xC ); - const uint16 xE = _uint16_srl( xD, 4 ); - const uint16 xF = _uint16_addm( xD, xE, 0x0f0f ); - const uint16 x10 = _uint16_srl( xF, 8 ); - const uint16 x11 = _uint16_addm( xF, x10, 0x001f ); - return ( x11 ); -#endif -} - -uint16 -half_from_float( uint32 f ) -{ - const uint32 one = _uint32_li( 0x00000001 ); - const uint32 f_e_mask = _uint32_li( 0x7f800000 ); - const uint32 f_m_mask = _uint32_li( 0x007fffff ); - const uint32 f_s_mask = _uint32_li( 0x80000000 ); - const uint32 h_e_mask = _uint32_li( 0x00007c00 ); - const uint32 f_e_pos = _uint32_li( 0x00000017 ); - const uint32 f_m_round_bit = _uint32_li( 0x00001000 ); - const uint32 h_nan_em_min = _uint32_li( 0x00007c01 ); - const uint32 f_h_s_pos_offset = _uint32_li( 0x00000010 ); - const uint32 f_m_hidden_bit = _uint32_li( 0x00800000 ); - const uint32 f_h_m_pos_offset = _uint32_li( 0x0000000d ); - const uint32 f_h_bias_offset = _uint32_li( 0x38000000 ); - const uint32 f_m_snan_mask = _uint32_li( 0x003fffff ); - const uint16 h_snan_mask = _uint32_li( 0x00007e00 ); - const uint32 f_e = _uint32_and( f, f_e_mask ); - const uint32 f_m = _uint32_and( f, f_m_mask ); - const uint32 f_s = _uint32_and( f, f_s_mask ); - const uint32 f_e_h_bias = _uint32_sub( f_e, f_h_bias_offset ); - const uint32 f_e_h_bias_amount = _uint32_srl( f_e_h_bias, f_e_pos ); - const uint32 f_m_round_mask = _uint32_and( f_m, f_m_round_bit ); - const uint32 f_m_round_offset = _uint32_sll( f_m_round_mask, one ); - const uint32 f_m_rounded = _uint32_add( f_m, f_m_round_offset ); - const uint32 f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit ); - const uint32 f_m_denorm_sa = _uint32_sub( 
one, f_e_h_bias_amount ); - const uint32 f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit ); - const uint32 f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa ); - const uint32 f_em_norm_packed = _uint32_or( f_e_h_bias, f_m_rounded ); - const uint32 f_e_overflow = _uint32_add( f_e_h_bias, f_m_hidden_bit ); - const uint32 h_s = _uint32_srl( f_s, f_h_s_pos_offset ); - const uint32 h_m_nan = _uint32_srl( f_m, f_h_m_pos_offset ); - const uint32 h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset ); - const uint32 h_em_norm = _uint32_srl( f_em_norm_packed, f_h_m_pos_offset ); - const uint32 h_em_overflow = _uint32_srl( f_e_overflow, f_h_m_pos_offset ); - const uint32 is_e_eqz_msb = _uint32_dec( f_e ); - const uint32 is_m_nez_msb = _uint32_neg( f_m ); - const uint32 is_h_m_nan_nez_msb = _uint32_neg( h_m_nan ); - const uint32 is_e_nflagged_msb = _uint32_sub( f_e, f_e_mask ); - const uint32 is_ninf_msb = _uint32_or( is_e_nflagged_msb, is_m_nez_msb ); - const uint32 is_underflow_msb = _uint32_sub( is_e_eqz_msb, f_h_bias_offset ); - const uint32 is_nan_nunderflow_msb = _uint32_or( is_h_m_nan_nez_msb, is_e_nflagged_msb ); - const uint32 is_m_snan_msb = _uint32_sub( f_m_snan_mask, f_m ); - const uint32 is_snan_msb = _uint32_andc( is_m_snan_msb, is_e_nflagged_msb ); - const uint32 is_overflow_msb = _uint32_neg( f_m_rounded_overflow ); - const uint32 h_nan_underflow_result = _uint32_sels( is_nan_nunderflow_msb, h_em_norm, h_nan_em_min ); - const uint32 h_inf_result = _uint32_sels( is_ninf_msb, h_nan_underflow_result, h_e_mask ); - const uint32 h_underflow_result = _uint32_sels( is_underflow_msb, h_m_denorm, h_inf_result ); - const uint32 h_overflow_result = _uint32_sels( is_overflow_msb, h_em_overflow, h_underflow_result ); - const uint32 h_em_result = _uint32_sels( is_snan_msb, h_snan_mask, h_overflow_result ); - const uint32 h_result = _uint32_or( h_em_result, h_s ); - - return (h_result); -} - -uint32 -half_to_float( uint16 h ) -{ - const uint32 h_e_mask = 
_uint32_li( 0x00007c00 ); - const uint32 h_m_mask = _uint32_li( 0x000003ff ); - const uint32 h_s_mask = _uint32_li( 0x00008000 ); - const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 ); - const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d ); - const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 ); - const uint32 f_e_mask = _uint32_li( 0x7f800000 ); - const uint32 f_m_mask = _uint32_li( 0x007fffff ); - const uint32 h_f_e_denorm_bias = _uint32_li( 0x0000007e ); - const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 ); - const uint32 f_e_pos = _uint32_li( 0x00000017 ); - const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff ); - const uint32 h_e = _uint32_and( h, h_e_mask ); - const uint32 h_m = _uint32_and( h, h_m_mask ); - const uint32 h_s = _uint32_and( h, h_s_mask ); - const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset ); - const uint32 h_m_nlz = _uint32_cntlz( h_m ); - const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset ); - const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset ); - const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset ); - const uint32 f_em = _uint32_or( f_e, f_m ); - const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias ); - const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa ); - const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa ); - const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask ); - const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos ); - const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm ); - const uint32 f_em_nan = _uint32_or( f_e_mask, f_m ); - const uint32 is_e_eqz_msb = _uint32_dec( h_e ); - const uint32 is_m_nez_msb = _uint32_neg( h_m ); - const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e ); - const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb ); - const uint32 is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb ); - const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb ); - 
const uint32 is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb ); - const uint32 is_zero = _uint32_ext( is_zero_msb ); - const uint32 f_zero_result = _uint32_andc( f_em, is_zero ); - const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result ); - const uint32 f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result ); - const uint32 f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result ); - const uint32 f_result = _uint32_or( f_s, f_nan_result ); - - return (f_result); -} - -uint16 -half_add( uint16 x, uint16 y ) -{ - const uint16 one = _uint16_li( 0x0001 ); - const uint16 msb_to_lsb_sa = _uint16_li( 0x000f ); - const uint16 h_s_mask = _uint16_li( 0x8000 ); - const uint16 h_e_mask = _uint16_li( 0x7c00 ); - const uint16 h_m_mask = _uint16_li( 0x03ff ); - const uint16 h_m_msb_mask = _uint16_li( 0x2000 ); - const uint16 h_m_msb_sa = _uint16_li( 0x000d ); - const uint16 h_m_hidden = _uint16_li( 0x0400 ); - const uint16 h_e_pos = _uint16_li( 0x000a ); - const uint16 h_e_bias_minus_one = _uint16_li( 0x000e ); - const uint16 h_m_grs_carry = _uint16_li( 0x4000 ); - const uint16 h_m_grs_carry_pos = _uint16_li( 0x000e ); - const uint16 h_grs_size = _uint16_li( 0x0003 ); - const uint16 h_snan = _uint16_li( 0xfe00 ); - const uint16 h_e_mask_minus_one = _uint16_li( 0x7bff ); - const uint16 h_grs_round_carry = _uint16_sll( one, h_grs_size ); - const uint16 h_grs_round_mask = _uint16_sub( h_grs_round_carry, one ); - const uint16 x_e = _uint16_and( x, h_e_mask ); - const uint16 y_e = _uint16_and( y, h_e_mask ); - const uint16 is_y_e_larger_msb = _uint16_sub( x_e, y_e ); - const uint16 a = _uint16_sels( is_y_e_larger_msb, y, x); - const uint16 a_s = _uint16_and( a, h_s_mask ); - const uint16 a_e = _uint16_and( a, h_e_mask ); - const uint16 a_m_no_hidden_bit = _uint16_and( a, h_m_mask ); - const uint16 a_em_no_hidden_bit = _uint16_or( a_e, a_m_no_hidden_bit ); - const uint16 b = _uint16_sels( is_y_e_larger_msb, x, y); - const 
uint16 b_s = _uint16_and( b, h_s_mask ); - const uint16 b_e = _uint16_and( b, h_e_mask ); - const uint16 b_m_no_hidden_bit = _uint16_and( b, h_m_mask ); - const uint16 b_em_no_hidden_bit = _uint16_or( b_e, b_m_no_hidden_bit ); - const uint16 is_diff_sign_msb = _uint16_xor( a_s, b_s ); - const uint16 is_a_inf_msb = _uint16_sub( h_e_mask_minus_one, a_em_no_hidden_bit ); - const uint16 is_b_inf_msb = _uint16_sub( h_e_mask_minus_one, b_em_no_hidden_bit ); - const uint16 is_undenorm_msb = _uint16_dec( a_e ); - const uint16 is_undenorm = _uint16_ext( is_undenorm_msb ); - const uint16 is_both_inf_msb = _uint16_and( is_a_inf_msb, is_b_inf_msb ); - const uint16 is_invalid_inf_op_msb = _uint16_and( is_both_inf_msb, b_s ); - const uint16 is_a_e_nez_msb = _uint16_neg( a_e ); - const uint16 is_b_e_nez_msb = _uint16_neg( b_e ); - const uint16 is_a_e_nez = _uint16_ext( is_a_e_nez_msb ); - const uint16 is_b_e_nez = _uint16_ext( is_b_e_nez_msb ); - const uint16 a_m_hidden_bit = _uint16_and( is_a_e_nez, h_m_hidden ); - const uint16 b_m_hidden_bit = _uint16_and( is_b_e_nez, h_m_hidden ); - const uint16 a_m_no_grs = _uint16_or( a_m_no_hidden_bit, a_m_hidden_bit ); - const uint16 b_m_no_grs = _uint16_or( b_m_no_hidden_bit, b_m_hidden_bit ); - const uint16 diff_e = _uint16_sub( a_e, b_e ); - const uint16 a_e_unbias = _uint16_sub( a_e, h_e_bias_minus_one ); - const uint16 a_m = _uint16_sll( a_m_no_grs, h_grs_size ); - const uint16 a_e_biased = _uint16_srl( a_e, h_e_pos ); - const uint16 m_sa_unbias = _uint16_srl( a_e_unbias, h_e_pos ); - const uint16 m_sa_default = _uint16_srl( diff_e, h_e_pos ); - const uint16 m_sa_unbias_mask = _uint16_andc( is_a_e_nez_msb, is_b_e_nez_msb ); - const uint16 m_sa = _uint16_sels( m_sa_unbias_mask, m_sa_unbias, m_sa_default ); - const uint16 b_m_no_sticky = _uint16_sll( b_m_no_grs, h_grs_size ); - const uint16 sh_m = _uint16_srl( b_m_no_sticky, m_sa ); - const uint16 sticky_overflow = _uint16_sll( one, m_sa ); - const uint16 sticky_mask = _uint16_dec( 
sticky_overflow ); - const uint16 sticky_collect = _uint16_and( b_m_no_sticky, sticky_mask ); - const uint16 is_sticky_set_msb = _uint16_neg( sticky_collect ); - const uint16 sticky = _uint16_srl( is_sticky_set_msb, msb_to_lsb_sa); - const uint16 b_m = _uint16_or( sh_m, sticky ); - const uint16 is_c_m_ab_pos_msb = _uint16_sub( b_m, a_m ); - const uint16 c_inf = _uint16_or( a_s, h_e_mask ); - const uint16 c_m_sum = _uint16_add( a_m, b_m ); - const uint16 c_m_diff_ab = _uint16_sub( a_m, b_m ); - const uint16 c_m_diff_ba = _uint16_sub( b_m, a_m ); - const uint16 c_m_smag_diff = _uint16_sels( is_c_m_ab_pos_msb, c_m_diff_ab, c_m_diff_ba ); - const uint16 c_s_diff = _uint16_sels( is_c_m_ab_pos_msb, a_s, b_s ); - const uint16 c_s = _uint16_sels( is_diff_sign_msb, c_s_diff, a_s ); - const uint16 c_m_smag_diff_nlz = _uint16_cntlz( c_m_smag_diff ); - const uint16 diff_norm_sa = _uint16_sub( c_m_smag_diff_nlz, one ); - const uint16 is_diff_denorm_msb = _uint16_sub( a_e_biased, diff_norm_sa ); - const uint16 is_diff_denorm = _uint16_ext( is_diff_denorm_msb ); - const uint16 is_a_or_b_norm_msb = _uint16_neg( a_e_biased ); - const uint16 diff_denorm_sa = _uint16_dec( a_e_biased ); - const uint16 c_m_diff_denorm = _uint16_sll( c_m_smag_diff, diff_denorm_sa ); - const uint16 c_m_diff_norm = _uint16_sll( c_m_smag_diff, diff_norm_sa ); - const uint16 c_e_diff_norm = _uint16_sub( a_e_biased, diff_norm_sa ); - const uint16 c_m_diff_ab_norm = _uint16_sels( is_diff_denorm_msb, c_m_diff_denorm, c_m_diff_norm ); - const uint16 c_e_diff_ab_norm = _uint16_andc( c_e_diff_norm, is_diff_denorm ); - const uint16 c_m_diff = _uint16_sels( is_a_or_b_norm_msb, c_m_diff_ab_norm, c_m_smag_diff ); - const uint16 c_e_diff = _uint16_sels( is_a_or_b_norm_msb, c_e_diff_ab_norm, a_e_biased ); - const uint16 is_diff_eqz_msb = _uint16_dec( c_m_diff ); - const uint16 is_diff_exactly_zero_msb = _uint16_and( is_diff_sign_msb, is_diff_eqz_msb ); - const uint16 is_diff_exactly_zero = _uint16_ext( 
is_diff_exactly_zero_msb ); - const uint16 c_m_added = _uint16_sels( is_diff_sign_msb, c_m_diff, c_m_sum ); - const uint16 c_e_added = _uint16_sels( is_diff_sign_msb, c_e_diff, a_e_biased ); - const uint16 c_m_carry = _uint16_and( c_m_added, h_m_grs_carry ); - const uint16 is_c_m_carry_msb = _uint16_neg( c_m_carry ); - const uint16 c_e_hidden_offset = _uint16_andsrl( c_m_added, h_m_grs_carry, h_m_grs_carry_pos ); - const uint16 c_m_sub_hidden = _uint16_srl( c_m_added, one ); - const uint16 c_m_no_hidden = _uint16_sels( is_c_m_carry_msb, c_m_sub_hidden, c_m_added ); - const uint16 c_e_no_hidden = _uint16_add( c_e_added, c_e_hidden_offset ); - const uint16 c_m_no_hidden_msb = _uint16_and( c_m_no_hidden, h_m_msb_mask ); - const uint16 undenorm_m_msb_odd = _uint16_srl( c_m_no_hidden_msb, h_m_msb_sa ); - const uint16 undenorm_fix_e = _uint16_and( is_undenorm, undenorm_m_msb_odd ); - const uint16 c_e_fixed = _uint16_add( c_e_no_hidden, undenorm_fix_e ); - const uint16 c_m_round_amount = _uint16_and( c_m_no_hidden, h_grs_round_mask ); - const uint16 c_m_rounded = _uint16_add( c_m_no_hidden, c_m_round_amount ); - const uint16 c_m_round_overflow = _uint16_andsrl( c_m_rounded, h_m_grs_carry, h_m_grs_carry_pos ); - const uint16 c_e_rounded = _uint16_add( c_e_fixed, c_m_round_overflow ); - const uint16 c_m_no_grs = _uint16_srlm( c_m_rounded, h_grs_size, h_m_mask ); - const uint16 c_e = _uint16_sll( c_e_rounded, h_e_pos ); - const uint16 c_em = _uint16_or( c_e, c_m_no_grs ); - const uint16 c_normal = _uint16_or( c_s, c_em ); - const uint16 c_inf_result = _uint16_sels( is_a_inf_msb, c_inf, c_normal ); - const uint16 c_zero_result = _uint16_andc( c_inf_result, is_diff_exactly_zero ); - const uint16 c_result = _uint16_sels( is_invalid_inf_op_msb, h_snan, c_zero_result ); - - return (c_result); -} diff --git a/src/nvmath/Half.h b/src/nvmath/Half.h deleted file mode 100644 index 2dfd51a..0000000 --- a/src/nvmath/Half.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NV_MATH_HALF_H -#define 
NV_MATH_HALF_H - -#include - -uint32 half_to_float( uint16 h ); -uint16 half_from_float( uint32 f ); - -#endif /* NV_MATH_HALF_H */ diff --git a/src/nvmath/Matrix.h b/src/nvmath/Matrix.h index 8a76896..28749ba 100644 --- a/src/nvmath/Matrix.h +++ b/src/nvmath/Matrix.h @@ -24,8 +24,6 @@ public: Matrix(zero_t); Matrix(identity_t); Matrix(const Matrix & m); - Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3); - Matrix(const scalar m[]); // m is assumed to contain 16 elements scalar data(uint idx) const; scalar & data(uint idx); @@ -77,21 +75,6 @@ inline Matrix::Matrix(const Matrix & m) } } -inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3) -{ - m_data[ 0] = v0.x(); m_data[ 1] = v0.y(); m_data[ 2] = v0.z(); m_data[ 3] = v0.w(); - m_data[ 4] = v1.x(); m_data[ 5] = v1.y(); m_data[ 6] = v1.z(); m_data[ 7] = v1.w(); - m_data[ 8] = v2.x(); m_data[ 9] = v2.y(); m_data[10] = v2.z(); m_data[11] = v2.w(); - m_data[12] = v3.x(); m_data[13] = v3.y(); m_data[14] = v3.z(); m_data[15] = v3.w(); -} - -inline Matrix::Matrix(const scalar m[]) -{ - for(int i = 0; i < 16; i++) { - m_data[i] = m[i]; - } -} - // Accessors inline scalar Matrix::data(uint idx) const diff --git a/src/nvmath/Montecarlo.cpp b/src/nvmath/Montecarlo.cpp new file mode 100644 index 0000000..4cd23a5 --- /dev/null +++ b/src/nvmath/Montecarlo.cpp @@ -0,0 +1,156 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include + +using namespace nv; + + +void SampleDistribution::redistribute(Method method/*=Method_NRook*/, Distribution dist/*=Distribution_Cosine*/) +{ + switch(method) + { + case Method_Random: + redistributeRandom(dist); + break; + case Method_Stratified: + redistributeStratified(dist); + break; + case Method_NRook: + redistributeNRook(dist); + break; + }; +} + +void SampleDistribution::redistributeRandom(const Distribution dist) +{ + const uint sampleCount = m_sampleArray.count(); + + // This is the worst method possible! 
+ for(uint i = 0; i < sampleCount; i++) + { + float x = m_rand.getFloat(); + float y = m_rand.getFloat(); + + // Map uniform distribution in the square to the (hemi)sphere. + if( dist == Distribution_Uniform ) { + m_sampleArray[i].setUV(acosf(1 - 2 * x), 2 * PI * y); + } + else { + nvDebugCheck(dist == Distribution_Cosine); + m_sampleArray[i].setUV(acosf(sqrtf(x)), 2 * PI * y); + } + } +} + + +void SampleDistribution::redistributeStratified(const Distribution dist) +{ + const uint sampleCount = m_sampleArray.count(); + const uint sqrtSampleCount = uint(sqrtf(float(sampleCount))); + + nvDebugCheck(sqrtSampleCount*sqrtSampleCount == sampleCount); // Must use exact powers! + + // Create a uniform distribution of points on the hemisphere with low variance. + for(uint v = 0, i = 0; v < sqrtSampleCount; v++) { + for(uint u = 0; u < sqrtSampleCount; u++, i++) { + float x = (u + m_rand.getFloat()) / float(sqrtSampleCount); + float y = (v + m_rand.getFloat()) / float(sqrtSampleCount); + + // Map uniform distribution in the square to the (hemi)sphere. + if( dist == Distribution_Uniform ) { + m_sampleArray[i].setUV(acosf(1 - 2 * x), 2 * PI * y); + } + else { + nvDebugCheck(dist == Distribution_Cosine); + m_sampleArray[i].setUV(acosf(sqrtf(x)), 2 * PI * y); + } + } + } +} + + +/** Multi-Stage N-rooks Sampling Method. 
+ * See: http://www.acm.org/jgt/papers/WangSung9/9
+ */
+void SampleDistribution::multiStageNRooks(const int size, int* cells)
+{
+	if (size <= 1) {	// 0 or 1 cells: nothing to permute (size == 0 used to recurse without end)
+		return;
+	}
+
+	int size1 = size >> 1;	// number of cells dealt to the upper half
+	int size2 = size >> 1;	// number of cells dealt to the lower half
+
+	if (size & 1) {	// odd count: a random draw decides which half takes the extra cell
+		if (m_rand.getFloat() > 0.5) {
+			size1++;
+		}
+		else {
+			size2++;
+		}
+	}
+
+	int* upper_cells = new int[size1];
+	int* lower_cells = new int[size2];
+
+	int i, j;
+	for(i = 0, j = 0; i < size - 1; i += 2, j++) {	// split each consecutive pair, one cell per half
+		if (m_rand.get() & 1) {
+			upper_cells[j] = cells[i];
+			lower_cells[j] = cells[i + 1];
+		}
+		else {
+			upper_cells[j] = cells[i + 1];
+			lower_cells[j] = cells[i];
+		}
+	}
+
+	if (size1 != size2) {	// the unpaired last cell goes to whichever half is larger
+		if (size1 > size2) {
+			upper_cells[j] = cells[i];
+		}
+		else {
+			lower_cells[j] = cells[i];
+		}
+	}
+
+	multiStageNRooks(size1, upper_cells);	// permute each half recursively, then copy it back in place
+	memcpy(cells, upper_cells, size1 * sizeof(int));
+	delete [] upper_cells;
+
+	multiStageNRooks(size2, lower_cells);
+	memcpy(cells + size1, lower_cells, size2 * sizeof(int));
+	delete [] lower_cells;
+}
+
+
+void SampleDistribution::redistributeNRook(const Distribution dist)
+{
+	const uint sampleCount = m_sampleArray.count();
+
+	// Generate nrook cells
+	int * cells = new int[sampleCount];
+	for(uint32 i = 0; i < sampleCount; i++)
+	{
+		cells[i] = i;
+	}
+	multiStageNRooks(sampleCount, cells);
+
+	for(uint i = 0; i < sampleCount; i++)
+	{
+		float x = (i + m_rand.getFloat()) / sampleCount;
+		float y = (cells[i] + m_rand.getFloat()) / sampleCount;
+
+		// Map uniform distribution in the square to the (hemi)sphere.
+ if( dist == Distribution_Uniform ) { + m_sampleArray[i].setUV(acosf(1 - 2 * x), 2 * PI * y); + } + else { + nvDebugCheck(dist == Distribution_Cosine); + m_sampleArray[i].setUV(acosf(sqrtf(x)), 2 * PI * y); + } + } + + delete [] cells; +} + diff --git a/src/nvmath/Montecarlo.h b/src/nvmath/Montecarlo.h new file mode 100644 index 0000000..efda315 --- /dev/null +++ b/src/nvmath/Montecarlo.h @@ -0,0 +1,84 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_MONTECARLO_H +#define NV_MATH_MONTECARLO_H + +#include +#include + +namespace nv +{ + +/// A random sample distribution. +class SampleDistribution +{ +public: + + // Sampling method. + enum Method { + Method_Random, + Method_Stratified, + Method_NRook + }; + + // Distribution functions. + enum Distribution { + Distribution_Uniform, + Distribution_Cosine + }; + + /// Constructor. + SampleDistribution(int num) + { + m_sampleArray.resize(num); + } + + void redistribute(Method method=Method_NRook, Distribution dist=Distribution_Cosine); + + /// Get parametric coordinates of the sample. + Vector2 sample(int i) { return m_sampleArray[i].uv; } + + /// Get sample direction. + Vector3 sampleDir(int i) { return m_sampleArray[i].dir; } + + /// Get number of samples. + uint sampleCount() const { return m_sampleArray.count(); } + +private: + + void redistributeRandom(const Distribution dist); + void redistributeStratified(const Distribution dist); + void multiStageNRooks(const int size, int* cells); + void redistributeNRook(const Distribution dist); + + + /// A sample of the random distribution. + struct Sample + { + /// Set sample given the 3d coordinates. + void setDir(float x, float y, float z) { + dir.set(x, y, z); + uv.set(acosf(z), atan2f(y, x)); + } + + /// Set sample given the 2d parametric coordinates. + void setUV(float u, float v) { + uv.set(u, v); + dir.set(sinf(u) * cosf(v), sinf(u) * sinf(v), cosf(u)); + } + + Vector2 uv; + Vector3 dir; + }; + + /// Random seed. 
+ MTRand m_rand; + + /// Samples. + Array m_sampleArray; + +}; + +} // nv namespace + +#endif // NV_MATH_MONTECARLO_H diff --git a/src/nvmath/Plane.h b/src/nvmath/Plane.h index 8a2f57d..bc7128c 100644 --- a/src/nvmath/Plane.h +++ b/src/nvmath/Plane.h @@ -59,7 +59,7 @@ namespace nv return Plane(plane.asVector() * inv); } - // Get the signed distance from the given point to this plane. + // Get the distance from the given point to this plane. inline float distance(Plane::Arg plane, Vector3::Arg point) { return dot(plane.vector(), point) - plane.offset(); diff --git a/src/nvmath/Quaternion.h b/src/nvmath/Quaternion.h new file mode 100644 index 0000000..b5007cc --- /dev/null +++ b/src/nvmath/Quaternion.h @@ -0,0 +1,128 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_QUATERNION_H +#define NV_MATH_QUATERNION_H + +#include +#include + +namespace nv +{ + + class NVMATH_CLASS Quaternion + { + public: + typedef Quaternion const & Arg; + + Quaternion(); + explicit Quaternion(zero_t); + Quaternion(float x, float y, float z, float w); + Quaternion(Vector4::Arg v); + + const Quaternion & operator=(Quaternion::Arg v); + + scalar x() const; + scalar y() const; + scalar z() const; + scalar w() const; + + const Vector4 & asVector() const; + Vector4 & asVector(); + + private: + Vector4 q; + }; + + inline Quaternion::Quaternion() {} + inline Quaternion::Quaternion(zero_t) : q(zero) {} + inline Quaternion::Quaternion(float x, float y, float z, float w) : q(x, y, z, w) {} + inline Quaternion::Quaternion(Vector4::Arg v) : q(v) {} + + inline const Quaternion & Quaternion::operator=(Quaternion::Arg v) { q = v.q; return *this; } + + inline scalar Quaternion::x() const { return q.x(); } + inline scalar Quaternion::y() const { return q.y(); } + inline scalar Quaternion::z() const { return q.z(); } + inline scalar Quaternion::w() const { return q.w(); } + + inline const Vector4 & Quaternion::asVector() const { return q; } + inline Vector4 & 
Quaternion::asVector() { return q; } + + + inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b) + { + // @@ Efficient SIMD implementation? + return Quaternion( + + a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(), + - a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(), + + a.x() * b.y() - a.y()*b.x() + a.z()*b.w() + a.w()*b.z(), + - a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w()); + } + + inline Quaternion scale(Quaternion::Arg q, float s) + { + return scale(q.asVector(), s); + } + inline Quaternion operator *(Quaternion::Arg q, float s) + { + return scale(q, s); + } + inline Quaternion operator *(float s, Quaternion::Arg q) + { + return scale(q, s); + } + + inline Quaternion scale(Quaternion::Arg q, Vector4::Arg s) + { + return scale(q.asVector(), s); + } + /*inline Quaternion operator *(Quaternion::Arg q, Vector4::Arg s) + { + return scale(q, s); + } + inline Quaternion operator *(Vector4::Arg s, Quaternion::Arg q) + { + return scale(q, s); + }*/ + + inline Quaternion conjugate(Quaternion::Arg q) + { + return scale(q, Vector4(-1, -1, -1, 1)); + } + + inline float length(Quaternion::Arg q) + { + return length(q.asVector()); + } + + inline bool isNormalized(Quaternion::Arg q, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(q), 1, epsilon); + } + + inline Quaternion normalize(Quaternion::Arg q, float epsilon = NV_EPSILON) + { + float l = length(q); + nvDebugCheck(!isZero(l, epsilon)); + Quaternion n = scale(q, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Quaternion inverse(Quaternion::Arg q) + { + return conjugate(normalize(q)); + } + + /// Create a rotation quaternion for @a angle alpha around normal vector @a v. 
+ inline Quaternion axisAngle(Vector3::Arg v, float alpha) + { + float s = sinf(alpha * 0.5f); + float c = cosf(alpha * 0.5f); + return Quaternion(Vector4(v * s, c)); + } + + +} // nv namespace + +#endif // NV_MATH_QUATERNION_H diff --git a/src/nvmath/Random.cpp b/src/nvmath/Random.cpp new file mode 100644 index 0000000..6178134 --- /dev/null +++ b/src/nvmath/Random.cpp @@ -0,0 +1,54 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include +#include + +using namespace nv; + +// Statics +const uint16 Rand48::a0 = 0xE66D; +const uint16 Rand48::a1 = 0xDEEC; +const uint16 Rand48::a2 = 0x0005; +const uint16 Rand48::c0 = 0x000B; + + +/// Get a random seed based on the current time. +uint Rand::randomSeed() +{ + return (uint)time(NULL); +} + + +void MTRand::initialize( uint32 seed ) +{ + // Initialize generator state with seed + // See Knuth TAOCP Vol 2, 3rd Ed, p.106 for multiplier. + // In previous versions, most significant bits (MSBs) of the seed affect + // only MSBs of the state array. Modified 9 Jan 2002 by Makoto Matsumoto. + uint32 *s = state; + uint32 *r = state; + int i = 1; + *s++ = seed & 0xffffffffUL; + for( ; i < N; ++i ) + { + *s++ = ( 1812433253UL * ( *r ^ (*r >> 30) ) + i ) & 0xffffffffUL; + r++; + } +} + + +void MTRand::reload() +{ + // Generate N new values in state + // Made clearer and faster by Matthew Bellew (matthew.bellew@home.com) + uint32 *p = state; + int i; + for( i = N - M; i--; ++p ) + *p = twist( p[M], p[0], p[1] ); + for( i = M; --i; ++p ) + *p = twist( p[M-N], p[0], p[1] ); + *p = twist( p[M-N], p[0], state[0] ); + + left = N, next = state; +} + diff --git a/src/nvmath/Random.h b/src/nvmath/Random.h new file mode 100644 index 0000000..0f76949 --- /dev/null +++ b/src/nvmath/Random.h @@ -0,0 +1,368 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_RANDOM_H +#define NV_MATH_RANDOM_H + +#include // nextPowerOfTwo +#include + +namespace nv +{ + +/// Interface of the random number generators. 
+class Rand +{ +public: + + virtual ~Rand() {} + + enum time_e { Time }; + + /// Provide a new seed. + virtual void seed( uint s ) { /* empty */ }; + + /// Get an integer random number. + virtual uint get() = 0; + + /// Get a random number on [0, max] interval. + uint getRange( uint max ) + { + uint n; + // uint mask = Bitmask( max ); + // do { n = Get() & mask; } while( n > max ); + uint np2 = nextPowerOfTwo( max ); + do { n = get() & (np2-1); } while( n > max ); + return n; + } + + /// Random number on [0.0, 1.0] interval. + float getFloat() + { + union + { + uint32 i; + float f; + } pun; + + pun.i = 0x3f800000UL | (get() & 0x007fffffUL); + return pun.f - 1.0f; + } + + /* + /// Random number on [0.0, 1.0] interval. + double getReal() + { + return double(get()) * (1.0/4294967295.0); // 2^32-1 + } + + /// Random number on [0.0, 1.0) interval. + double getRealExclusive() + { + return double(get()) * (1.0/4294967296.0); // 2^32 + } + */ + + /// Get the max value of the random number. + uint max() const { return 4294967295U; } + + // Get a random seed. + static uint randomSeed(); + +}; + + +/// Very simple random number generator with low storage requirements. +class SimpleRand : public Rand +{ +public: + + /// Constructor that uses the current time as the seed. + SimpleRand( time_e ) + { + seed(randomSeed()); + } + + /// Constructor that uses the given seed. + SimpleRand( uint s = 0 ) + { + seed(s); + } + + /// Set the given seed. + virtual void seed( uint s ) + { + current = s; + } + + /// Get a random number. + virtual uint get() + { + return current = current * 1103515245 + 12345; + } + +private: + + uint current; + +}; + + +/// Mersenne twister random number generator. +class MTRand : public Rand +{ +public: + + enum { N = 624 }; // length of state vector + enum { M = 397 }; + + /// Constructor that uses the current time as the seed. + MTRand( time_e ) + { + seed(randomSeed()); + } + + /// Constructor that uses the given seed. 
+ MTRand( uint s = 0 ) + { + seed(s); + } + + /// Constructor that uses the given seeds. + NVMATH_API MTRand( const uint * seed_array, uint length ); + + + /// Provide a new seed. + virtual void seed( uint s ) + { + initialize(s); + reload(); + } + + /// Get a 32-bit random number. + virtual uint get() + { + // Pull a 32-bit integer from the generator state + // Every other access function simply transforms the numbers extracted here + if( left == 0 ) { + reload(); + } + left--; + + uint s1; + s1 = *next++; + s1 ^= (s1 >> 11); + s1 ^= (s1 << 7) & 0x9d2c5680U; + s1 ^= (s1 << 15) & 0xefc60000U; + return ( s1 ^ (s1 >> 18) ); + }; + + +private: + + NVMATH_API void initialize( uint32 seed ); + NVMATH_API void reload(); + + uint hiBit( uint u ) const { return u & 0x80000000U; } + uint loBit( uint u ) const { return u & 0x00000001U; } + uint loBits( uint u ) const { return u & 0x7fffffffU; } + uint mixBits( uint u, uint v ) const { return hiBit(u) | loBits(v); } + uint twist( uint m, uint s0, uint s1 ) const { return m ^ (mixBits(s0,s1)>>1) ^ ((~loBit(s1)+1) & 0x9908b0dfU); } + +private: + + uint state[N]; // internal state + uint * next; // next value to get from state + int left; // number of values left before reload needed + +}; + + + +/** George Marsaglia's random number generator. + * Code based on Thatcher Ulrich public domain source code: + * http://cvs.sourceforge.net/viewcvs.py/tu-testbed/tu-testbed/base/tu_random.cpp?rev=1.7&view=auto + * + * PRNG code adapted from the complementary-multiply-with-carry + * code in the article: George Marsaglia, "Seeds for Random Number + * Generators", Communications of the ACM, May 2003, Vol 46 No 5, + * pp90-93. + * + * The article says: + * + * "Any one of the choices for seed table size and multiplier will + * provide a RNG that has passed extensive tests of randomness, + * particularly those in [3], yet is simple and fast -- + * approximately 30 million random 32-bit integers per second on a + * 850MHz PC.
The period is a*b^n, where a is the multiplier, n + * the size of the seed table and b=2^32-1. (a is chosen so that + * b is a primitive root of the prime a*b^n + 1.)" + * + * [3] Marsaglia, G., Zaman, A., and Tsang, W. Toward a universal + * random number generator. _Statistics and Probability Letters + * 8_ (1990), 35-39. + */ +class GMRand : public Rand +{ +public: + + enum { SEED_COUNT = 8 }; + +// const uint64 a = 123471786; // for SEED_COUNT=1024 +// const uint64 a = 123554632; // for SEED_COUNT=512 +// const uint64 a = 8001634; // for SEED_COUNT=255 +// const uint64 a = 8007626; // for SEED_COUNT=128 +// const uint64 a = 647535442; // for SEED_COUNT=64 +// const uint64 a = 547416522; // for SEED_COUNT=32 +// const uint64 a = 487198574; // for SEED_COUNT=16 +// const uint64 a = 716514398U; // for SEED_COUNT=8 + enum { a = 716514398U }; + + + GMRand( time_e ) + { + seed(randomSeed()); + } + + GMRand(uint s = 987654321) + { + seed(s); + } + + + /// Provide a new seed: resets the carry and index, then fills the seed table from s. + virtual void seed( uint s ) + { + c = 362436; + i = SEED_COUNT - 1; + + for(int k = 0; k < SEED_COUNT; k++) { // k, not i: do not shadow the member index. + s = s ^ (s << 13); + s = s ^ (s >> 17); + s = s ^ (s << 5); + Q[k] = s; + } + } + + /// Get a 32-bit random number. + virtual uint get() + { + const uint32 r = 0xFFFFFFFE; + + uint64 t; + uint32 x; + + i = (i + 1) & (SEED_COUNT - 1); // Wraps because SEED_COUNT is a power of two. + t = uint64(a) * Q[i] + c; // Widen before multiplying; a 32-bit product would lose the carry. + c = uint32(t >> 32); + x = uint32(t + c); + + if( x < c ) { + x++; + c++; + } + + uint32 val = r - x; + Q[i] = val; + return val; + }; + + +private: + + uint32 c; // carry + uint32 i; // index of the current seed table entry + uint32 Q[SEED_COUNT]; // seed table + +}; + + +/** Random number implementation from the GNU Sci. Lib. (GSL). + * Adapted from Nicholas Chapman version: + * + * Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough + * This is the Unix rand48() generator. The generator returns the + * upper 32 bits from each term of the sequence, + * + * x_{n+1} = (a x_n + c) mod m + * + * using 48-bit unsigned arithmetic, with a = 0x5DEECE66D , c = 0xB + * and m = 2^48.
The seed specifies the upper 32 bits of the initial + * value, x_1, with the lower 16 bits set to 0x330E. + * + * The theoretical value of x_{10001} is 244131582646046. + * + * The period of this generator is ? FIXME (probably around 2^48). + */ +class Rand48 : public Rand +{ +public: + + Rand48( time_e ) + { + seed(randomSeed()); + } + + Rand48( uint s = 0x1234ABCD ) + { + seed(s); + } + + + /** Set the given seed. */ + virtual void seed( uint s ) { + vstate.x0 = 0x330E; + vstate.x1 = uint16(s & 0xFFFF); + vstate.x2 = uint16((s >> 16) & 0xFFFF); + } + + /** Get a random number. */ + virtual uint get() { + + advance(); + + uint x1 = vstate.x1; + uint x2 = vstate.x2; + return (x2 << 16) + x1; + } + + +private: + + void advance() + { + /* work with unsigned long ints throughout to get correct integer + promotions of any unsigned short ints */ + const uint32 x0 = vstate.x0; + const uint32 x1 = vstate.x1; + const uint32 x2 = vstate.x2; + + uint32 a; + a = a0 * x0 + c0; + + vstate.x0 = uint16(a & 0xFFFF); + a >>= 16; + + /* although the next line may overflow we only need the top 16 bits + in the following stage, so it does not matter */ + + a += a0 * x1 + a1 * x0; + vstate.x1 = uint16(a & 0xFFFF); + + a >>= 16; + a += a0 * x2 + a1 * x1 + a2 * x0; + vstate.x2 = uint16(a & 0xFFFF); + } + + +private: + NVMATH_API static const uint16 a0, a1, a2, c0; + + struct rand48_state_t { + uint16 x0, x1, x2; + } vstate; + +}; + +} // nv namespace + +#endif // NV_MATH_RANDOM_H diff --git a/src/nvmath/SphericalHarmonic.cpp b/src/nvmath/SphericalHarmonic.cpp new file mode 100644 index 0000000..f005b51 --- /dev/null +++ b/src/nvmath/SphericalHarmonic.cpp @@ -0,0 +1,241 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include + +using namespace nv; + + +namespace +{ + + // Basic integer factorial. 
+ inline static int factorial( int v ) + { + if (v == 0) { + return 1; + } + + int result = v; + while (--v > 0) { + result *= v; + } + return result; + } + + + // Double factorial. + // Defined as: n!! = n*(n - 2)*(n - 4)..., n!!(0,-1) = 1. + inline static int doubleFactorial( int x ) + { + if (x == 0 || x == -1) { + return 1; + } + + int result = x; + while ((x -= 2) > 0) { + result *= x; + } + + return result; + } + + /// Normalization constant for spherical harmonic. + /// @param l is the band. + /// @param m is the argument, in the range [0, m] + inline static float K( int l, int m ) + { + nvDebugCheck( m >= 0 ); + return sqrtf(((2 * l + 1) * factorial(l - m)) / (4 * PI * factorial(l + m))); + } + + /// Normalization constant for hemispherical harmonic. + inline static float HK( int l, int m ) + { + nvDebugCheck( m >= 0 ); + return sqrtf(((2 * l + 1) * factorial(l - m)) / (2 * PI * factorial(l + m))); + } + + /// Evaluate Legendre polynomial. */ + static float legendre( int l, int m, float x ) + { + // piDebugCheck( m >= 0 ); + // piDebugCheck( m <= l ); + // piDebugCheck( fabs(x) <= 1 ); + + // Rule 2 needs no previous results + if (l == m) { + return powf(-1.0f, float(m)) * doubleFactorial(2 * m - 1) * powf(1 - x*x, 0.5f * m); + } + + // Rule 3 requires the result for the same argument of the previous band + if (l == m + 1) { + return x * (2 * m + 1) * legendrePolynomial(m, m, x); + } + + // Main reccurence used by rule 1 that uses result of the same argument from + // the previous two bands + return (x * (2 * l - 1) * legendrePolynomial(l - 1, m, x) - (l + m - 1) * legendrePolynomial(l - 2, m, x)) / (l - m); + } + + + template float legendre(float x); + + template <> float legendre<0, 0>(float x) { + return 1; + } + + template <> float legendre<1, 0>(float x) { + return x; + } + template <> float legendre<1, 1>(float x) { + return -sqrtf(1 - x * x); + } + + template <> float legendre<2, 0>(float x) { + return -0.5f + (3 * x * x) / 2; + } + template <> float 
legendre<2, 1>(float x) { + return -3 * x * sqrtf(1 - x * x); + } + template <> float legendre<2, 2>(float x) { + return -3 * (-1 + x * x); + } + + template <> float legendre<3, 0>(float x) { + return -(3 * x) / 2 + (5 * x * x * x) / 2; + } + template <> float legendre<3, 1>(float x) { + return -3 * sqrtf(1 - x * x) / 2 * (-1 + 5 * x * x); + } + template <> float legendre<3, 2>(float x) { + return -15 * (-x + x * x * x); + } + template <> float legendre<3, 3>(float x) { + return -15 * powf(1 - x * x, 1.5f); + } + + template <> float legendre<4, 0>(float x) { + return 0.125f * (3.0f - 30.0f * x * x + 35.0f * x * x * x * x); + } + template <> float legendre<4, 1>(float x) { + return -2.5f * x * sqrtf(1.0f - x * x) * (7.0f * x * x - 3.0f); + } + template <> float legendre<4, 2>(float x) { + return -7.5f * (1.0f - 8.0f * x * x + 7.0f * x * x * x * x); + } + template <> float legendre<4, 3>(float x) { + return -105.0f * x * powf(1 - x * x, 1.5f); + } + template <> float legendre<4, 4>(float x) { + return 105.0f * (x * x - 1.0f) * (x * x - 1.0f); + } + +} // namespace + + +float nv::legendrePolynomial(int l, int m, float x) +{ + switch(l) + { + case 0: + return legendre<0, 0>(x); + case 1: + if(m == 0) return legendre<1, 0>(x); + return legendre<1, 1>(x); + case 2: + if(m == 0) return legendre<2, 0>(x); + else if(m == 1) return legendre<2, 1>(x); + return legendre<2, 2>(x); + case 3: + if(m == 0) return legendre<3, 0>(x); + else if(m == 1) return legendre<3, 1>(x); + else if(m == 2) return legendre<3, 2>(x); + return legendre<3, 3>(x); + case 4: + if(m == 0) return legendre<4, 0>(x); + else if(m == 1) return legendre<4, 1>(x); + else if(m == 2) return legendre<4, 2>(x); + else if(m == 3) return legendre<4, 3>(x); + else return legendre<4, 4>(x); + } + + // Fallback to the expensive version. + return legendre(l, m, x); +} + + +/** + * Evaluate the spherical harmonic function for the given angles. + * @param l is the band. 
+ * @param m is the argument, in the range [-l,l] + * @param theta is the altitude, in the range [0, PI] + * @param phi is the azimuth, in the range [0, 2*PI] + */ +float nv::y( int l, int m, float theta, float phi ) +{ + if( m == 0 ) { + // K(l, 0) = sqrt((2*l+1)/(4*PI)) + return sqrtf((2 * l + 1) / (4 * PI)) * legendrePolynomial(l, 0, cosf(theta)); + } + else if( m > 0 ) { + return sqrtf(2.0f) * K(l, m) * cosf(m * phi) * legendrePolynomial(l, m, cosf(theta)); + } + else { + return sqrtf(2.0f) * K(l, -m) * sinf(-m * phi) * legendrePolynomial(l, -m, cosf(theta)); + } +} + + +/** + * Real spherical harmonic function of an unit vector. Uses the following + * equalities to call the angular function: + * x = sin(theta)*cos(phi) + * y = sin(theta)*sin(phi) + * z = cos(theta) + */ +float nv::y( int l, int m, Vector3::Arg v ) +{ + float theta = acosf(v.z()); + float phi = atan2f(v.y(), v.x()); + return y( l, m, theta, phi ); +} + + +/** + * Evaluate the hemispherical harmonic function for the given angles. + * @param l is the band. + * @param m is the argument, in the range [-l,l] + * @param theta is the altitude, in the range [0, PI/2] + * @param phi is the azimuth, in the range [0, 2*PI] + */ +float nv::hy( int l, int m, float theta, float phi ) +{ + if( m == 0 ) { + // HK(l, 0) = sqrt((2*l+1)/(2*PI)) + return sqrtf((2 * l + 1) / (2 * PI)) * legendrePolynomial(l, 0, 2*cosf(theta)-1); + } + else if( m > 0 ) { + return sqrtf(2.0f) * HK(l, m) * cosf(m * phi) * legendrePolynomial(l, m, 2*cosf(theta)-1); + } + else { + return sqrtf(2.0f) * HK(l, -m) * sinf(-m * phi) * legendrePolynomial(l, -m, 2*cosf(theta)-1); + } +} + + +/** + * Real hemispherical harmonic function of an unit vector. 
Uses the following + * equalities to call the angular function: + * x = sin(theta)*cos(phi) + * y = sin(theta)*sin(phi) + * z = cos(theta) + */ +float nv::hy( int l, int m, Vector3::Arg v ) +{ + float theta = acosf(v.z()); + float phi = atan2f(v.y(), v.x()); + return y( l, m, theta, phi ); +} + + + diff --git a/src/nvmath/SphericalHarmonic.h b/src/nvmath/SphericalHarmonic.h new file mode 100644 index 0000000..7e8341c --- /dev/null +++ b/src/nvmath/SphericalHarmonic.h @@ -0,0 +1,419 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_SPHERICALHARMONIC_H +#define NV_MATH_SPHERICALHARMONIC_H + +#include + +namespace nv +{ + + NVMATH_API float legendrePolynomial( int l, int m, float x ) NV_CONST; + NVMATH_API float y( int l, int m, float theta, float phi ) NV_CONST; + NVMATH_API float y( int l, int m, Vector3::Arg v ) NV_CONST; + NVMATH_API float hy( int l, int m, float theta, float phi ) NV_CONST; + NVMATH_API float hy( int l, int m, Vector3::Arg v ) NV_CONST; + + class Sh; + float dot(const Sh & a, const Sh & b) NV_CONST; + + + /// Spherical harmonic class. + class Sh + { + friend class Sh2; + friend class ShMatrix; + public: + + /// Construct a spherical harmonic of the given order. + Sh(int o) : m_order(o) + { + m_elemArray = new float[basisNum()]; + } + + /// Copy constructor. + Sh(const Sh & sh) : m_order(sh.order()) + { + m_elemArray = new float[basisNum()]; + memcpy(m_elemArray, sh.m_elemArray, sizeof(float) * basisNum()); + } + + /// Destructor. + ~Sh() + { + delete [] m_elemArray; + m_elemArray = NULL; + } + + /// Get number of bands. + static int bandNum(int order) { + return order + 1; + } + + /// Get number of sh basis. + static int basisNum(int order) { + return (order + 1) * (order + 1); + } + + /// Get the index for the given coefficients. + static int index( int l, int m ) { + return l * l + l + m; + } + + /// Get sh order. + int order() const + { + return m_order; + } + + /// Get sh order. 
+ int bandNum() const + { + return bandNum(m_order); + } + + /// Get sh order. + int basisNum() const + { + return basisNum(m_order); + } + + /// Get sh coefficient indexed by l,m. + float elem( int l, int m ) const + { + return m_elemArray[index(l, m)]; + } + + /// Get sh coefficient indexed by l,m. + float & elem( int l, int m ) + { + return m_elemArray[index(l, m)]; + } + + + /// Get sh coefficient indexed by i. + float elemAt( int i ) const { + return m_elemArray[i]; + } + + /// Get sh coefficient indexed by i. + float & elemAt( int i ) + { + return m_elemArray[i]; + } + + + /// Reset the sh coefficients. + void reset() + { + for( int i = 0; i < basisNum(); i++ ) { + m_elemArray[i] = 0.0f; + } + } + + /// Copy spherical harmonic. + void operator= ( const Sh & sh ) + { + nvDebugCheck(order() <= sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] = sh.m_elemArray[i]; + } + } + + /// Add spherical harmonics. + void operator+= ( const Sh & sh ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] += sh.m_elemArray[i]; + } + } + + /// Substract spherical harmonics. + void operator-= ( const Sh & sh ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] -= sh.m_elemArray[i]; + } + } + + // Not exactly convolution, nor product. + void operator*= ( const Sh & sh ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] *= sh.m_elemArray[i]; + } + } + + /// Scale spherical harmonics. + void operator*= ( float f ) + { + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] *= f; + } + } + + /// Add scaled spherical harmonics. + void addScaled( const Sh & sh, float f ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] += sh.m_elemArray[i] * f; + } + } + + + /*/// Add a weighted sample to the sh coefficients. 
+ void AddSample( const Vec3 & dir, const Color3f & color, float w=1.0f ) { + for(int l = 0; l <= order; l++) { + for(int m = -l; m <= l; m++) { + Color3f & elem = GetElem(l, m); + elem.Mad( elem, color, w * y(l, m, dir) ); + } + } + }*/ + + /// Evaluate + void eval(Vector3::Arg dir) + { + for(int l = 0; l <= m_order; l++) { + for(int m = -l; m <= l; m++) { + elem(l, m) = y(l, m, dir); + } + } + } + + + /// Evaluate the spherical harmonic function. + float sample(Vector3::Arg dir) const + { + Sh sh(order()); + sh.eval(dir); + + return dot(sh, *this); + } + + + protected: + + const int m_order; + float * m_elemArray; + + }; + + + /// Compute dot product of the spherical harmonics. + inline float dot(const Sh & a, const Sh & b) + { + nvDebugCheck(a.order() == b.order()); + + float sum = 0; + for( int i = 0; i < Sh::basisNum(a.order()); i++ ) { + sum += a.elemAt(i) * b.elemAt(i); + } + + return sum; + } + + + /// Second order spherical harmonic. + class Sh2 : public Sh + { + public: + + /// Constructor. + Sh2() : Sh(2) {} + + /// Copy constructor. + Sh2(const Sh2 & sh) : Sh(sh) {} + + /// Spherical harmonic resulting from projecting the clamped cosine transfer function to the SH basis. 
+ void cosineTransfer() + { + const float c1 = 0.282095f; // K(0, 0) + const float c2 = 0.488603f; // K(1, 0) + const float c3 = 1.092548f; // sqrt(15.0f / PI) / 2.0f = K(2, -2) + const float c4 = 0.315392f; // sqrt(5.0f / PI) / 4.0f) = K(2, 0) + const float c5 = 0.546274f; // sqrt(15.0f / PI) / 4.0f) = K(2, 2) + + const float normalization = PI * 16.0f / 17.0f; + + const float const1 = c1 * normalization * 1.0f; + const float const2 = c2 * normalization * (2.0f / 3.0f); + const float const3 = c3 * normalization * (1.0f / 4.0f); + const float const4 = c4 * normalization * (1.0f / 4.0f); + const float const5 = c5 * normalization * (1.0f / 4.0f); + + m_elemArray[0] = const1; + + m_elemArray[1] = -const2; + m_elemArray[2] = const2; + m_elemArray[3] = -const2; + + m_elemArray[4] = const3; + m_elemArray[5] = -const3; + m_elemArray[6] = const4; + m_elemArray[7] = -const3; + m_elemArray[8] = const5; + } + }; + + + +#if 0 + +/// Spherical harmonic matrix. +class ShMatrix +{ +public: + + /// Create an identity matrix of the given order. + ShMatrix(int o = 2) : order(o), identity(true) + { + nvCheck(order > 0); + e = new float[Size()]; + band = new float *[GetBandNum()]; + setupBands(); + } + + /// Destroy and free matrix elements. + ~ShMatrix() + { + delete e; + delete band; + } + + /// Set identity matrix. + void setIdentity() + { + identity = true; + } + + /// Return true if this is an identity matrix, false in other case. + bool isIdentity() const { + return identity; + } + + /// Get number of bands of this matrix. + int bandNum() const + { + return order+1; + } + + /// Get total number of elements in the matrix. + int size() const + { + int size = 0; + for( int i = 0; i < bandNum(); i++ ) { + size += SQ(i * 2 + 1); + } + return size; + } + + /// Get element at the given raw index. + float elem(const int idx) const + { + return e[idx]; + } + + /// Get element at the given with the given indices. 
+ float & elem( const int b, const int x, const int y ) + { + nvDebugCheck(b >= 0); + nvDebugCheck(b < bandNum()); + return band[b][(b + y) * (b * 2 + 1) + (b + x)]; + } + + /// Get element at the given with the given indices. + float elem( const int b, const int x, const int y ) const + { + nvDebugCheck(b >= 0); + nvDebugCheck(b < bandNum()); + return band[b][(b + y) * (b * 2 + 1) + (b + x)]; + } + + /** Copy matrix. */ + void Copy( const ShMatrix & m ) + { + nvDebugCheck(order == m.order); + memcpy(e, m.e, Size() * sizeof(float)); + } + + /** Rotate the given coefficients. */ + void transform( const Sh & restrict source, Sh * restrict dest ) const { + piCheck( &source != dest ); // Make sure there's no aliasing. + piCheck( dest->order <= order ); + piCheck( order <= source.order ); + + if( identity ) { + *dest = source; + return; + } + + // Loop through each band. + for( int l = 0; l <= dest->order; l++ ) { + + for( int mo = -l; mo <= l; mo++ ) { + + Color3f rgb = Color3f::Black; + + for( int mi = -l; mi <= l; mi++ ) { + rgb.Mad( rgb, source.elem(l, mi), elem(l, mo, mi) ); + } + + dest->elem(l, mo) = rgb; + } + } + } + + + MATHLIB_API void multiply( const ShMatrix &A, const ShMatrix &B ); + MATHLIB_API void rotation( const Matrix & m ); + MATHLIB_API void rotation( int axis, float angles ); + MATHLIB_API void print(); + + +private: + + // @@ These could be static indices precomputed only once. + /// Setup the band pointers. + void setupBands() + { + int size = 0; + for( int i = 0; i < bandNum(); i++ ) { + band[i] = &e[size]; + size += SQ(i * 2 + 1); + } + } + + +private: + + // Matrix order. + const int m_order; + + // Identity flag for quick transform. + bool m_identity; + + // Array of elements. + float * m_e; + + // Band pointers. 
+ float ** m_band; + +}; + +#endif // 0 + + + +} // nv namespace + +#endif // NV_MATH_SPHERICALHARMONIC_H diff --git a/src/nvmath/TriBox.cpp b/src/nvmath/TriBox.cpp new file mode 100644 index 0000000..61d69bb --- /dev/null +++ b/src/nvmath/TriBox.cpp @@ -0,0 +1,226 @@ +/********************************************************/ +/* AABB-triangle overlap test code */ +/* by Tomas Akenine-Möller */ +/* Function: int triBoxOverlap(float boxcenter[3], */ +/* float boxhalfsize[3],float triverts[3][3]); */ +/* History: */ +/* 2001-03-05: released the code in its first version */ +/* 2001-06-18: changed the order of the tests, faster */ +/* */ +/* Acknowledgement: Many thanks to Pierre Terdiman for */ +/* suggestions and discussions on how to optimize code. */ +/* Thanks to David Hunt for finding a ">="-bug! */ +/********************************************************/ + +#include +#include + +using namespace nv; + +#define X 0 +#define Y 1 +#define Z 2 + +#define FINDMINMAX(x0,x1,x2,min,max) \ + min = max = x0; \ + if(x1max) max=x1;\ + if(x2max) max=x2; + + +static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg maxbox) // -NJMP- +{ + Vector3 vmin, vmax; + + float signs[3] = {1, 1, 1}; + if (normal.x() <= 0.0f) signs[0] = -1; + if (normal.y() <= 0.0f) signs[1] = -1; + if (normal.z() <= 0.0f) signs[2] = -1; + + Vector3 sign(signs[0], signs[1], signs[2]); + vmin = -scale(sign, maxbox) - vert; + vmax = scale(sign, maxbox) - vert; + + if (dot(normal, vmin) > 0.0f) return false; + if (dot(normal, vmax) >= 0.0f) return true; + + return false; +} + + +/*======================== X-tests ========================*/ +#define AXISTEST_X01(a, b, fa, fb) \ + p0 = a*v0.y() - b*v0.z(); \ + p2 = a*v2.y() - b*v2.z(); \ + if(p0rad || max<-rad) return false; + +#define AXISTEST_X2(a, b, fa, fb) \ + p0 = a*v0.y() - b*v0.z(); \ + p1 = a*v1.y() - b*v1.z(); \ + if(p0rad || max<-rad) return false; + +/*======================== Y-tests ========================*/ +#define 
AXISTEST_Y02(a, b, fa, fb) \ + p0 = -a*v0.x() + b*v0.z(); \ + p2 = -a*v2.x() + b*v2.z(); \ + if(p0rad || max<-rad) return false; + +#define AXISTEST_Y1(a, b, fa, fb) \ + p0 = -a*v0.x() + b*v0.z(); \ + p1 = -a*v1.x() + b*v1.z(); \ + if(p0rad || max<-rad) return false; + +/*======================== Z-tests ========================*/ + +#define AXISTEST_Z12(a, b, fa, fb) \ + p1 = a*v1.x() - b*v1.y(); \ + p2 = a*v2.x() - b*v2.y(); \ + if(p2rad || max<-rad) return false; + +#define AXISTEST_Z0(a, b, fa, fb) \ + p0 = a*v0.x() - b*v0.y(); \ + p1 = a*v1.x() - b*v1.y(); \ + if(p0rad || max<-rad) return false; + + +bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) +{ + // use separating axis theorem to test overlap between triangle and box + // need to test for overlap in these directions: + // 1) the {x,y,z}-directions (actually, since we use the AABB of the triangle + // we do not even need to test these) + // 2) normal of the triangle + // 3) crossproduct(edge from tri, {x,y,z}-directin) + // this gives 3x3=9 more tests + Vector3 v0, v1, v2; + float min, max, p0, p1, p2, rad, fex, fey, fez; + Vector3 normal, e0, e1, e2; + + // This is the fastest branch on Sun. + // move everything so that the boxcenter is in (0,0,0) + v0 = tri.v[0] - boxcenter; + v1 = tri.v[1] - boxcenter; + v2 = tri.v[2] - boxcenter; + + // Compute triangle edges. 
+ e0 = v1 - v0; // tri edge 0 + e1 = v2 - v1; // tri edge 1 + e2 = v0 - v2; // tri edge 2 + + // Bullet 3: + // test the 9 tests first (this was faster) + fex = fabsf(e0.x()); + fey = fabsf(e0.y()); + fez = fabsf(e0.z()); + AXISTEST_X01(e0.z(), e0.y(), fez, fey); + AXISTEST_Y02(e0.z(), e0.x(), fez, fex); + AXISTEST_Z12(e0.y(), e0.x(), fey, fex); + + fex = fabsf(e1.x()); + fey = fabsf(e1.y()); + fez = fabsf(e1.z()); + AXISTEST_X01(e1.z(), e1.y(), fez, fey); + AXISTEST_Y02(e1.z(), e1.x(), fez, fex); + AXISTEST_Z0(e1.y(), e1.x(), fey, fex); + + fex = fabsf(e2.x()); + fey = fabsf(e2.y()); + fez = fabsf(e2.z()); + AXISTEST_X2(e2.z(), e2.y(), fez, fey); + AXISTEST_Y1(e2.z(), e2.x(), fez, fex); + AXISTEST_Z12(e2.y(), e2.x(), fey, fex); + + // Bullet 1: + // first test overlap in the {x,y,z}-directions + // find min, max of the triangle each direction, and test for overlap in + // that direction -- this is equivalent to testing a minimal AABB around + // the triangle against the AABB + + // test in X-direction + FINDMINMAX(v0.x(), v1.x(), v2.x(), min, max); + if(min > boxhalfsize.x() || max < -boxhalfsize.x()) return false; + + // test in Y-direction + FINDMINMAX(v0.y(), v1.y(), v2.y(), min, max); + if(min > boxhalfsize.y() || max < -boxhalfsize.y()) return false; + + // test in Z-direction + FINDMINMAX(v0.z(), v1.z(), v2.z(), min, max); + if(min > boxhalfsize.z() || max < -boxhalfsize.z()) return false; + + // Bullet 2: + // test if the box intersects the plane of the triangle + // compute plane equation of triangle: normal*x+d=0 + normal = cross(e0, e1); + + return planeBoxOverlap(normal, v0, boxhalfsize); +} + + +bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) +{ + // use separating axis theorem to test overlap between triangle and box + // need to test for overlap in these directions: + // 1) the {x,y,z}-directions (actually, since we use the AABB of the triangle + // we do not even need to test these) + // 2) normal 
of the triangle + // 3) crossproduct(edge from tri, {x,y,z}-directin) + // this gives 3x3=9 more tests + Vector3 v0, v1, v2; + float min, max, p0, p1, p2, rad, fex, fey, fez; + Vector3 normal, e0, e1, e2; + + // This is the fastest branch on Sun. + // move everything so that the boxcenter is in (0,0,0) + v0 = tri.v[0] - boxcenter; + v1 = tri.v[1] - boxcenter; + v2 = tri.v[2] - boxcenter; + + // Compute triangle edges. + e0 = v1 - v0; // tri edge 0 + e1 = v2 - v1; // tri edge 1 + e2 = v0 - v2; // tri edge 2 + + // Bullet 3: + // test the 9 tests first (this was faster) + fex = fabsf(e0.x()); + fey = fabsf(e0.y()); + fez = fabsf(e0.z()); + AXISTEST_X01(e0.z(), e0.y(), fez, fey); + AXISTEST_Y02(e0.z(), e0.x(), fez, fex); + AXISTEST_Z12(e0.y(), e0.x(), fey, fex); + + fex = fabsf(e1.x()); + fey = fabsf(e1.y()); + fez = fabsf(e1.z()); + AXISTEST_X01(e1.z(), e1.y(), fez, fey); + AXISTEST_Y02(e1.z(), e1.x(), fez, fex); + AXISTEST_Z0(e1.y(), e1.x(), fey, fex); + + fex = fabsf(e2.x()); + fey = fabsf(e2.y()); + fez = fabsf(e2.z()); + AXISTEST_X2(e2.z(), e2.y(), fez, fey); + AXISTEST_Y1(e2.z(), e2.x(), fez, fex); + AXISTEST_Z12(e2.y(), e2.x(), fey, fex); + + // Bullet 2: + // test if the box intersects the plane of the triangle + // compute plane equation of triangle: normal*x+d=0 + normal = cross(e0, e1); + + return planeBoxOverlap(normal, v0, boxhalfsize); +} diff --git a/src/nvmath/Triangle.cpp b/src/nvmath/Triangle.cpp new file mode 100644 index 0000000..f005297 --- /dev/null +++ b/src/nvmath/Triangle.cpp @@ -0,0 +1,168 @@ +// This code is in the public domain -- Ignacio Castaņo + +#include + +using namespace nv; + + +/// Tomas Möller, barycentric ray-triangle test. 
+bool rayTest_Moller(const Triangle & t, Vector3::Arg orig, Vector3::Arg dir, float * out_t, float * out_u, float * out_v) +{ + // find vectors for two edges sharing vert0 + Vector3 e1 = t.v[1] - t.v[0]; + Vector3 e2 = t.v[2] - t.v[0]; + + // begin calculating determinant - also used to calculate U parameter + Vector3 pvec = cross(dir, e2); + + // if determinant is near zero, ray lies in plane of triangle + float det = dot(e1, pvec); + if (det < -NV_EPSILON) { + return false; + } + + // calculate distance from vert0 to ray origin + Vector3 tvec = orig - t.v[0]; + + // calculate U parameter and test bounds + float u = dot(tvec, pvec); + if( u < 0.0f || u > det ) { + return false; + } + + // prepare to test V parameter + Vector3 qvec = cross(tvec, e1); + + // calculate V parameter and test bounds + float v = dot(dir, qvec); + if (v < 0.0f || u + v > det) { + return false; + } + + // calculate t, scale parameters, ray intersects triangle + float inv_det = 1.0f / det; + *out_t = dot(e2, qvec) * inv_det; + *out_u = u * inv_det; // v + *out_v = v * inv_det; // 1-(u+v) + + return true; +} + + + + + +#if 0 + + +// IC: This code is adapted from my Pi.MathLib code, based on Moller-Trumbore triangle test. +FXVector3 edge1, edge2, pvec, tvec, qvec; + +edge1 = tri.V1 - tri.V0; +edge2 = tri.V2 - tri.V0; + +pvec.Cross(ray.Direction, edge2); + +float det = FXVector3.Dot(edge1, pvec); + +// calculate distance from vert0 to ray origin. +FXVector3 tvec = ray.Origin - vert0; + +if( det < 0 ) +{ + // calculate U parameter and test bounds. + float u = FXVector3.Dot(tvec, pvec); + if (u > 0.0 || u < det) + { + return false; + } + + // prepare to test V parameter. + qvec.Cross(tvec, edge1); + + // calculate V parameter and test bounds. + float v = FXVector3.Dot(dir, qvec); + + return v <= 0.0 && u + v >= det; +} +else +{ + // calculate U parameter and test bounds. + float u = FXVector3.Dot(tvec, pvec); + if (u < 0.0 || u > det) + { + return false; + } + + // prepare to test V parameter. 
+ qvec.Cross(tvec, edge1); + + // calculate V parameter and test bounds. + float v = FXVector3.Dot(dir, qvec); + + return v >= 0.0 && u + v <= det; +} + + + +/** + * Dan Sunday, parametric ray-triangle test. + */ +// Output: *I = intersection point (when it exists) +// Return: -1 = triangle is degenerate (a segment or point) +// 0 = disjoint (no intersect) +// 1 = intersect in unique point I1 +// 2 = are in the same plane +bool RayTriangleTest( const Vec3 &p0, const Vec3 &p1, + const Vec3 &v0, const Vec3 &v1, const Vec3 &v2, const Vec3 &n, + Vec3 &I ) { + Vec3 u, v; // triangle vectors + Vec3 dir, w0, w; // ray vectors + float r, a, b; // params to calc ray-plane intersect + + // get triangle edge vectors and plane normal + u.Sub( v1, v0 ); + v.Sub( v2, v0 ); + + dir.Sub( p1, p0 ); // ray direction vector + w0.Sub( p0, v0 ); + a = Vec3DotProduct( n, w0 ); + b = Vec3DotProduct( n, dir ); + + if( fabs(b) < TI_EPSILON ) // ray is parallel to triangle plane + return false; + + + // get intersect point of ray with triangle plane + r = -a / b; + if( r < 0.0f ) // ray goes away from triangle + return false; // => no intersect + + // for a segment, also test if (r > 1.0) => no intersect + + I.Mad( p0, dir, r ); // intersect point of ray and plane + + // is I inside T? 
+ float uu, uv, vv, wu, wv, D; + uu = Vec3DotProduct( u, u ); + uv = Vec3DotProduct( u, v ); + vv = Vec3DotProduct( v, v ); + w = I - v0; + wu = Vec3DotProduct( w, u ); + wv = Vec3DotProduct( w, v ); + D = uv * uv - uu * vv; + + // get and test parametric coords + float s, t; + s = (uv * wv - vv * wu) / D; + if( s<0.0 || s > 1.0) // I is outside T + return false; + t = (uv * wu - uu * wv) / D; + if( t<0.0 || (s + t) > 1.0) // I is outside T + return false; + + return true; // I is in T +} + + +#endif // 0 diff --git a/src/nvmath/Triangle.h b/src/nvmath/Triangle.h new file mode 100644 index 0000000..7cd8db5 --- /dev/null +++ b/src/nvmath/Triangle.h @@ -0,0 +1,81 @@ +// This code is in the public domain -- Ignacio Castaņo + +#ifndef NV_MATH_TRIANGLE_H +#define NV_MATH_TRIANGLE_H + +#include +#include +#include + +namespace nv +{ + + /// Triangle class with three vertices. + class Triangle + { + public: + Triangle() {}; + + Triangle(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2) + { + v[0] = v0; + v[1] = v1; + v[2] = v2; + } + + /// Get the bounds of the triangle. + Box bounds() const + { + Box bounds; + bounds.clearBounds(); + bounds.addPointToBounds(v[0]); + bounds.addPointToBounds(v[1]); + bounds.addPointToBounds(v[2]); + return bounds; + } + + Vector4 plane() const + { + Vector3 n = cross(v[1]-v[0], v[2]-v[0]); + return Vector4(n, dot(n, v[0])); + } + + Vector3 v[3]; + }; + + + // Tomas Akenine-Möller box-triangle test. + NVMATH_API bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & triangle); + NVMATH_API bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & triangle); + + + // Moller ray triangle test. 
+ NVMATH_API bool rayTest_Moller(const Triangle & t, Vector3::Arg orig, Vector3::Arg dir, float * out_t, float * out_u, float * out_v); + + inline bool rayTest(const Triangle & t, Vector3::Arg orig, Vector3::Arg dir, float * out_t, float * out_u, float * out_v) + { + return rayTest_Moller(t, orig, dir, out_t, out_u, out_v); + } + + inline bool overlap(const Triangle & t, const Box & b) + { + Vector3 center = b.center(); + Vector3 extents = b.extents(); + return triBoxOverlap(center, extents, t); + } + + inline bool overlap(const Box & b, const Triangle & t) + { + return overlap(t, b); + } + + inline bool overlapNoBounds(const Triangle & t, const Box & b) + { + Vector3 center = b.center(); + Vector3 extents = b.extents(); + return triBoxOverlapNoBounds(center, extents, t); + } + +} // nv namespace + +#endif // NV_MATH_TRIANGLE_H diff --git a/src/nvmath/Vector.h b/src/nvmath/Vector.h index 329531a..bffdfbe 100644 --- a/src/nvmath/Vector.h +++ b/src/nvmath/Vector.h @@ -4,7 +4,7 @@ #define NV_MATH_VECTOR_H #include -#include // min, max +#include // min, max namespace nv { @@ -27,7 +27,6 @@ public: Vector2(Vector2::Arg v); const Vector2 & operator=(Vector2::Arg v); - void setComponent(uint idx, scalar f); scalar x() const; scalar y() const; @@ -72,7 +71,6 @@ public: const Vector2 & xy() const; scalar component(uint idx) const; - void setComponent(uint idx, scalar f); const scalar * ptr() const; @@ -117,7 +115,6 @@ public: const Vector3 & xyz() const; scalar component(uint idx) const; - void setComponent(uint idx, scalar f); const scalar * ptr() const; @@ -164,14 +161,6 @@ inline scalar Vector2::component(uint idx) const return 0.0f; } -inline void Vector2::setComponent(uint idx, float f) -{ - nvDebugCheck(idx < 2); - if (idx == 0) m_x = f; - else if (idx == 1) m_y = f; -} - - inline const scalar * Vector2::ptr() const { return &m_x; @@ -250,21 +239,13 @@ inline const Vector2 & Vector3::xy() const inline scalar Vector3::component(uint idx) const { nvDebugCheck(idx < 3); 
- if (idx == 0) return m_x; - if (idx == 1) return m_y; - if (idx == 2) return m_z; + if (idx == 0) return x(); + if (idx == 1) return y(); + if (idx == 2) return z(); nvAssume(false); return 0.0f; } -inline void Vector3::setComponent(uint idx, float f) -{ - nvDebugCheck(idx < 3); - if (idx == 0) m_x = f; - else if (idx == 1) m_y = f; - else if (idx == 2) m_z = f; -} - inline const scalar * Vector3::ptr() const { return &m_x; @@ -372,15 +353,6 @@ inline scalar Vector4::component(uint idx) const return 0.0f; } -inline void Vector4::setComponent(uint idx, float f) -{ - nvDebugCheck(idx < 4); - if (idx == 0) m_x = f; - else if (idx == 1) m_y = f; - else if (idx == 2) m_z = f; - else if (idx == 3) m_w = f; -} - inline const scalar * Vector4::ptr() const { return &m_x; @@ -505,35 +477,6 @@ inline scalar length(Vector2::Arg v) return sqrtf(length_squared(v)); } -inline scalar inverse_length(Vector2::Arg v) -{ - return 1.0f / sqrtf(length_squared(v)); -} - -inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON) -{ - return equal(length(v), 1, epsilon); -} - -inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON) -{ - float l = length(v); - nvDebugCheck(!isZero(l, epsilon)); - Vector2 n = scale(v, 1.0f / l); - nvDebugCheck(isNormalized(n)); - return n; -} - -inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON) -{ - float l = length(v); - if (isZero(l, epsilon)) { - return fallback; - } - return scale(v, 1.0f / l); -} - - inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) { return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon); @@ -652,11 +595,6 @@ inline scalar length(Vector3::Arg v) return sqrtf(length_squared(v)); } -inline scalar inverse_length(Vector3::Arg v) -{ - return 1.0f / sqrtf(length_squared(v)); -} - inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON) { return equal(length(v), 1, epsilon); @@ -778,11 +716,6 @@ 
inline scalar length(Vector4::Arg v) return sqrtf(length_squared(v)); } -inline scalar inverse_length(Vector4::Arg v) -{ - return 1.0f / sqrtf(length_squared(v)); -} - inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON) { return equal(length(v), 1, epsilon); diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index bedf132..0318d4e 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -115,7 +115,7 @@ inline bool isFinite(const float f) { #if NV_OS_WIN32 return _finite(f) != 0; -#elif NV_OS_DARWIN || NV_OS_FREEBSD +#elif NV_OS_DARWIN return isfinite(f); #elif NV_OS_LINUX return finitef(f); @@ -130,7 +130,7 @@ inline bool isNan(const float f) { #if NV_OS_WIN32 return _isnan(f) != 0; -#elif NV_OS_DARWIN || NV_OS_FREEBSD +#elif NV_OS_DARWIN return isnan(f); #elif NV_OS_LINUX return isnanf(f); diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt index 0ba5552..9ce93d0 100644 --- a/src/nvtt/CMakeLists.txt +++ b/src/nvtt/CMakeLists.txt @@ -5,8 +5,8 @@ ADD_SUBDIRECTORY(squish) SET(NVTT_SRCS nvtt.h nvtt.cpp - Context.h - Context.cpp + Compressor.h + Compressor.cpp nvtt_wrapper.h nvtt_wrapper.cpp CompressDXT.h @@ -24,7 +24,6 @@ SET(NVTT_SRCS InputOptions.cpp OutputOptions.h OutputOptions.cpp - TexImage.h TexImage.cpp cuda/CudaUtils.h cuda/CudaUtils.cpp cuda/CudaMath.h @@ -32,19 +31,20 @@ SET(NVTT_SRCS cuda/CudaCompressDXT.h cuda/CudaCompressDXT.cpp) -IF (CUDA_FOUND) +IF(CUDA_FOUND) ADD_DEFINITIONS(-DHAVE_CUDA) - CUDA_COMPILE(CUDA_SRCS cuda/CompressKernel.cu) + WRAP_CUDA(CUDA_SRCS cuda/CompressKernel.cu) SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS}) SET(LIBS ${LIBS} ${CUDA_LIBRARIES}) - INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) -ENDIF (CUDA_FOUND) + INCLUDE_DIRECTORIES(${CUDA_INCLUDE_PATH}) +ENDIF(CUDA_FOUND) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) ADD_DEFINITIONS(-DNVTT_EXPORTS) -IF(NVTT_SHARED) +IF(NVTT_SHARED) + ADD_DEFINITIONS(-DNVTT_SHARED=1) ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS}) ELSE(NVTT_SHARED) ADD_LIBRARY(nvtt 
${NVTT_SRCS}) @@ -60,5 +60,54 @@ INSTALL(TARGETS nvtt INSTALL(FILES nvtt.h DESTINATION include/nvtt) -ADD_SUBDIRECTORY(tools) -ADD_SUBDIRECTORY(tests) + +# test executables +ADD_EXECUTABLE(nvcompress tools/compress.cpp tools/cmdline.h) +TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt) + +ADD_EXECUTABLE(nvdecompress tools/decompress.cpp tools/cmdline.h) +TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage) + +ADD_EXECUTABLE(nvddsinfo tools/ddsinfo.cpp tools/cmdline.h) +TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage) + +ADD_EXECUTABLE(nvimgdiff tools/imgdiff.cpp tools/cmdline.h) +TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage) + +ADD_EXECUTABLE(nvassemble tools/assemble.cpp tools/cmdline.h) +TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage) + +ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h) +TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage) + +ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h) +TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage) + +INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin) + +# UI tools +IF(QT4_FOUND AND NOT MSVC) + SET(QT_USE_QTOPENGL TRUE) + INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + + SET(SRCS + tools/main.cpp + tools/configdialog.h + tools/configdialog.cpp) + + SET(LIBS + nvtt + ${QT_QTCORE_LIBRARY} + ${QT_QTGUI_LIBRARY} + ${QT_QTOPENGL_LIBRARY}) + + QT4_WRAP_UI(UICS tools/configdialog.ui) + QT4_WRAP_CPP(MOCS tools/configdialog.h) + #QT4_ADD_RESOURCES(RCCS tools/configdialog.rc) + + ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS}) + TARGET_LINK_LIBRARIES(nvcompressui ${LIBS}) + +ENDIF(QT4_FOUND AND NOT MSVC) + + diff --git a/src/nvtt/CompressDXT.cpp b/src/nvtt/CompressDXT.cpp index a8684d6..60f5264 100644 --- a/src/nvtt/CompressDXT.cpp +++ b/src/nvtt/CompressDXT.cpp @@ -1,675 +1,597 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of 
charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#include "CompressDXT.h" -#include "QuickCompressDXT.h" -#include "OptimalCompressDXT.h" -#include "CompressionOptions.h" -#include "OutputOptions.h" - -// squish -#include "squish/colourset.h" -#include "squish/fastclusterfit.h" -#include "squish/weightedclusterfit.h" - -#include - -#include - -#include -#include -#include - - -// s3_quant -#if defined(HAVE_S3QUANT) -#include "s3tc/s3_quant.h" -#endif - -// ati tc -#if defined(HAVE_ATITC) -typedef int BOOL; -typedef _W64 unsigned long ULONG_PTR; -typedef ULONG_PTR DWORD_PTR; -#include "atitc/ATI_Compress.h" -#endif - -// squish -#if defined(HAVE_SQUISH) -//#include "squish/squish.h" -#include "squish-1.10/squish.h" -#endif - -// d3dx -#if defined(HAVE_D3DX) -#include -#endif - -// stb -#if defined(HAVE_STB) -#define STB_DEFINE -#include "stb/stb_dxt.h" -#endif - -#pragma message(NV_FILE_LINE "FIXME: Define HAVE_OPENMP from cmake.") -#define HAVE_OPENMP -#include - -using namespace nv; -using namespace nvtt; - - -void FixedBlockCompressor::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - const uint bs = blockSize(); - const uint bw = (w + 3) / 4; - const uint bh = (h + 3) / 4; - const uint size = bs * bw * bh; - -#if defined(HAVE_OPENMP) - bool singleThreaded = false; -#else - bool singleThreaded = true; -#endif - - // Use a single thread to compress small textures. 
- if (bw * bh < 16) singleThreaded = true; - - if (singleThreaded) - { - nvDebugCheck(bs <= 16); - uint8 mem[16]; - - for (int y = 0; y < int(h); y += 4) { - for (uint x = 0; x < w; x += 4) { - - ColorBlock rgba; - if (inputFormat == nvtt::InputFormat_BGRA_8UB) { - rgba.init(w, h, (uint *)data, x, y); - } - else { - nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F); - rgba.init(w, h, (float *)data, x, y); - } - - compressBlock(rgba, alphaMode, compressionOptions, mem); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(mem, bs); - } - } - } - } -#if defined(HAVE_OPENMP) - else - { - uint8 * mem = new uint8[size]; - - #pragma omp parallel - { - #pragma omp for - for (int i = 0; i < int(bw*bh); i++) - { - const uint x = i % bw; - const uint y = i / bw; - - ColorBlock rgba; - if (inputFormat == nvtt::InputFormat_BGRA_8UB) { - rgba.init(w, h, (uint *)data, 4*x, 4*y); - } - else { - nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F); - rgba.init(w, h, (float *)data, 4*x, 4*y); - } - - uint8 * ptr = mem + (y * bw + x) * bs; - compressBlock(rgba, alphaMode, compressionOptions, ptr); - } // omp for - } // omp parallel - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(mem, size); - } - - delete [] mem; - } -#endif -} - - -void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT1 * block = new(output) BlockDXT1; - QuickCompress::compressDXT1(rgba, block); -} - -void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT1 * block = new(output) BlockDXT1; - QuickCompress::compressDXT1a(rgba, block); -} - -void FastCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * 
output) -{ - BlockDXT3 * block = new(output) BlockDXT3; - QuickCompress::compressDXT3(rgba, block); -} - -void FastCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT5 * block = new(output) BlockDXT5; - QuickCompress::compressDXT5(rgba, block); -} - -void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - rgba.swizzle(4, 1, 5, 0); // 0xFF, G, 0, R - - BlockDXT5 * block = new(output) BlockDXT5; - QuickCompress::compressDXT5(rgba, block); -} - -void FastCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockATI1 * block = new(output) BlockATI1; - - rgba.swizzle(0, 1, 2, 0); // Copy red to alpha - QuickCompress::compressDXT5A(rgba, &block->alpha); -} - -void FastCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockATI2 * block = new(output) BlockATI2; - - rgba.swizzle(0, 1, 2, 0); // Copy red to alpha - QuickCompress::compressDXT5A(rgba, &block->x); - - rgba.swizzle(0, 1, 2, 1); // Copy green to alpha - QuickCompress::compressDXT5A(rgba, &block->y); -} - - -void NormalCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); - - if (rgba.isSingleColor()) - { - BlockDXT1 * block = new(output) BlockDXT1; - OptimalCompress::compressDXT1(rgba.color(0), block); - } - else - { - nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0); - fit.SetColourSet(&colours, nvsquish::kDxt1); - 
fit.Compress(output); - } -} - - -void NormalCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - bool anyAlpha = false; - bool allAlpha = true; - - for (uint i = 0; i < 16; i++) - { - if (rgba.color(i).a < 128) anyAlpha = true; - else allAlpha = false; - } - - const bool isSingleColor = rgba.isSingleColor(); - - if ((!anyAlpha && isSingleColor || allAlpha)) - { - BlockDXT1 * block = new(output) BlockDXT1; - OptimalCompress::compressDXT1a(rgba.color(0), block); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); - - int flags = nvsquish::kDxt1; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, nvsquish::kDxt1); - - fit.Compress(output); - } -} - - -void NormalCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT3 * block = new(output) BlockDXT3; - - // Compress explicit alpha. - OptimalCompress::compressDXT3A(rgba, &block->alpha); - - // Compress color. 
- if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block->color); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - } -} - - -void NormalCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT5 * block = new(output) BlockDXT5; - - // Compress alpha. - if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT5A(rgba, &block->alpha); - } - else - { - QuickCompress::compressDXT5A(rgba, &block->alpha); - } - - // Compress color. - if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block->color); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - } -} - - -void NormalCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - rgba.swizzle(4, 1, 5, 0); // 0xFF, G, 0, R - - BlockDXT5 * block = new(output) BlockDXT5; - - // Compress X. - if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT5A(rgba, &block->alpha); - } - else - { - QuickCompress::compressDXT5A(rgba, &block->alpha); - } - - // Compress Y. 
- if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT1G(rgba, &block->color); - } - else - { - if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1G(rgba.color(0), &block->color); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(0, 1, 0); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - } - } -} - - -void ProductionCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockATI1 * block = new(output) BlockATI1; - - rgba.swizzle(0, 1, 2, 0); // Copy red to alpha - OptimalCompress::compressDXT5A(rgba, &block->alpha); -} - -void ProductionCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockATI2 * block = new(output) BlockATI2; - - rgba.swizzle(0, 1, 2, 0); // Copy red to alpha - OptimalCompress::compressDXT5A(rgba, &block->x); - - rgba.swizzle(0, 1, 2, 1); // Copy green to alpha - OptimalCompress::compressDXT5A(rgba, &block->y); -} - - - -#if defined(HAVE_S3QUANT) - -void S3CompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - float error = 0.0f; - - BlockDXT1 dxtBlock3; - BlockDXT1 dxtBlock4; - ColorBlock block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - block.init(inputFormat, w, h, data, x, y); - - // Init rgb block. 
- RGBBlock rgbBlock; - rgbBlock.n = 16; - for (uint i = 0; i < 16; i++) { - rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f); - rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f); - rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f); - } - rgbBlock.weight[0] = 1.0f; - rgbBlock.weight[1] = 1.0f; - rgbBlock.weight[2] = 1.0f; - - rgbBlock.inLevel = 4; - CodeRGBBlock(&rgbBlock); - - // Copy results to DXT block. - dxtBlock4.col0.r = rgbBlock.endPoint[0][0]; - dxtBlock4.col0.g = rgbBlock.endPoint[0][1]; - dxtBlock4.col0.b = rgbBlock.endPoint[0][2]; - - dxtBlock4.col1.r = rgbBlock.endPoint[1][0]; - dxtBlock4.col1.g = rgbBlock.endPoint[1][1]; - dxtBlock4.col1.b = rgbBlock.endPoint[1][2]; - - dxtBlock4.setIndices(rgbBlock.index); - - if (dxtBlock4.col0.u < dxtBlock4.col1.u) { - swap(dxtBlock4.col0.u, dxtBlock4.col1.u); - dxtBlock4.indices ^= 0x55555555; - } - - uint error4 = blockError(block, dxtBlock4); - - rgbBlock.inLevel = 3; - - CodeRGBBlock(&rgbBlock); - - // Copy results to DXT block. 
- dxtBlock3.col0.r = rgbBlock.endPoint[0][0]; - dxtBlock3.col0.g = rgbBlock.endPoint[0][1]; - dxtBlock3.col0.b = rgbBlock.endPoint[0][2]; - - dxtBlock3.col1.r = rgbBlock.endPoint[1][0]; - dxtBlock3.col1.g = rgbBlock.endPoint[1][1]; - dxtBlock3.col1.b = rgbBlock.endPoint[1][2]; - - dxtBlock3.setIndices(rgbBlock.index); - - if (dxtBlock3.col0.u > dxtBlock3.col1.u) { - swap(dxtBlock3.col0.u, dxtBlock3.col1.u); - dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555; - } - - uint error3 = blockError(block, dxtBlock3); - - if (error3 < error4) { - error += error3; - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3)); - } - } - else { - error += error4; - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4)); - } - } - } - } -} - -#endif // defined(HAVE_S3QUANT) - - -#if defined(HAVE_ATITC) - -void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - // Init source texture - ATI_TC_Texture srcTexture; - srcTexture.dwSize = sizeof(srcTexture); - srcTexture.dwWidth = w; - srcTexture.dwHeight = h; - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - srcTexture.dwPitch = w * 4; - srcTexture.format = ATI_TC_FORMAT_ARGB_8888; - } - else - { - srcTexture.dwPitch = w * 16; - srcTexture.format = ATI_TC_FORMAT_ARGB_32F; - } - srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); - srcTexture.pData = (ATI_TC_BYTE*) data; - - // Init dest texture - ATI_TC_Texture destTexture; - destTexture.dwSize = sizeof(destTexture); - destTexture.dwWidth = w; - destTexture.dwHeight = h; - destTexture.dwPitch = 0; - destTexture.format = ATI_TC_FORMAT_DXT1; - destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); - destTexture.pData = (ATI_TC_BYTE*) 
mem::malloc(destTexture.dwDataSize); - - ATI_TC_CompressOptions options; - options.dwSize = sizeof(options); - options.bUseChannelWeighting = false; - options.bUseAdaptiveWeighting = false; - options.bDXT1UseAlpha = false; - options.nCompressionSpeed = ATI_TC_Speed_Normal; - options.bDisableMultiThreading = false; - //options.bDisableMultiThreading = true; - - // Compress - ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); - } - - mem::free(destTexture.pData); -} - -void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - // Init source texture - ATI_TC_Texture srcTexture; - srcTexture.dwSize = sizeof(srcTexture); - srcTexture.dwWidth = w; - srcTexture.dwHeight = h; - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - srcTexture.dwPitch = w * 4; - srcTexture.format = ATI_TC_FORMAT_ARGB_8888; - } - else - { - srcTexture.dwPitch = w * 16; - srcTexture.format = ATI_TC_FORMAT_ARGB_32F; - } - srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); - srcTexture.pData = (ATI_TC_BYTE*) data; - - // Init dest texture - ATI_TC_Texture destTexture; - destTexture.dwSize = sizeof(destTexture); - destTexture.dwWidth = w; - destTexture.dwHeight = h; - destTexture.dwPitch = 0; - destTexture.format = ATI_TC_FORMAT_DXT5; - destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); - destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); - - // Compress - ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); - } - - mem::free(destTexture.pData); -} - -#endif // 
defined(HAVE_ATITC) - -#if defined(HAVE_SQUISH) - -void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ -#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB") - /* - Image img(*image); - int count = img.width() * img.height(); - for (int i = 0; i < count; i++) - { - Color32 c = img.pixel(i); - img.pixel(i) = Color32(c.b, c.g, c.r, c.a); - } - - int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1); - void * blocks = mem::malloc(size); - - squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(blocks, size); - } - - mem::free(blocks); - */ -} - -#endif // defined(HAVE_SQUISH) - - -#if defined(HAVE_D3DX) - -void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION); - - D3DPRESENT_PARAMETERS presentParams; - ZeroMemory(&presentParams, sizeof(presentParams)); - presentParams.Windowed = TRUE; - presentParams.SwapEffect = D3DSWAPEFFECT_COPY; - presentParams.BackBufferWidth = 8; - presentParams.BackBufferHeight = 8; - presentParams.BackBufferFormat = D3DFMT_UNKNOWN; - - HRESULT err; - - IDirect3DDevice9 * device = NULL; - err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device); - - IDirect3DTexture9 * texture = NULL; - err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture); - - IDirect3DSurface9 * surface = 
NULL; - err = texture->GetSurfaceLevel(0, &surface); - - RECT rect; - rect.left = 0; - rect.top = 0; - rect.bottom = h; - rect.right = w; - - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0); - } - else - { - err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0); - } - - if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA) - { - D3DLOCKED_RECT rect; - ZeroMemory(&rect, sizeof(rect)); - - err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY); - - if (outputOptions.outputHandler != NULL) { - int size = rect.Pitch * ((h + 3) / 4); - outputOptions.outputHandler->writeData(rect.pBits, size); - } - - err = surface->UnlockRect(); - } - - surface->Release(); - device->Release(); - d3d->Release(); -} - -#endif // defined(HAVE_D3DX) - - -#if defined(HAVE_STB) - -void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - rgba.swizzle(2, 1, 0, 3); // Swap R and B - stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0); -} - - -#endif // defined(HAVE_STB) +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include + +#include +#include +#include + +#include "nvtt.h" +#include "CompressDXT.h" +#include "QuickCompressDXT.h" +#include "OptimalCompressDXT.h" +#include "CompressionOptions.h" +#include "OutputOptions.h" + +// squish +#include "squish/colourset.h" +//#include "squish/clusterfit.h" +#include "squish/fastclusterfit.h" +#include "squish/weightedclusterfit.h" + + +// s3_quant +#if defined(HAVE_S3QUANT) +#include "s3tc/s3_quant.h" +#endif + +// ati tc +#if defined(HAVE_ATITC) +#include "atitc/ATI_Compress.h" +#endif + +//#include + +using namespace nv; +using namespace nvtt; + + +nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None) +{ +} + +nv::FastCompressor::~FastCompressor() +{ +} + +void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode) +{ + m_image = image; + m_alphaMode = alphaMode; +} + +void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + BlockDXT1 block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + rgba.init(m_image, x, y); + + QuickCompress::compressDXT1(rgba, &block); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = 
m_image->height(); + + ColorBlock rgba; + BlockDXT1 block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + rgba.init(m_image, x, y); + + QuickCompress::compressDXT1a(rgba, &block); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + BlockDXT3 block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + rgba.init(m_image, x, y); + + QuickCompress::compressDXT3(rgba, &block); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + BlockDXT5 block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + rgba.init(m_image, x, y); + + QuickCompress::compressDXT5(rgba, &block, 0); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + BlockDXT5 block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + rgba.init(m_image, x, y); + + rgba.swizzleDXT5n(); + + QuickCompress::compressDXT5(rgba, &block, 0); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None) +{ +} + +nv::SlowCompressor::~SlowCompressor() +{ +} + +void 
nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode) +{ + m_image = image; + m_alphaMode = alphaMode; +} + +void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + BlockDXT1 block; + + squish::WeightedClusterFit fit; + //squish::ClusterFit fit; + //squish::FastClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + + rgba.init(m_image, x, y); + + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block); + } + else + { + squish::ColourSet colours((uint8 *)rgba.colors(), 0, true); + fit.SetColourSet(&colours, squish::kDxt1); + fit.Compress(&block); + } + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + BlockDXT1 block; + + squish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + + rgba.init(m_image, x, y); + + bool anyAlpha = false; + bool allAlpha = true; + + for (uint i = 0; i < 16; i++) + { + if (rgba.color(i).a < 128) anyAlpha = true; + else allAlpha = false; + } + + if ((!anyAlpha && rgba.isSingleColor() || allAlpha)) + { + OptimalCompress::compressDXT1a(rgba.color(0), &block); + } + else + { + squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha); + 
fit.SetColourSet(&colours, squish::kDxt1); + fit.Compress(&block); + } + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + BlockDXT3 block; + + squish::WeightedClusterFit fit; + //squish::FastClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + + rgba.init(m_image, x, y); + + // Compress explicit alpha. + OptimalCompress::compressDXT3A(rgba, &block.alpha); + + // Compress color. + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block.color); + } + else + { + squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha); + fit.SetColourSet(&colours, 0); + fit.Compress(&block.color); + } + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + +void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + BlockDXT5 block; + + squish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + + rgba.init(m_image, x, y); + + // Compress alpha. + if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT5A(rgba, &block.alpha); + } + else + { + QuickCompress::compressDXT5A(rgba, &block.alpha); + } + + // Compress color. 
+ if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block.color); + } + else + { + squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha); + fit.SetColourSet(&colours, 0); + fit.Compress(&block.color); + } + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + BlockDXT5 block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + + rgba.init(m_image, x, y); + + rgba.swizzleDXT5n(); + + // Compress X. + if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT5A(rgba, &block.alpha); + } + else + { + QuickCompress::compressDXT5A(rgba, &block.alpha); + } + + // Compress Y. + OptimalCompress::compressDXT1G(rgba, &block.color); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock rgba; + AlphaBlockDXT5 block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + + rgba.init(m_image, x, y); + + if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT5A(rgba, &block); + } + else + { + QuickCompress::compressDXT5A(rgba, &block); + } + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) 
+{ + const uint w = m_image->width(); + const uint h = m_image->height(); + + ColorBlock xcolor; + ColorBlock ycolor; + + BlockATI2 block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + + xcolor.init(m_image, x, y); + xcolor.splatX(); + + ycolor.init(m_image, x, y); + ycolor.splatY(); + + if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT5A(xcolor, &block.x); + OptimalCompress::compressDXT5A(ycolor, &block.y); + } + else + { + QuickCompress::compressDXT5A(xcolor, &block.x); + QuickCompress::compressDXT5A(ycolor, &block.y); + } + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&block, sizeof(block)); + } + } + } +} + + +#if defined(HAVE_S3QUANT) + +void nv::s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions) +{ + const uint w = image->width(); + const uint h = image->height(); + + float error = 0.0f; + + BlockDXT1 dxtBlock3; + BlockDXT1 dxtBlock4; + ColorBlock block; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4) { + block.init(image, x, y); + + // Init rgb block. + RGBBlock rgbBlock; + rgbBlock.n = 16; + for (uint i = 0; i < 16; i++) { + rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f); + rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f); + rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f); + } + rgbBlock.weight[0] = 1.0f; + rgbBlock.weight[1] = 1.0f; + rgbBlock.weight[2] = 1.0f; + + rgbBlock.inLevel = 4; + CodeRGBBlock(&rgbBlock); + + // Copy results to DXT block. 
+ dxtBlock4.col0.r = rgbBlock.endPoint[0][0]; + dxtBlock4.col0.g = rgbBlock.endPoint[0][1]; + dxtBlock4.col0.b = rgbBlock.endPoint[0][2]; + + dxtBlock4.col1.r = rgbBlock.endPoint[1][0]; + dxtBlock4.col1.g = rgbBlock.endPoint[1][1]; + dxtBlock4.col1.b = rgbBlock.endPoint[1][2]; + + dxtBlock4.setIndices(rgbBlock.index); + + if (dxtBlock4.col0.u < dxtBlock4.col1.u) { + swap(dxtBlock4.col0.u, dxtBlock4.col1.u); + dxtBlock4.indices ^= 0x55555555; + } + + uint error4 = blockError(block, dxtBlock4); + + rgbBlock.inLevel = 3; + + CodeRGBBlock(&rgbBlock); + + // Copy results to DXT block. + dxtBlock3.col0.r = rgbBlock.endPoint[0][0]; + dxtBlock3.col0.g = rgbBlock.endPoint[0][1]; + dxtBlock3.col0.b = rgbBlock.endPoint[0][2]; + + dxtBlock3.col1.r = rgbBlock.endPoint[1][0]; + dxtBlock3.col1.g = rgbBlock.endPoint[1][1]; + dxtBlock3.col1.b = rgbBlock.endPoint[1][2]; + + dxtBlock3.setIndices(rgbBlock.index); + + if (dxtBlock3.col0.u > dxtBlock3.col1.u) { + swap(dxtBlock3.col0.u, dxtBlock3.col1.u); + dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555; + } + + uint error3 = blockError(block, dxtBlock3); + + if (error3 < error4) { + error += error3; + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3)); + } + } + else { + error += error4; + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4)); + } + } + } + } + + printf("error = %f\n", error/((w+3)/4 * (h+3)/4)); +} + +#endif // defined(HAVE_S3QUANT) + + +#if defined(HAVE_ATITC) + +void nv::atiCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions) +{ + // Init source texture + ATI_TC_Texture srcTexture; + srcTexture.dwSize = sizeof(srcTexture); + srcTexture.dwWidth = image->width(); + srcTexture.dwHeight = image->height(); + srcTexture.dwPitch = image->width() * 4; + srcTexture.format = ATI_TC_FORMAT_ARGB_8888; + srcTexture.dwDataSize = 
ATI_TC_CalculateBufferSize(&srcTexture); + srcTexture.pData = (ATI_TC_BYTE*) image->pixels(); + + // Init dest texture + ATI_TC_Texture destTexture; + destTexture.dwSize = sizeof(destTexture); + destTexture.dwWidth = image->width(); + destTexture.dwHeight = image->height(); + destTexture.dwPitch = 0; + destTexture.format = ATI_TC_FORMAT_DXT1; + destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); + destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); + + // Compress + ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); + } +} + +#endif // defined(HAVE_ATITC) diff --git a/src/nvtt/CompressDXT.h b/src/nvtt/CompressDXT.h index 6c946e3..841999d 100644 --- a/src/nvtt/CompressDXT.h +++ b/src/nvtt/CompressDXT.h @@ -30,153 +30,55 @@ namespace nv { class Image; - struct ColorBlock; + class FloatImage; - struct CompressorInterface + class FastCompressor { - virtual ~CompressorInterface() {} - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0; - }; - - struct FixedBlockCompressor : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; - virtual uint blockSize() const = 0; - }; - - - // Fast CPU compressors. 
- struct FastCompressorDXT1 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; - - struct FastCompressorDXT1a : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; - - struct FastCompressorDXT3 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } - }; - - struct FastCompressorDXT5 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } - }; - - struct FastCompressorDXT5n : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } - }; - - struct FastCompressorBC4 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; - - struct FastCompressorBC5 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } - }; - - - // Normal CPU compressors. 
- struct NormalCompressorDXT1 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; + public: + FastCompressor(); + ~FastCompressor(); - struct NormalCompressorDXT1a : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; + void setImage(const Image * image, nvtt::AlphaMode alphaMode); - struct NormalCompressorDXT3 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } - }; + void compressDXT1(const nvtt::OutputOptions::Private & outputOptions); + void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions); + void compressDXT3(const nvtt::OutputOptions::Private & outputOptions); + void compressDXT5(const nvtt::OutputOptions::Private & outputOptions); + void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions); - struct NormalCompressorDXT5 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } + private: + const Image * m_image; + nvtt::AlphaMode m_alphaMode; }; - struct NormalCompressorDXT5n : public FixedBlockCompressor + class SlowCompressor { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } - }; + public: + SlowCompressor(); + ~SlowCompressor(); + void setImage(const Image * 
image, nvtt::AlphaMode alphaMode); - // Production CPU compressors. - struct ProductionCompressorBC4 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; + void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - struct ProductionCompressorBC5 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } + private: + const Image * m_image; + nvtt::AlphaMode m_alphaMode; }; - // External compressors. 
#if defined(HAVE_S3QUANT) - struct S3CompressorDXT1 : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; + void s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions); #endif #if defined(HAVE_ATITC) - struct AtiCompressorDXT1 : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; - - struct AtiCompressorDXT5 : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; -#endif - -#if defined(HAVE_SQUISH) - struct SquishCompressorDXT1 : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; -#endif - -#if defined(HAVE_D3DX) - struct D3DXCompressorDXT1 : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; -#endif - -#if defined(HAVE_STB) - struct StbCompressorDXT1 : public FixedBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; + void 
atiCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions); #endif } // nv namespace diff --git a/src/nvtt/CompressRGB.cpp b/src/nvtt/CompressRGB.cpp index 50fdb10..35239c4 100644 --- a/src/nvtt/CompressRGB.cpp +++ b/src/nvtt/CompressRGB.cpp @@ -21,18 +21,15 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#include "CompressRGB.h" -#include "CompressionOptions.h" -#include "OutputOptions.h" +#include #include -#include #include - #include -#include -#include +#include "CompressRGB.h" +#include "CompressionOptions.h" +#include "OutputOptions.h" using namespace nv; using namespace nvtt; @@ -69,53 +66,29 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO const uint w = image->width(); const uint h = image->height(); - uint bitCount; - uint rmask, rshift, rsize; - uint gmask, gshift, gsize; - uint bmask, bshift, bsize; - uint amask, ashift, asize; - - if (compressionOptions.bitcount != 0) - { - bitCount = compressionOptions.bitcount; - nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32); - - rmask = compressionOptions.rmask; - gmask = compressionOptions.gmask; - bmask = compressionOptions.bmask; - amask = compressionOptions.amask; - - PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize); - PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize); - PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize); - PixelFormat::maskShiftAndSize(amask, &ashift, &asize); - } - else - { - rsize = compressionOptions.rsize; - gsize = compressionOptions.gsize; - bsize = compressionOptions.bsize; - asize = compressionOptions.asize; - - bitCount = rsize + gsize + bsize + asize; - nvCheck(bitCount <= 32); - - ashift = 0; - bshift = ashift + asize; - gshift = bshift + bsize; - rshift = gshift + gsize; - - rmask = ((1 << rsize) - 1) << rshift; - gmask = ((1 << gsize) - 1) << gshift; - bmask = ((1 << bsize) - 1) << bshift; - amask = ((1 << asize) - 1) 
<< ashift; - } + const uint bitCount = compressionOptions.bitcount; + nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32); const uint byteCount = bitCount / 8; + const uint rmask = compressionOptions.rmask; + uint rshift, rsize; + PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize); + + const uint gmask = compressionOptions.gmask; + uint gshift, gsize; + PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize); + + const uint bmask = compressionOptions.bmask; + uint bshift, bsize; + PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize); + + const uint amask = compressionOptions.amask; + uint ashift, asize; + PixelFormat::maskShiftAndSize(amask, &ashift, &asize); // Determine pitch. - uint pitch = computePitch(w, bitCount); + uint pitch = computePitch(w, compressionOptions.bitcount); uint8 * dst = (uint8 *)mem::malloc(pitch + 4); @@ -165,75 +138,3 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO mem::free(dst); } - -void nv::compressRGB(const FloatImage * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions) -{ - nvCheck(image != NULL); - - const uint w = image->width(); - const uint h = image->height(); - - const uint rsize = compressionOptions.rsize; - const uint gsize = compressionOptions.gsize; - const uint bsize = compressionOptions.bsize; - const uint asize = compressionOptions.asize; - - nvCheck(rsize == 0 || rsize == 16 || rsize == 32); - nvCheck(gsize == 0 || gsize == 16 || gsize == 32); - nvCheck(bsize == 0 || bsize == 16 || bsize == 32); - nvCheck(asize == 0 || asize == 16 || asize == 32); - - const uint bitCount = rsize + gsize + bsize + asize; - const uint byteCount = bitCount / 8; - const uint pitch = w * byteCount; - - uint8 * dst = (uint8 *)mem::malloc(pitch); - - for (uint y = 0; y < h; y++) - { - const float * rchannel = image->scanline(y, 0); - const float * gchannel = image->scanline(y, 1); - const float * bchannel = 
image->scanline(y, 2); - const float * achannel = image->scanline(y, 3); - - union FLOAT - { - float f; - uint32 u; - }; - - uint8 * ptr = dst; - - for (uint x = 0; x < w; x++) - { - FLOAT r, g, b, a; - r.f = rchannel[x]; - g.f = gchannel[x]; - b.f = bchannel[x]; - a.f = achannel[x]; - - if (rsize == 32) *((uint32 *)ptr) = r.u; - else if (rsize == 16) *((uint16 *)ptr) = half_from_float(r.u); - ptr += rsize / 8; - - if (gsize == 32) *((uint32 *)ptr) = g.u; - else if (gsize == 16) *((uint16 *)ptr) = half_from_float(g.u); - ptr += gsize / 8; - - if (bsize == 32) *((uint32 *)ptr) = b.u; - else if (bsize == 16) *((uint16 *)ptr) = half_from_float(b.u); - ptr += bsize / 8; - - if (asize == 32) *((uint32 *)ptr) = a.u; - else if (asize == 16) *((uint16 *)ptr) = half_from_float(a.u); - ptr += asize / 8; - } - - if (outputOptions.outputHandler != NULL) - { - outputOptions.outputHandler->writeData(dst, pitch); - } - } - - mem::free(dst); -} diff --git a/src/nvtt/CompressRGB.h b/src/nvtt/CompressRGB.h index 7d8ae6b..3bab47d 100644 --- a/src/nvtt/CompressRGB.h +++ b/src/nvtt/CompressRGB.h @@ -29,11 +29,9 @@ namespace nv { class Image; - class FloatImage; // Pixel format converter. 
void compressRGB(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); - void compressRGB(const FloatImage * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); } // nv namespace diff --git a/src/nvtt/CompressionOptions.cpp b/src/nvtt/CompressionOptions.cpp index c86b162..944256e 100644 --- a/src/nvtt/CompressionOptions.cpp +++ b/src/nvtt/CompressionOptions.cpp @@ -55,12 +55,6 @@ void CompressionOptions::reset() m.rmask = 0x00FF0000; m.amask = 0xFF000000; - m.rsize = 8; - m.gsize = 8; - m.bsize = 8; - m.asize = 8; - m.pixelType = PixelType_UnsignedNorm; - m.enableColorDithering = false; m.enableAlphaDithering = false; m.binaryAlpha = false; @@ -123,36 +117,8 @@ void CompressionOptions::setPixelFormat(uint bitcount, uint rmask, uint gmask, u m.gmask = gmask; m.bmask = bmask; m.amask = amask; - - m.rsize = 0; - m.gsize = 0; - m.bsize = 0; - m.asize = 0; -} - -void CompressionOptions::setPixelFormat(uint8 rsize, uint8 gsize, uint8 bsize, uint8 asize) -{ - nvCheck(rsize <= 32 || gsize <= 32 || bsize <= 32 || asize <= 32); - - m.bitcount = 0; - m.rmask = 0; - m.gmask = 0; - m.bmask = 0; - m.amask = 0; - - m.rsize = rsize; - m.gsize = gsize; - m.bsize = bsize; - m.asize = asize; } -/// Set pixel type. -void CompressionOptions::setPixelType(PixelType pixelType) -{ - m.pixelType = pixelType; -} - - /// Use external compressor. 
void CompressionOptions::setExternalCompressor(const char * name) { diff --git a/src/nvtt/CompressionOptions.h b/src/nvtt/CompressionOptions.h index ba69bfb..0a30673 100644 --- a/src/nvtt/CompressionOptions.h +++ b/src/nvtt/CompressionOptions.h @@ -45,12 +45,6 @@ namespace nvtt uint gmask; uint bmask; uint amask; - uint8 rsize; - uint8 gsize; - uint8 bsize; - uint8 asize; - - PixelType pixelType; nv::String externalCompressor; @@ -59,15 +53,6 @@ namespace nvtt bool enableAlphaDithering; bool binaryAlpha; int alphaThreshold; // reference value used for binary alpha quantization. - - uint getBitCount() const - { - if (format == Format_RGBA) { - if (bitcount != 0) return bitcount; - else return rsize + gsize + bsize + asize; - } - return 0; - } }; } // nvtt namespace diff --git a/src/nvtt/Compressor.cpp b/src/nvtt/Compressor.cpp new file mode 100644 index 0000000..6631b91 --- /dev/null +++ b/src/nvtt/Compressor.cpp @@ -0,0 +1,854 @@ +// Copyright NVIDIA Corporation 2008 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Compressor.h" +#include "InputOptions.h" +#include "CompressionOptions.h" +#include "OutputOptions.h" + +#include "CompressDXT.h" +#include "CompressRGB.h" +#include "cuda/CudaUtils.h" +#include "cuda/CudaCompressDXT.h" + + +using namespace nv; +using namespace nvtt; + + +namespace +{ + + static int blockSize(Format format) + { + if (format == Format_DXT1 || format == Format_DXT1a) { + return 8; + } + else if (format == Format_DXT3) { + return 16; + } + else if (format == Format_DXT5 || format == Format_DXT5n) { + return 16; + } + else if (format == Format_BC4) { + return 8; + } + else if (format == Format_BC5) { + return 16; + } + return 0; + } + + inline uint computePitch(uint w, uint bitsize) + { + uint p = w * ((bitsize + 7) / 8); + + // Align to 32 bits. + return ((p + 3) / 4) * 4; + } + + static int computeImageSize(uint w, uint h, uint d, uint bitCount, Format format) + { + if (format == Format_RGBA) { + return d * h * computePitch(w, bitCount); + } + else { + // @@ Handle 3D textures. DXT and VTC have different behaviors. + return ((w + 3) / 4) * ((h + 3) / 4) * blockSize(format); + } + } + +} // namespace + +namespace nvtt +{ + // Mipmap could be: + // - a pointer to an input image. + // - a fixed point image. + // - a floating point image. + struct Mipmap + { + Mipmap() : m_inputImage(NULL) {} + ~Mipmap() {} + + // Reference input image. 
+ void setFromInput(const InputOptions::Private & inputOptions, uint idx) + { + m_inputImage = inputOptions.image(idx); + m_fixedImage = NULL; + m_floatImage = NULL; + } + + // Assign and take ownership of given image. + void setImage(FloatImage * image) + { + m_inputImage = NULL; + m_fixedImage = NULL; + m_floatImage = image; + } + + + // Convert linear float image to fixed image ready for compression. + void toFixedImage(const InputOptions::Private & inputOptions) + { + if (this->asFixedImage() == NULL) + { + nvDebugCheck(m_floatImage != NULL); + + if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f) + { + m_fixedImage = m_floatImage->createImage(); + } + else + { + m_fixedImage = m_floatImage->createImageGammaCorrect(inputOptions.outputGamma); + } + } + } + + // Convert input image to linear float image. + void toFloatImage(const InputOptions::Private & inputOptions) + { + if (m_floatImage == NULL) + { + nvDebugCheck(this->asFixedImage() != NULL); + + m_floatImage = new FloatImage(this->asFixedImage()); + + if (inputOptions.isNormalMap) + { + // Expand normals to [-1, 1] range. + // floatImage->expandNormals(0); + } + else if (inputOptions.inputGamma != 1.0f) + { + // Convert to linear space. + m_floatImage->toLinear(0, 3, inputOptions.inputGamma); + } + } + } + + const FloatImage * asFloatImage() const + { + return m_floatImage.ptr(); + } + + FloatImage * asFloatImage() + { + return m_floatImage.ptr(); + } + + const Image * asFixedImage() const + { + if (m_inputImage != NULL) + { + return m_inputImage; + } + return m_fixedImage.ptr(); + } + + Image * asMutableFixedImage() + { + if (m_inputImage != NULL) + { + // Do not modify input image, create a copy. 
+ m_fixedImage = new Image(*m_inputImage); + m_inputImage = NULL; + } + return m_fixedImage.ptr(); + } + + + private: + const Image * m_inputImage; + AutoPtr m_fixedImage; + AutoPtr m_floatImage; + }; + +} // nvtt namespace + + +Compressor::Compressor() : m(*new Compressor::Private()) +{ + // CUDA initialization. + m.cudaSupported = cuda::isHardwarePresent(); + m.cudaEnabled = false; + m.cudaDevice = -1; + + enableCudaAcceleration(m.cudaSupported); +} + +Compressor::~Compressor() +{ + enableCudaAcceleration(false); + delete &m; +} + + +/// Enable CUDA acceleration. +void Compressor::enableCudaAcceleration(bool enable) +{ + if (m.cudaSupported) + { + if (m.cudaEnabled && !enable) + { + m.cudaEnabled = false; + m.cuda = NULL; + + if (m.cudaDevice != -1) + { + // Exit device. + cuda::exitDevice(); + } + } + else if (!m.cudaEnabled && enable) + { + // Init the CUDA device. This may return -1 if CUDA was already initialized by the app. + m.cudaEnabled = cuda::initDevice(&m.cudaDevice); + + if (m.cudaEnabled) + { + // Create compressor if initialization succeeds. + m.cuda = new CudaCompressor(); + + // But cleanup if failed. + if (!m.cuda->isValid()) + { + enableCudaAcceleration(false); + } + } + } + } +} + +/// Check if CUDA acceleration is enabled. +bool Compressor::isCudaAccelerationEnabled() const +{ + return m.cudaEnabled; +} + + +/// Compress the input texture with the given compression options. +bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const +{ + return m.compress(inputOptions.m, compressionOptions.m, outputOptions.m); +} + + +/// Estimate the size of compressing the input with the given options. 
+int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const
+{
+	// Thin public wrapper: forwards the private parts of the options to Private::estimateSize().
+	return m.estimateSize(inputOptions.m, compressionOptions.m);
+}
+
+
+
+
+// Compress every face and mipmap of the input and send the result to the output.
+// Returns false if the output cannot be opened, the header cannot be written, or
+// any face fails to compress. Errors are reported through outputOptions.errorHandler
+// when one is installed.
+bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	// Make sure enums match.
+	nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
+	nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
+	nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);
+
+	// Get output handler.
+	if (!outputOptions.openFile())
+	{
+		if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
+		return false;
+	}
+
+	inputOptions.computeTargetExtents();
+
+	// Output DDS header, then each face in order. Stop at the first failure.
+	bool succeed = outputHeader(inputOptions, compressionOptions, outputOptions);
+
+	for (uint f = 0; succeed && f < inputOptions.faceCount; f++)
+	{
+		succeed = compressMipmaps(f, inputOptions, compressionOptions, outputOptions);
+	}
+
+	// openFile() succeeded above, so pair it with closeFile() on every exit path;
+	// previously the failure paths returned without closing the output.
+	outputOptions.closeFile();
+
+	return succeed;
+}
+
+
+// Output DDS header.
+// Returns true when no header is requested (no output handler, or the outputHeader
+// flag is off) or when the header was written. On a failed write, Error_FileWrite is
+// reported to the error handler (if any) and false is returned.
+bool Compressor::Private::outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	// Output DDS header.
+	if (outputOptions.outputHandler == NULL || !outputOptions.outputHeader)
+	{
+		return true;
+	}
+
+	DDSHeader header;
+
+	header.setWidth(inputOptions.targetWidth);
+	header.setHeight(inputOptions.targetHeight);
+
+	int mipmapCount = inputOptions.realMipmapCount();
+	nvDebugCheck(mipmapCount > 0);
+
+	header.setMipmapCount(mipmapCount);
+
+	if (inputOptions.textureType == TextureType_2D) {
+		header.setTexture2D();
+	}
+	else if (inputOptions.textureType == TextureType_Cube) {
+		header.setTextureCube();
+	}
+	/*else if (inputOptions.textureType == TextureType_3D) {
+		header.setTexture3D();
+		header.setDepth(inputOptions.targetDepth);
+	}*/
+
+	if (compressionOptions.format == Format_RGBA)
+	{
+		// Uncompressed output: describe the row pitch and the channel masks.
+		header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
+		header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
+	}
+	else
+	{
+		// Block-compressed output: describe the size of the top level and the FourCC code.
+		header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
+
+		if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
+			header.setFourCC('D', 'X', 'T', '1');
+			if (inputOptions.isNormalMap) header.setNormalFlag(true);
+		}
+		else if (compressionOptions.format == Format_DXT3) {
+			header.setFourCC('D', 'X', 'T', '3');
+		}
+		else if (compressionOptions.format == Format_DXT5) {
+			header.setFourCC('D', 'X', 'T', '5');
+		}
+		else if (compressionOptions.format == Format_DXT5n) {
+			// DXT5n shares the DXT5 FourCC; only the normal flag tells them apart.
+			header.setFourCC('D', 'X', 'T', '5');
+			if (inputOptions.isNormalMap) header.setNormalFlag(true);
+		}
+		else if (compressionOptions.format == Format_BC4) {
+			header.setFourCC('A', 'T', 'I', '1');
+		}
+		else if (compressionOptions.format == Format_BC5) {
+			header.setFourCC('A', 'T', 'I', '2');
+			if (inputOptions.isNormalMap) header.setNormalFlag(true);
+		}
+	}
+
+	// Swap bytes if necessary.
+	header.swapBytes();
+
+	// 128 bytes for the basic header, plus 20 more when a DX10 extension is present.
+	uint headerSize = 128;
+	if (header.hasDX10Header())
+	{
+		nvStaticCheck(sizeof(DDSHeader) == 128 + 20);
+		headerSize = 128 + 20;
+	}
+
+	bool writeSucceed = outputOptions.outputHandler->writeData(&header, headerSize);
+	if (!writeSucceed && outputOptions.errorHandler != NULL)
+	{
+		outputOptions.errorHandler->error(Error_FileWrite);
+	}
+
+	return writeSucceed;
+}
+
+
+// Build, quantize and compress every mipmap level of face f, from the target extents
+// down, halving each extent per level (never below 1).
+// Returns false as soon as a level cannot be initialized from the input.
+bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	uint w = inputOptions.targetWidth;
+	uint h = inputOptions.targetHeight;
+	uint d = inputOptions.targetDepth;
+
+	// Reused across levels: a level may be downsampled from the previous one.
+	Mipmap mipmap;
+
+	const uint mipmapCount = inputOptions.realMipmapCount();
+	nvDebugCheck(mipmapCount > 0);
+
+	for (uint m = 0; m < mipmapCount; m++)
+	{
+		if (outputOptions.outputHandler)
+		{
+			int size = computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.format);
+			outputOptions.outputHandler->beginImage(size, w, h, d, f, m);
+		}
+
+		// @@ Where to do the color transform?
+		//  - Color transform may not be linear, so we cannot do before computing mipmaps.
+		//  - Should be done in linear space, that is, after gamma correction.
+
+		if (!initMipmap(mipmap, inputOptions, w, h, d, f, m))
+		{
+			if (outputOptions.errorHandler != NULL)
+			{
+				outputOptions.errorHandler->error(Error_InvalidInput);
+			}
+
+			// Fail whether or not an error handler is installed. The return used to sit
+			// inside the handler check, so without a handler the loop went on to
+			// quantize and compress a mipmap that has no image.
+			return false;
+		}
+
+		quantizeMipmap(mipmap, compressionOptions);
+
+		compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
+
+		// Compute extents of next mipmap:
+		w = max(1U, w / 2);
+		h = max(1U, h / 2);
+		d = max(1U, d / 2);
+	}
+
+	return true;
+}
+
+// Prepare the mipmap for level m of face f with extents w x h x d, leaving it as a
+// fixed image ready for compression. Returns false when the face has no usable input.
+bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const
+{
+	// Find image from input.
+	int inputIdx = findExactMipmap(inputOptions, w, h, d, f);
+
+	if ((inputIdx == -1 || inputOptions.convertToNormalMap) && m != 0)
+	{
+		// Generate from last, when mipmap not found, or normal map conversion enabled.
+		downsampleMipmap(mipmap, inputOptions);
+	}
+	else
+	{
+		if (inputIdx != -1)
+		{
+			// If input mipmap found, then get from input.
+			mipmap.setFromInput(inputOptions, inputIdx);
+		}
+		else
+		{
+			// If not found, resize closest mipmap.
+			inputIdx = findClosestMipmap(inputOptions, w, h, d, f);
+
+			if (inputIdx == -1)
+			{
+				return false;
+			}
+
+			mipmap.setFromInput(inputOptions, inputIdx);
+
+			scaleMipmap(mipmap, inputOptions, w, h, d);
+		}
+
+		processInputImage(mipmap, inputOptions);
+	}
+
+	// Convert linear float image to fixed image ready for compression.
+	mipmap.toFixedImage(inputOptions);
+
+	return true;
+}
+
+// Return the index into inputOptions.images of the image of face f whose extents are
+// exactly w x h x d and that has data, or -1 if there is none. The scan stops at the
+// first level with matching extents, or at the first level with any extent smaller
+// than requested.
+int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
+{
+	for (int m = 0; m < int(inputOptions.mipmapCount); m++)
+	{
+		int idx = f * inputOptions.mipmapCount + m;
+		const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx];
+
+		if (inputImage.width == int(w) && inputImage.height == int(h) && inputImage.depth == int(d))
+		{
+			if (inputImage.data != NULL)
+			{
+				return idx;
+			}
+			return -1;
+		}
+		else if (inputImage.width < int(w) || inputImage.height < int(h) || inputImage.depth < int(d))
+		{
+			return -1;
+		}
+	}
+
+	return -1;
+}
+
+// Return the index of the image of face f to resize to w x h x d, or -1 when the face
+// has no image with data. Levels are visited in order, remembering the last one that
+// has data. When a level's summed extent difference becomes negative (it is smaller
+// than requested), the previously remembered level is returned, or that level itself
+// if nothing was remembered yet.
+int Compressor::Private::findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
+{
+	int bestIdx = -1;
+
+	for (int m = 0; m < int(inputOptions.mipmapCount); m++)
+	{
+		int idx = f * inputOptions.mipmapCount + m;
+		const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx];
+
+		if (inputImage.data != NULL)
+		{
+			// Cast the requested extents to int, as findExactMipmap() does, so the
+			// subtraction is signed instead of relying on unsigned wrap-around.
+			int difference = (inputImage.width - int(w)) + (inputImage.height - int(h)) + (inputImage.depth - int(d));
+
+			if (difference < 0)
+			{
+				if (bestIdx == -1)
+				{
+					bestIdx = idx;
+				}
+
+				return bestIdx;
+			}
+
+			bestIdx = idx;
+		}
+	}
+
+	return bestIdx;
+}
+
+// Create mipmap from the given image.
+// Replaces the image held by the mipmap with a downsampled float image, using the
+// filter selected by inputOptions.mipmapFilter.
+void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
+{
+	// Make sure that floating point linear representation is available.
+	mipmap.toFloatImage(inputOptions);
+
+	const FloatImage * floatImage = mipmap.asFloatImage();
+
+	if (inputOptions.mipmapFilter == MipmapFilter_Box)
+	{
+		// Use fast downsample.
+		mipmap.setImage(floatImage->fastDownSample());
+	}
+	else if (inputOptions.mipmapFilter == MipmapFilter_Triangle)
+	{
+		TriangleFilter filter;
+		mipmap.setImage(floatImage->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode));
+	}
+	else /*if (inputOptions.mipmapFilter == MipmapFilter_Kaiser)*/
+	{
+		nvDebugCheck(inputOptions.mipmapFilter == MipmapFilter_Kaiser);
+		KaiserFilter filter(inputOptions.kaiserWidth);
+		filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
+		mipmap.setImage(floatImage->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode));
+	}
+
+	// Normalize mipmap.
+	if ((inputOptions.isNormalMap || inputOptions.convertToNormalMap) && inputOptions.normalizeMipmaps)
+	{
+		normalizeNormalMap(mipmap.asFloatImage());
+	}
+}
+
+
+// Resize the image held by the mipmap to w x h with a box filter.
+// NOTE(review): d is accepted but not passed to resize() -- depth is not rescaled here.
+void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const
+{
+	mipmap.toFloatImage(inputOptions);
+
+	// @@ Add more filters.
+	// @@ Select different filters for downscaling and reconstruction.
+
+	// Resize image.
+	BoxFilter boxFilter;
+	mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
+}
+
+
+// Process an input image: Convert to normal map, normalize, or convert to linear space.
+void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
+{
+	// Case 1: the input is to be converted into a normal map.
+	if (inputOptions.convertToNormalMap)
+	{
+		// createNormalMap() reads the fixed-point image, so make sure one exists.
+		mipmap.toFixedImage(inputOptions);
+
+		Vector4 heightFactors = inputOptions.heightFactors;
+		FloatImage * normalMap = createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightFactors, inputOptions.bumpFrequencyScale);
+		mipmap.setImage(normalMap);
+		return;
+	}
+
+	// Case 2: the input already is a normal map; renormalize it only when requested.
+	if (inputOptions.isNormalMap)
+	{
+		if (!inputOptions.normalizeMipmaps)
+		{
+			return;
+		}
+
+		// Normalize the existing float image in place, or build one from the fixed image.
+		FloatImage * normals = mipmap.asFloatImage();
+		if (normals == NULL)
+		{
+			normals = new FloatImage(mipmap.asFixedImage());
+		}
+
+		normalizeNormalMap(normals);
+
+		// Handing the float image (back) to the mipmap drops the input/fixed images.
+		mipmap.setImage(normals);
+		return;
+	}
+
+	// Case 3: a color image; a float version is only needed when the gamma changes.
+	if (inputOptions.inputGamma != inputOptions.outputGamma)
+	{
+		mipmap.toFloatImage(inputOptions);
+	}
+}
+
+
+// Quantize the given mipmap according to the compression options.
+void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const
+{
+	nvDebugCheck(mipmap.asFixedImage() != NULL);
+
+	const Format format = compressionOptions.format;
+
+	// Step 1: reduce alpha to two levels, with or without dithering.
+	if (compressionOptions.binaryAlpha)
+	{
+		if (compressionOptions.enableAlphaDithering)
+		{
+			Quantize::FloydSteinberg_BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
+		}
+		else
+		{
+			Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
+		}
+	}
+
+	// Step 2: dither to the target channel sizes. Nothing to do when dithering is off.
+	if (!compressionOptions.enableColorDithering && !compressionOptions.enableAlphaDithering)
+	{
+		return;
+	}
+
+	// Bits per channel; 8 is the size used for channels that are not dithered.
+	uint redBits = 8;
+	uint greenBits = 8;
+	uint blueBits = 8;
+	uint alphaBits = 8;
+
+	// maskShiftAndSize() also reports a shift, which is not needed here.
+	uint ignoredShift;
+
+	if (compressionOptions.enableColorDithering)
+	{
+		if (format >= Format_DXT1 && format <= Format_DXT5)
+		{
+			redBits = 5;
+			greenBits = 6;
+			blueBits = 5;
+		}
+		else if (format == Format_RGB)
+		{
+			PixelFormat::maskShiftAndSize(compressionOptions.rmask, &ignoredShift, &redBits);
+			PixelFormat::maskShiftAndSize(compressionOptions.gmask, &ignoredShift, &greenBits);
+			PixelFormat::maskShiftAndSize(compressionOptions.bmask, &ignoredShift, &blueBits);
+		}
+	}
+
+	if (compressionOptions.enableAlphaDithering)
+	{
+		if (format == Format_DXT3)
+		{
+			alphaBits = 4;
+		}
+		else if (format == Format_RGB)
+		{
+			PixelFormat::maskShiftAndSize(compressionOptions.amask, &ignoredShift, &alphaBits);
+		}
+	}
+
+	if (compressionOptions.binaryAlpha)
+	{
+		alphaBits = 8; // Already quantized.
+	}
+
+	Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), redBits, greenBits, blueBits, alphaBits);
+}
+
+
+// Compress the given mipmap.
+bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
+{
+	// Dispatches on compressionOptions.format and, within a format, on quality and
+	// CUDA availability. Always returns true.
+	const Image * image = mipmap.asFixedImage();
+	nvDebugCheck(image != NULL);
+
+	// Both CPU compressors are set up front, whichever one ends up being used.
+	FastCompressor fast;
+	fast.setImage(image, inputOptions.alphaMode);
+
+	SlowCompressor slow;
+	slow.setImage(image, inputOptions.alphaMode);
+
+	// CUDA is only used when enabled and the image has at least 512 pixels.
+	const bool useCuda = cudaEnabled && image->width() * image->height() >= 512;
+
+	if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
+	{
+		compressRGB(image, outputOptions, compressionOptions);
+	}
+	else if (compressionOptions.format == Format_DXT1)
+	{
+		// The optional external compressors below each end in a dangling "else" that
+		// chains into the next "if", so the order of these sections matters.
+#if defined(HAVE_S3QUANT)
+		if (compressionOptions.externalCompressor == "s3")
+		{
+			s3CompressDXT1(image, outputOptions);
+		}
+		else
+#endif
+
+#if defined(HAVE_ATITC)
+		if (compressionOptions.externalCompressor == "ati")
+		{
+			atiCompressDXT1(image, outputOptions);
+		}
+		else
+#endif
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT1(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				cuda->setImage(image, inputOptions.alphaMode);
+				cuda->compressDXT1(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT1(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT1a)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT1a(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				// The CUDA call is commented out: DXT1a uses the CPU compressor in both branches.
+				/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT1a(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT3)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT3(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				cuda->setImage(image, inputOptions.alphaMode);
+				cuda->compressDXT3(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT3(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT5)
+	{
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT5(outputOptions);
+		}
+		else
+		{
+			if (useCuda)
+			{
+				nvDebugCheck(cudaSupported);
+				cuda->setImage(image, inputOptions.alphaMode);
+				cuda->compressDXT5(compressionOptions, outputOptions);
+			}
+			else
+			{
+				slow.compressDXT5(compressionOptions, outputOptions);
+			}
+		}
+	}
+	else if (compressionOptions.format == Format_DXT5n)
+	{
+		// No CUDA path for DXT5n: fast or slow CPU compressor only.
+		if (compressionOptions.quality == Quality_Fastest)
+		{
+			fast.compressDXT5n(outputOptions);
+		}
+		else
+		{
+			slow.compressDXT5n(compressionOptions, outputOptions);
+		}
+	}
+	else if (compressionOptions.format == Format_BC4)
+	{
+		// BC4 and BC5 always use the slow CPU compressor, whatever the quality setting.
+		slow.compressBC4(compressionOptions, outputOptions);
+	}
+	else if (compressionOptions.format == Format_BC5)
+	{
+		slow.compressBC5(compressionOptions, outputOptions);
+	}
+
+	return true;
+}
+
+
+// Sum of computeImageSize() over every face and every mipmap level, starting at the
+// target extents and halving each extent per level (never below 1).
+// The value is only what computeImageSize() reports per level; nothing is added here
+// for a file header.
+int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const
+{
+	const Format format = compressionOptions.format;
+	const uint bitCount = compressionOptions.bitcount;
+
+	// Called before the target extents and mipmap count are read below.
+	inputOptions.computeTargetExtents();
+
+	uint mipmapCount = inputOptions.realMipmapCount();
+
+	int size = 0;
+
+	for (uint f = 0; f < inputOptions.faceCount; f++)
+	{
+		uint w = inputOptions.targetWidth;
+		uint h = inputOptions.targetHeight;
+		uint d = inputOptions.targetDepth;
+
+		for (uint m = 0; m < mipmapCount; m++)
+		{
+			size += computeImageSize(w, h, d, bitCount, format);
+
+			// Compute extents of next mipmap:
+			w = max(1U, w / 2);
+			h = max(1U, h / 2);
+			d = max(1U, d / 2);
+		}
+	}
+
+	return size;
+}
diff --git a/src/nvtt/Context.h b/src/nvtt/Compressor.h
similarity index 79%
rename from src/nvtt/Context.h
rename to src/nvtt/Compressor.h
index 1c81db7..8737e29 100644
--- 
a/src/nvtt/Context.h +++ b/src/nvtt/Compressor.h @@ -27,7 +27,6 @@ #include #include -#include #include "nvtt.h" @@ -45,20 +44,11 @@ namespace nvtt Private() {} bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; - - bool compress(const void * data, int width, int height, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; - int estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const; - bool outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions); - private: bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; - - nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const; - nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const; - bool compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; bool initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const; @@ -68,18 +58,19 @@ namespace nvtt void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const; void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const; - void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const; void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const; void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) 
const; bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; + public: bool cudaSupported; bool cudaEnabled; + int cudaDevice; - nv::AutoPtr cuda; + nv::AutoPtr cuda; }; diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp deleted file mode 100644 index 020c5f8..0000000 --- a/src/nvtt/Context.cpp +++ /dev/null @@ -1,1545 +0,0 @@ -// Copyright NVIDIA Corporation 2008 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#include "Context.h" - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "InputOptions.h" -#include "CompressionOptions.h" -#include "OutputOptions.h" - -#include "CompressDXT.h" -#include "CompressRGB.h" -#include "cuda/CudaUtils.h" -#include "cuda/CudaCompressDXT.h" - - -using namespace nv; -using namespace nvtt; - - -namespace -{ - - static int blockSize(Format format) - { - if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) { - return 8; - } - else if (format == Format_DXT3) { - return 16; - } - else if (format == Format_DXT5 || format == Format_DXT5n) { - return 16; - } - else if (format == Format_BC4) { - return 8; - } - else if (format == Format_BC5) { - return 16; - } - else if (format == Format_CTX1) { - return 8; - } - return 0; - } - - inline uint computePitch(uint w, uint bitsize) - { - uint p = w * ((bitsize + 7) / 8); - - // Align to 32 bits. - return ((p + 3) / 4) * 4; - } - - static int computeImageSize(uint w, uint h, uint d, uint bitCount, Format format) - { - if (format == Format_RGBA) { - return d * h * computePitch(w, bitCount); - } - else { - // @@ Handle 3D textures. DXT and VTC have different behaviors. - return ((w + 3) / 4) * ((h + 3) / 4) * blockSize(format); - } - } - -} // namespace - -namespace nvtt -{ - // Mipmap could be: - // - a pointer to an input image. - // - a fixed point image. - // - a floating point image. - struct Mipmap - { - Mipmap() : m_inputImage(NULL) {} - ~Mipmap() {} - - // Reference input image. - void setFromInput(const InputOptions::Private & inputOptions, uint idx) - { - m_inputImage = inputOptions.image(idx); - m_fixedImage = NULL; - m_floatImage = NULL; - - if (const FloatImage * floatImage = inputOptions.floatImage(idx)) - { - m_floatImage = floatImage->clone(); - } - } - - // Assign and take ownership of given image. 
- void setImage(FloatImage * image) - { - m_inputImage = NULL; - m_fixedImage = NULL; - m_floatImage = image; - } - - - // Convert linear float image to fixed image ready for compression. - void toFixedImage(const InputOptions::Private & inputOptions) - { - if (this->asFixedImage() == NULL) - { - nvDebugCheck(m_floatImage != NULL); - - if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f) - { - m_fixedImage = m_floatImage->createImage(); - } - else - { - m_fixedImage = m_floatImage->createImageGammaCorrect(inputOptions.outputGamma); - } - } - } - - // Convert input image to linear float image. - void toFloatImage(const InputOptions::Private & inputOptions) - { - if (m_floatImage == NULL) - { - nvDebugCheck(this->asFixedImage() != NULL); - - m_floatImage = new FloatImage(this->asFixedImage()); - - if (inputOptions.isNormalMap) - { - // Expand normals to [-1, 1] range. - // floatImage->expandNormals(0); - } - else if (inputOptions.inputGamma != 1.0f) - { - // Convert to linear space. - m_floatImage->toLinear(0, 3, inputOptions.inputGamma); - } - } - } - - const FloatImage * asFloatImage() const - { - return m_floatImage.ptr(); - } - - FloatImage * asMutableFloatImage() - { - m_inputImage = NULL; - return m_floatImage.ptr(); - } - - const Image * asFixedImage() const - { - if (m_inputImage != NULL) - { - return m_inputImage; - } - return m_fixedImage.ptr(); - } - - Image * asMutableFixedImage() - { - if (m_inputImage != NULL) - { - // Do not modify input image, create a copy. - m_fixedImage = new Image(*m_inputImage); - m_inputImage = NULL; - } - return m_fixedImage.ptr(); - } - - - private: - const Image * m_inputImage; - AutoPtr m_fixedImage; - AutoPtr m_floatImage; - }; - -} // nvtt namespace - - -Compressor::Compressor() : m(*new Compressor::Private()) -{ - // CUDA initialization. 
- m.cudaSupported = cuda::isHardwarePresent(); - m.cudaEnabled = m.cudaSupported; - - if (m.cudaEnabled) - { -#pragma message(NV_FILE_LINE "FIXME: This code is duplicated below.") - // Select fastest CUDA device. - int device = cuda::getFastestDevice(); - if (!cuda::setDevice(device)) - { - m.cudaEnabled = false; - m.cuda = NULL; - } - else - { - m.cuda = new CudaContext(); - - if (!m.cuda->isValid()) - { - m.cudaEnabled = false; - m.cuda = NULL; - } - } - } -} - -Compressor::~Compressor() -{ - delete &m; - cuda::exit(); -} - - -/// Enable CUDA acceleration. -void Compressor::enableCudaAcceleration(bool enable) -{ - if (m.cudaSupported) - { - m.cudaEnabled = enable; - } - - if (m.cudaEnabled && m.cuda == NULL) - { - // Select fastest CUDA device. - int device = cuda::getFastestDevice(); - if (!cuda::setDevice(device)) - { - m.cudaEnabled = false; - m.cuda = NULL; - } - else - { - m.cuda = new CudaContext(); - - if (!m.cuda->isValid()) - { - m.cudaEnabled = false; - m.cuda = NULL; - } - } - } -} - -/// Return true if CUDA acceleration is enabled, false otherwise. -bool Compressor::isCudaAccelerationEnabled() const -{ - return m.cudaEnabled; -} - - -/// Compress the input texture with the given compression options. -bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const -{ - return m.compress(inputOptions.m, compressionOptions.m, outputOptions.m); -} - -/// Estimate the size of compressing the input with the given options. -int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const -{ - return m.estimateSize(inputOptions.m, compressionOptions.m); -} - - -// RAW api. 
-bool Compressor::compress2D(InputFormat format, int w, int h, void * data, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const -{ -#pragma message(NV_FILE_LINE "TODO: Implement raw compress api") - return false; -} - -int Compressor::estimateSize(int w, int h, int d, const CompressionOptions & compressionOptions) const -{ - const CompressionOptions::Private & co = compressionOptions.m; - - const Format format = co.format; - - uint bitCount = co.getBitCount(); - - return computeImageSize(w, h, d, bitCount, format); -} - - - -/// Create a TexImage. -TexImage Compressor::createTexImage() const -{ - return *new TexImage(); -} - - -bool Compressor::outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const -{ - return m.outputHeader(tex, mipmapCount, compressionOptions.m, outputOptions.m); -} - -bool Compressor::compress(const TexImage & tex, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const -{ -#pragma message(NV_FILE_LINE "TODO: Implement TexImage compress api") - - // @@ Convert to fixed point and call compress2D for each face. - return false; -} - -/// Estimate the size of compressing the given texture. -int Compressor::estimateSize(const TexImage & tex, const CompressionOptions & compressionOptions) const -{ - const uint w = tex.width(); - const uint h = tex.height(); - const uint d = tex.depth(); - const uint faceCount = tex.faceCount(); - - return faceCount * estimateSize(w, h, d, compressionOptions); -} - - - - -bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const -{ - // Make sure enums match. 
- nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp); - nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror); - nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat); - - // Get output handler. - if (!outputOptions.hasValidOutputHandler()) - { - if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen); - return false; - } - - inputOptions.computeTargetExtents(); - - // Output DDS header. - if (!outputHeader(inputOptions, compressionOptions, outputOptions)) - { - return false; - } - - for (uint f = 0; f < inputOptions.faceCount; f++) - { - if (!compressMipmaps(f, inputOptions, compressionOptions, outputOptions)) - { - return false; - } - } - - return true; -} - - -// Output DDS header. -bool Compressor::Private::outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const -{ - // Output DDS header. 
- if (outputOptions.outputHandler == NULL || !outputOptions.outputHeader) - { - return true; - } - - if (outputOptions.container == Container_DDS || outputOptions.container == Container_DDS10) - { - DDSHeader header; - - header.setWidth(inputOptions.targetWidth); - header.setHeight(inputOptions.targetHeight); - - int mipmapCount = inputOptions.realMipmapCount(); - nvDebugCheck(mipmapCount > 0); - - header.setMipmapCount(mipmapCount); - - bool supported = true; - - if (outputOptions.container == Container_DDS10) - { - if (compressionOptions.format == Format_RGBA) - { - int bitcount = compressionOptions.bitcount; - if (bitcount == 0) { - bitcount = compressionOptions.rsize + compressionOptions.gsize + compressionOptions.bsize + compressionOptions.asize; - } - - if (bitcount == 16) - { - if (compressionOptions.rsize == 16) - { - header.setDX10Format(56); // R16_UNORM - } - else - { - // B5G6R5_UNORM - // B5G5R5A1_UNORM - supported = false; - } - } - else if (bitcount == 32) - { - // B8G8R8A8_UNORM - // B8G8R8X8_UNORM - // R8G8B8A8_UNORM - // R10G10B10A2_UNORM - supported = false; - } - else { - supported = false; - } - } - else - { - if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { - header.setDX10Format(71); - if (compressionOptions.format == Format_DXT1a) header.setHasAlphaFlag(true); - if (inputOptions.isNormalMap) header.setNormalFlag(true); - } - else if (compressionOptions.format == Format_DXT3) { - header.setDX10Format(74); - } - else if (compressionOptions.format == Format_DXT5) { - header.setDX10Format(77); - } - else if (compressionOptions.format == Format_DXT5n) { - header.setDX10Format(77); - if (inputOptions.isNormalMap) header.setNormalFlag(true); - } - else if (compressionOptions.format == Format_BC4) { - header.setDX10Format(80); - } - else if (compressionOptions.format == Format_BC5) { - header.setDX10Format(83); - if (inputOptions.isNormalMap) 
header.setNormalFlag(true); - } - else { - supported = false; - } - } - } - else - { - if (compressionOptions.format == Format_RGBA) - { - // Get output bit count. - header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.getBitCount())); - - if (compressionOptions.pixelType == PixelType_Float) - { - if (compressionOptions.rsize == 16 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) - { - header.setFormatCode(111); // D3DFMT_R16F - } - else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) - { - header.setFormatCode(112); // D3DFMT_G16R16F - } - else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 16 && compressionOptions.asize == 16) - { - header.setFormatCode(113); // D3DFMT_A16B16G16R16F - } - else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) - { - header.setFormatCode(114); // D3DFMT_R32F - } - else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) - { - header.setFormatCode(115); // D3DFMT_G32R32F - } - else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 32 && compressionOptions.asize == 32) - { - header.setFormatCode(116); // D3DFMT_A32B32G32R32F - } - else - { - supported = false; - } - } - else // Fixed point - { - const uint bitcount = compressionOptions.getBitCount(); - - if (compressionOptions.bitcount != 0) - { - // Masks already computed. - header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask); - } - else if (bitcount <= 32) - { - // Compute pixel format masks. 
- const uint ashift = 0; - const uint bshift = ashift + compressionOptions.asize; - const uint gshift = bshift + compressionOptions.bsize; - const uint rshift = gshift + compressionOptions.gsize; - - const uint rmask = ((1 << compressionOptions.rsize) - 1) << rshift; - const uint gmask = ((1 << compressionOptions.gsize) - 1) << gshift; - const uint bmask = ((1 << compressionOptions.bsize) - 1) << bshift; - const uint amask = ((1 << compressionOptions.asize) - 1) << ashift; - - header.setPixelFormat(bitcount, rmask, gmask, bmask, amask); - } - else - { - supported = false; - } - } - } - else - { - header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format)); - - if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { - header.setFourCC('D', 'X', 'T', '1'); - if (inputOptions.isNormalMap) header.setNormalFlag(true); - } - else if (compressionOptions.format == Format_DXT3) { - header.setFourCC('D', 'X', 'T', '3'); - } - else if (compressionOptions.format == Format_DXT5) { - header.setFourCC('D', 'X', 'T', '5'); - } - else if (compressionOptions.format == Format_DXT5n) { - header.setFourCC('D', 'X', 'T', '5'); - if (inputOptions.isNormalMap) { - header.setNormalFlag(true); - header.setSwizzleCode('A', '2', 'D', '5'); - //header.setSwizzleCode('x', 'G', 'x', 'R'); - } - } - else if (compressionOptions.format == Format_BC4) { - header.setFourCC('A', 'T', 'I', '1'); - } - else if (compressionOptions.format == Format_BC5) { - header.setFourCC('A', 'T', 'I', '2'); - if (inputOptions.isNormalMap) { - header.setNormalFlag(true); - header.setSwizzleCode('A', '2', 'X', 'Y'); - } - } - else if (compressionOptions.format == Format_CTX1) { - header.setFourCC('C', 'T', 'X', '1'); - if (inputOptions.isNormalMap) header.setNormalFlag(true); - } - else { - supported = false; - } - } - } - - if 
(!supported) - { - // This container does not support the requested format. - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_UnsupportedOutputFormat); - } - - return false; - } - - if (inputOptions.textureType == TextureType_2D) { - header.setTexture2D(); - } - else if (inputOptions.textureType == TextureType_Cube) { - header.setTextureCube(); - } - /*else if (inputOptions.textureType == TextureType_3D) { - header.setTexture3D(); - header.setDepth(inputOptions.targetDepth); - }*/ - - // Swap bytes if necessary. - header.swapBytes(); - - uint headerSize = 128; - if (header.hasDX10Header()) - { - nvStaticCheck(sizeof(DDSHeader) == 128 + 20); - headerSize = 128 + 20; - } - - bool writeSucceed = outputOptions.outputHandler->writeData(&header, headerSize); - if (!writeSucceed && outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_FileWrite); - } - - return writeSucceed; - } - - return true; -} - -bool Compressor::Private::outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - if (tex.width() <= 0 || tex.height() <= 0 || tex.depth() <= 0 || mipmapCount <= 0) - { - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_InvalidInput); - } - return false; - } - - if (outputOptions.outputHandler == NULL || !outputOptions.outputHeader) - { - return true; - } - - // Output DDS header. 
- if (outputOptions.container == Container_DDS || outputOptions.container == Container_DDS10) - { - DDSHeader header; - - header.setWidth(tex.width()); - header.setHeight(tex.height()); - header.setMipmapCount(mipmapCount); - - bool supported = true; - - if (outputOptions.container == Container_DDS10) - { - if (compressionOptions.format == Format_RGBA) - { - int bitcount = compressionOptions.bitcount; - if (bitcount == 0) { - bitcount = compressionOptions.rsize + compressionOptions.gsize + compressionOptions.bsize + compressionOptions.asize; - } - - if (bitcount == 16) - { - if (compressionOptions.rsize == 16) - { - header.setDX10Format(56); // R16_UNORM - } - else - { - // B5G6R5_UNORM - // B5G5R5A1_UNORM - supported = false; - } - } - else if (bitcount == 32) - { - // B8G8R8A8_UNORM - // B8G8R8X8_UNORM - // R8G8B8A8_UNORM - // R10G10B10A2_UNORM - supported = false; - } - else { - supported = false; - } - } - else - { - if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { - header.setDX10Format(71); - if (compressionOptions.format == Format_DXT1a) header.setHasAlphaFlag(true); - if (tex.isNormalMap()) header.setNormalFlag(true); - } - else if (compressionOptions.format == Format_DXT3) { - header.setDX10Format(74); - } - else if (compressionOptions.format == Format_DXT5) { - header.setDX10Format(77); - } - else if (compressionOptions.format == Format_DXT5n) { - header.setDX10Format(77); - if (tex.isNormalMap()) header.setNormalFlag(true); - } - else if (compressionOptions.format == Format_BC4) { - header.setDX10Format(80); - } - else if (compressionOptions.format == Format_BC5) { - header.setDX10Format(83); - if (tex.isNormalMap()) header.setNormalFlag(true); - } - else { - supported = false; - } - } - } - else - { - if (compressionOptions.format == Format_RGBA) - { - // Get output bit count. 
- header.setPitch(computePitch(tex.width(), compressionOptions.getBitCount())); - - if (compressionOptions.pixelType == PixelType_Float) - { - if (compressionOptions.rsize == 16 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) - { - header.setFormatCode(111); // D3DFMT_R16F - } - else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) - { - header.setFormatCode(112); // D3DFMT_G16R16F - } - else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 16 && compressionOptions.asize == 16) - { - header.setFormatCode(113); // D3DFMT_A16B16G16R16F - } - else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) - { - header.setFormatCode(114); // D3DFMT_R32F - } - else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) - { - header.setFormatCode(115); // D3DFMT_G32R32F - } - else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 32 && compressionOptions.asize == 32) - { - header.setFormatCode(116); // D3DFMT_A32B32G32R32F - } - else - { - supported = false; - } - } - else // Fixed point - { - const uint bitcount = compressionOptions.getBitCount(); - - if (compressionOptions.bitcount != 0) - { - // Masks already computed. - header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask); - } - else if (bitcount <= 32) - { - // Compute pixel format masks. 
- const uint ashift = 0; - const uint bshift = ashift + compressionOptions.asize; - const uint gshift = bshift + compressionOptions.bsize; - const uint rshift = gshift + compressionOptions.gsize; - - const uint rmask = ((1 << compressionOptions.rsize) - 1) << rshift; - const uint gmask = ((1 << compressionOptions.gsize) - 1) << gshift; - const uint bmask = ((1 << compressionOptions.bsize) - 1) << bshift; - const uint amask = ((1 << compressionOptions.asize) - 1) << ashift; - - header.setPixelFormat(bitcount, rmask, gmask, bmask, amask); - } - else - { - supported = false; - } - } - } - else - { - header.setLinearSize(computeImageSize(tex.width(), tex.height(), tex.depth(), compressionOptions.bitcount, compressionOptions.format)); - - if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { - header.setFourCC('D', 'X', 'T', '1'); - if (tex.isNormalMap()) header.setNormalFlag(true); - } - else if (compressionOptions.format == Format_DXT3) { - header.setFourCC('D', 'X', 'T', '3'); - } - else if (compressionOptions.format == Format_DXT5) { - header.setFourCC('D', 'X', 'T', '5'); - } - else if (compressionOptions.format == Format_DXT5n) { - header.setFourCC('D', 'X', 'T', '5'); - if (tex.isNormalMap()) { - header.setNormalFlag(true); - header.setSwizzleCode('A', '2', 'D', '5'); - //header.setSwizzleCode('x', 'G', 'x', 'R'); - } - } - else if (compressionOptions.format == Format_BC4) { - header.setFourCC('A', 'T', 'I', '1'); - } - else if (compressionOptions.format == Format_BC5) { - header.setFourCC('A', 'T', 'I', '2'); - if (tex.isNormalMap()) { - header.setNormalFlag(true); - header.setSwizzleCode('A', '2', 'X', 'Y'); - } - } - else if (compressionOptions.format == Format_CTX1) { - header.setFourCC('C', 'T', 'X', '1'); - if (tex.isNormalMap()) header.setNormalFlag(true); - } - else { - supported = false; - } - } - } - - if (!supported) - { - // This container does not support the 
requested format. - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_UnsupportedOutputFormat); - } - - return false; - } - - if (tex.textureType() == TextureType_2D) { - header.setTexture2D(); - } - else if (tex.textureType() == TextureType_Cube) { - header.setTextureCube(); - } - /*else if (tex.textureType() == TextureType_3D) { - header.setTexture3D(); - header.setDepth(tex.depth()); - }*/ - - // Swap bytes if necessary. - header.swapBytes(); - - uint headerSize = 128; - if (header.hasDX10Header()) - { - nvStaticCheck(sizeof(DDSHeader) == 128 + 20); - headerSize = 128 + 20; - } - - bool writeSucceed = outputOptions.outputHandler->writeData(&header, headerSize); - if (!writeSucceed && outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_FileWrite); - } - - return writeSucceed; - } - - return true; -} - - -bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const -{ - uint w = inputOptions.targetWidth; - uint h = inputOptions.targetHeight; - uint d = inputOptions.targetDepth; - - Mipmap mipmap; - - const uint mipmapCount = inputOptions.realMipmapCount(); - nvDebugCheck(mipmapCount > 0); - - for (uint m = 0; m < mipmapCount; m++) - { - if (outputOptions.outputHandler) - { - int size = computeImageSize(w, h, d, compressionOptions.getBitCount(), compressionOptions.format); - outputOptions.outputHandler->beginImage(size, w, h, d, f, m); - } - - if (!initMipmap(mipmap, inputOptions, w, h, d, f, m)) - { - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_InvalidInput); - return false; - } - } - - if (compressionOptions.pixelType == PixelType_Float) - { - mipmap.toFloatImage(inputOptions); - - // @@ Convert to linear space. - } - else - { - // Convert linear float image to fixed image ready for compression. 
- mipmap.toFixedImage(inputOptions); - - if (inputOptions.premultiplyAlpha) - { - premultiplyAlphaMipmap(mipmap, inputOptions); - } - -#pragma message(NV_FILE_LINE "TODO: All color transforms should be done here!") - - // Apply gamma space color transforms: - if (inputOptions.colorTransform == ColorTransform_YCoCg) - { - ColorSpace::RGBtoYCoCg_R(mipmap.asMutableFixedImage()); - } - else if (inputOptions.colorTransform == ColorTransform_ScaledYCoCg) - { - // @@ TODO - //ColorSpace::RGBtoYCoCg_R(mipmap.asMutableFixedImage()); - } - - /*// Apply linear transforms in linear space. - if (inputOptions.colorTransform == ColorTransform_Linear) - { - FloatImage * image = mipmap.asMutableFloatImage(); - nvDebugCheck(image != NULL); - - Vector4 offset( - inputOptions.colorOffsets[0], - inputOptions.colorOffsets[1], - inputOptions.colorOffsets[2], - inputOptions.colorOffsets[3]); - - image->transform(0, inputOptions.linearTransform, offset); - } - else if (inputOptions.colorTransform == ColorTransform_Swizzle) - { - FloatImage * image = mipmap.asMutableFloatImage(); - nvDebugCheck(image != NULL); - - image->swizzle(0, inputOptions.swizzleTransform[0], inputOptions.swizzleTransform[1], inputOptions.swizzleTransform[2], inputOptions.swizzleTransform[3]); - }*/ - - quantizeMipmap(mipmap, compressionOptions); - } - - compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions); - - // Compute extents of next mipmap: - w = max(1U, w / 2); - h = max(1U, h / 2); - d = max(1U, d / 2); - } - - return true; -} - -bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const -{ - // Find image from input. - int inputIdx = findExactMipmap(inputOptions, w, h, d, f); - - if ((inputIdx == -1 || inputOptions.convertToNormalMap) && m != 0) - { - // Generate from last, when mipmap not found, or normal map conversion enabled. 
- downsampleMipmap(mipmap, inputOptions); - } - else - { - if (inputIdx != -1) - { - // If input mipmap found, then get from input. - mipmap.setFromInput(inputOptions, inputIdx); - } - else - { - // If not found, resize closest mipmap. - inputIdx = findClosestMipmap(inputOptions, w, h, d, f); - - if (inputIdx == -1) - { - return false; - } - - mipmap.setFromInput(inputOptions, inputIdx); - - scaleMipmap(mipmap, inputOptions, w, h, d); - } - - processInputImage(mipmap, inputOptions); - } - - return true; -} - -int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const -{ - for (int m = 0; m < int(inputOptions.mipmapCount); m++) - { - int idx = f * inputOptions.mipmapCount + m; - const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx]; - - if (inputImage.width == int(w) && inputImage.height == int(h) && inputImage.depth == int(d)) - { - if (inputImage.hasValidData()) - { - return idx; - } - return -1; - } - else if (inputImage.width < int(w) || inputImage.height < int(h) || inputImage.depth < int(d)) - { - return -1; - } - } - - return -1; -} - -int Compressor::Private::findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const -{ - int bestIdx = -1; - - for (int m = 0; m < int(inputOptions.mipmapCount); m++) - { - int idx = f * inputOptions.mipmapCount + m; - const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx]; - - if (inputImage.hasValidData()) - { - int difference = (inputImage.width - w) + (inputImage.height - h) + (inputImage.depth - d); - - if (difference < 0) - { - if (bestIdx == -1) - { - bestIdx = idx; - } - - return bestIdx; - } - - bestIdx = idx; - } - } - - return bestIdx; -} - -// Create mipmap from the given image. -void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const -{ - // Make sure that floating point linear representation is available. 
- mipmap.toFloatImage(inputOptions); - - const FloatImage * floatImage = mipmap.asFloatImage(); - FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)inputOptions.wrapMode; - - if (inputOptions.alphaMode == AlphaMode_Transparency) - { - if (inputOptions.mipmapFilter == MipmapFilter_Box) - { - BoxFilter filter; - mipmap.setImage(floatImage->downSample(filter, wrapMode, 3)); - } - else if (inputOptions.mipmapFilter == MipmapFilter_Triangle) - { - TriangleFilter filter; - mipmap.setImage(floatImage->downSample(filter, wrapMode, 3)); - } - else /*if (inputOptions.mipmapFilter == MipmapFilter_Kaiser)*/ - { - nvDebugCheck(inputOptions.mipmapFilter == MipmapFilter_Kaiser); - KaiserFilter filter(inputOptions.kaiserWidth); - filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch); - mipmap.setImage(floatImage->downSample(filter, wrapMode, 3)); - } - } - else - { - if (inputOptions.mipmapFilter == MipmapFilter_Box) - { - // Use fast downsample. - mipmap.setImage(floatImage->fastDownSample()); - } - else if (inputOptions.mipmapFilter == MipmapFilter_Triangle) - { - TriangleFilter filter; - mipmap.setImage(floatImage->downSample(filter, wrapMode)); - } - else /*if (inputOptions.mipmapFilter == MipmapFilter_Kaiser)*/ - { - nvDebugCheck(inputOptions.mipmapFilter == MipmapFilter_Kaiser); - KaiserFilter filter(inputOptions.kaiserWidth); - filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch); - mipmap.setImage(floatImage->downSample(filter, wrapMode)); - } - } - - // Normalize mipmap. - if ((inputOptions.isNormalMap || inputOptions.convertToNormalMap) && inputOptions.normalizeMipmaps) - { - normalizeNormalMap(mipmap.asMutableFloatImage()); - } -} - - -void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const -{ - mipmap.toFloatImage(inputOptions); - - // @@ Add more filters. - // @@ Select different filters for downscaling and reconstruction. - - // Resize image. 
- BoxFilter boxFilter; - - if (inputOptions.alphaMode == AlphaMode_Transparency) - { - mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode, 3)); - } - else - { - mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode)); - } -} - - -void Compressor::Private::premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const -{ - nvDebugCheck(mipmap.asFixedImage() != NULL); - - Image * image = mipmap.asMutableFixedImage(); - - const uint w = image->width(); - const uint h = image->height(); - - const uint count = w * h; - - for (uint i = 0; i < count; ++i) - { - Color32 c = image->pixel(i); - - c.r = (uint(c.r) * uint(c.a)) >> 8; - c.g = (uint(c.g) * uint(c.a)) >> 8; - c.b = (uint(c.b) * uint(c.a)) >> 8; - - image->pixel(i) = c; - } -} - -// Process an input image: Convert to normal map, normalize, or convert to linear space. -void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const -{ - if (inputOptions.convertToNormalMap) - { - mipmap.toFixedImage(inputOptions); - - Vector4 heightScale = inputOptions.heightFactors; - mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale)); - } - else if (inputOptions.isNormalMap) - { - if (inputOptions.normalizeMipmaps) - { - // If floating point image available, normalize in place. 
- if (mipmap.asFloatImage() == NULL) - { - FloatImage * floatImage = new FloatImage(mipmap.asFixedImage()); - normalizeNormalMap(floatImage); - mipmap.setImage(floatImage); - } - else - { - normalizeNormalMap(mipmap.asMutableFloatImage()); - mipmap.setImage(mipmap.asMutableFloatImage()); - } - } - } - else - { - if (inputOptions.inputGamma != inputOptions.outputGamma || - inputOptions.colorTransform == ColorTransform_Linear || - inputOptions.colorTransform == ColorTransform_Swizzle) - { - mipmap.toFloatImage(inputOptions); - } - -#pragma message(NV_FILE_LINE "FIXME: Do not perform color transforms here!") - - /*// Apply linear transforms in linear space. - if (inputOptions.colorTransform == ColorTransform_Linear) - { - FloatImage * image = mipmap.asMutableFloatImage(); - nvDebugCheck(image != NULL); - - Vector4 offset( - inputOptions.colorOffsets[0], - inputOptions.colorOffsets[1], - inputOptions.colorOffsets[2], - inputOptions.colorOffsets[3]); - - image->transform(0, inputOptions.linearTransform, offset); - } - else if (inputOptions.colorTransform == ColorTransform_Swizzle) - { - FloatImage * image = mipmap.asMutableFloatImage(); - nvDebugCheck(image != NULL); - - image->swizzle(0, inputOptions.swizzleTransform[0], inputOptions.swizzleTransform[1], inputOptions.swizzleTransform[2], inputOptions.swizzleTransform[3]); - }*/ - } -} - - -// Quantize the given mipmap according to the compression options. 
-void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const -{ - nvDebugCheck(mipmap.asFixedImage() != NULL); - - if (compressionOptions.binaryAlpha) - { - if (compressionOptions.enableAlphaDithering) - { - Quantize::FloydSteinberg_BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold); - } - else - { - Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold); - } - } - - if (compressionOptions.enableColorDithering || compressionOptions.enableAlphaDithering) - { - uint rsize = 8; - uint gsize = 8; - uint bsize = 8; - uint asize = 8; - - if (compressionOptions.enableColorDithering) - { - if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5) - { - rsize = 5; - gsize = 6; - bsize = 5; - } - else if (compressionOptions.format == Format_RGB) - { - uint rshift, gshift, bshift; - PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize); - PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize); - PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize); - } - } - - if (compressionOptions.enableAlphaDithering) - { - if (compressionOptions.format == Format_DXT3) - { - asize = 4; - } - else if (compressionOptions.format == Format_RGB) - { - uint ashift; - PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize); - } - } - - if (compressionOptions.binaryAlpha) - { - asize = 8; // Already quantized. 
- } - - Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize); - } -} - - -CompressorInterface * Compressor::Private::chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const -{ - if (compressionOptions.format == Format_DXT1) - { -#if defined(HAVE_S3QUANT) - if (compressionOptions.externalCompressor == "s3") return new S3CompressorDXT1; - else -#endif - -#if defined(HAVE_ATITC) - if (compressionOptions.externalCompressor == "ati") return new AtiCompressorDXT1; - else -#endif - -#if defined(HAVE_SQUISH) - if (compressionOptions.externalCompressor == "squish") return new SquishCompressorDXT1; - else -#endif - -#if defined(HAVE_D3DX) - if (compressionOptions.externalCompressor == "d3dx") return new D3DXCompressorDXT1; - else -#endif - -#if defined(HAVE_D3DX) - if (compressionOptions.externalCompressor == "stb") return new StbCompressorDXT1; - else -#endif - - if (compressionOptions.quality == Quality_Fastest) - { - return new FastCompressorDXT1; - } - - return new NormalCompressorDXT1; - } - else if (compressionOptions.format == Format_DXT1a) - { - if (compressionOptions.quality == Quality_Fastest) - { - return new FastCompressorDXT1a; - } - - return new NormalCompressorDXT1a; - } - else if (compressionOptions.format == Format_DXT1n) - { - // Not supported. 
- } - else if (compressionOptions.format == Format_DXT3) - { - if (compressionOptions.quality == Quality_Fastest) - { - return new FastCompressorDXT3; - } - - return new NormalCompressorDXT3; - } - else if (compressionOptions.format == Format_DXT5) - { -#if defined(HAVE_ATITC) - if (compressionOptions.externalCompressor == "ati") return new AtiCompressorDXT5; - else -#endif - - if (compressionOptions.quality == Quality_Fastest) - { - return new FastCompressorDXT5; - } - - return new NormalCompressorDXT5; - } - else if (compressionOptions.format == Format_DXT5n) - { - if (compressionOptions.quality == Quality_Fastest) - { - return new FastCompressorDXT5n; - } - - return new NormalCompressorDXT5n; - } - else if (compressionOptions.format == Format_BC4) - { - if (compressionOptions.quality == Quality_Fastest || compressionOptions.quality == Quality_Normal) - { - return new FastCompressorBC4; - } - - return new ProductionCompressorBC4; - } - else if (compressionOptions.format == Format_BC5) - { - if (compressionOptions.quality == Quality_Fastest || compressionOptions.quality == Quality_Normal) - { - return new FastCompressorBC5; - } - - return new ProductionCompressorBC5; - } - else if (compressionOptions.format == Format_CTX1) - { - // Not supported. - } - else if (compressionOptions.format == Format_BC6) - { - // Not supported. - } - else if (compressionOptions.format == Format_BC7) - { - // Not supported. - } - - return NULL; -} - - -CompressorInterface * Compressor::Private::chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const -{ - nvDebugCheck(cudaSupported); - - if (compressionOptions.quality == Quality_Fastest) - { - // Do not use CUDA compressors in fastest quality mode. 
- return NULL; - } - - if (compressionOptions.format == Format_DXT1) - { - return new CudaCompressorDXT1(*cuda); - } - else if (compressionOptions.format == Format_DXT1a) - { -#pragma message(NV_FILE_LINE "TODO: Implement CUDA DXT1a compressor.") - } - else if (compressionOptions.format == Format_DXT1n) - { - // Not supported. - } - else if (compressionOptions.format == Format_DXT3) - { - return new CudaCompressorDXT3(*cuda); - } - else if (compressionOptions.format == Format_DXT5) - { - return new CudaCompressorDXT5(*cuda); - } - else if (compressionOptions.format == Format_DXT5n) - { - // @@ Return CUDA compressor. - } - else if (compressionOptions.format == Format_BC4) - { - // Not supported. - } - else if (compressionOptions.format == Format_BC5) - { - // Not supported. - } - else if (compressionOptions.format == Format_CTX1) - { - // @@ Return CUDA compressor. - } - else if (compressionOptions.format == Format_BC6) - { - // Not supported. - } - else if (compressionOptions.format == Format_BC7) - { - // Not supported. - } - - return NULL; -} - - - -// Compress the given mipmap. -bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const -{ - if (compressionOptions.format == Format_RGBA) - { - // Pixel format conversion. - if (compressionOptions.pixelType == PixelType_Float) - { - compressRGB(mipmap.asFloatImage(), outputOptions, compressionOptions); - } - else - { - compressRGB(mipmap.asFixedImage(), outputOptions, compressionOptions); - } - } - else - { - const Image * image = mipmap.asFixedImage(); - nvDebugCheck(image != NULL); - - // Decide what compressor to use. 
- CompressorInterface * compressor = NULL; - if (cudaEnabled && image->width() * image->height() >= 512) - { - compressor = chooseGpuCompressor(compressionOptions); - } - if (compressor == NULL) - { - compressor = chooseCpuCompressor(compressionOptions); - } - - if (compressor == NULL) - { - if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature); - } - else - { - compressor->compress(InputFormat_BGRA_8UB, inputOptions.alphaMode, image->width(), image->height(), (void *)image->pixels(), compressionOptions, outputOptions); - - delete compressor; - } - } - - return true; -} - - -int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const -{ - const Format format = compressionOptions.format; - - uint bitCount = compressionOptions.bitcount; - if (format == Format_RGBA && bitCount == 0) bitCount = compressionOptions.rsize + compressionOptions.gsize + compressionOptions.bsize + compressionOptions.asize; - - inputOptions.computeTargetExtents(); - - uint mipmapCount = inputOptions.realMipmapCount(); - - int size = 0; - - for (uint f = 0; f < inputOptions.faceCount; f++) - { - uint w = inputOptions.targetWidth; - uint h = inputOptions.targetHeight; - uint d = inputOptions.targetDepth; - - for (uint m = 0; m < mipmapCount; m++) - { - size += computeImageSize(w, h, d, bitCount, format); - - // Compute extents of next mipmap: - w = max(1U, w / 2); - h = max(1U, h / 2); - d = max(1U, d / 2); - } - } - - return size; -} diff --git a/src/nvtt/InputOptions.cpp b/src/nvtt/InputOptions.cpp index 03e351f..875a1eb 100644 --- a/src/nvtt/InputOptions.cpp +++ b/src/nvtt/InputOptions.cpp @@ -23,11 +23,8 @@ #include // memcpy -#include // nextPowerOfTwo #include -#include - #include "nvtt.h" #include "InputOptions.h" @@ -97,15 +94,13 @@ void InputOptions::reset() m.textureType = TextureType_2D; m.inputFormat = InputFormat_BGRA_8UB; - m.alphaMode = AlphaMode_Transparency; 
+ m.alphaMode = AlphaMode_None; m.inputGamma = 2.2f; m.outputGamma = 2.2f; m.colorTransform = ColorTransform_None; m.linearTransform = Matrix(identity); - for (int i = 0; i < 4; i++) m.colorOffsets[i] = 0; - for (int i = 0; i < 4; i++) m.swizzleTransform[i] = i; m.generateMipmaps = true; m.maxLevel = -1; @@ -123,8 +118,6 @@ void InputOptions::reset() m.maxExtent = 0; m.roundMode = RoundMode_None; - - m.premultiplyAlpha = false; } @@ -168,8 +161,7 @@ void InputOptions::setTextureLayout(TextureType type, int width, int height, int img.mipLevel = mipLevel; img.face = f; - img.uint8data = NULL; - img.floatdata = NULL; + img.data = NULL; w = max(1U, w / 2); h = max(1U, h / 2); @@ -207,116 +199,14 @@ bool InputOptions::setMipmapData(const void * data, int width, int height, int d return false; } - switch(m.inputFormat) - { - case InputFormat_BGRA_8UB: - if (Image * image = new nv::Image()) - { - image->allocate(width, height); - memcpy(image->pixels(), data, width * height * 4); - m.images[idx].uint8data = image; - } - else - { - // @@ Out of memory error. - return false; - } - break; - case InputFormat_RGBA_32F: - if (FloatImage * image = new nv::FloatImage()) - { - const float * floatData = (const float *)data; - image->allocate(4, width, height); - - for (int c = 0; c < 4; c++) - { - float * channel = image->channel(c); - for (int i = 0; i < width * height; i++) - { - channel[i] = floatData[i*4 + c]; - } - } - - m.images[idx].floatdata = image; - } - else - { - // @@ Out of memory error. 
- return false; - } - break; - default: - return false; - } + m.images[idx].data = new nv::Image(); + m.images[idx].data->allocate(width, height); + memcpy(m.images[idx].data->pixels(), data, width * height * 4); return true; } -// Copies data -bool InputOptions::setMipmapChannelData(const void * data, int channel, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/) -{ - nvCheck(depth == 1); - nvCheck(channel >= 0 && channel < 4); - - const int idx = face * m.mipmapCount + mipLevel; - - if (m.images[idx].width != width || m.images[idx].height != height || m.images[idx].depth != depth || m.images[idx].mipLevel != mipLevel || m.images[idx].face != face) - { - // Invalid dimension or index. - return false; - } - - // Allocate image if not allocated already. - if (m.inputFormat == InputFormat_BGRA_8UB) - { - m.images[idx].floatdata = NULL; - if (m.images[idx].uint8data == NULL) - { - m.images[idx].uint8data = new Image(); - m.images[idx].uint8data->allocate(width, height); - m.images[idx].uint8data->fill(Color32(0,0,0,0)); - } - } - else if (m.inputFormat == InputFormat_RGBA_32F) - { - m.images[idx].uint8data = NULL; - if (m.images[idx].floatdata == NULL) - { - m.images[idx].floatdata = new FloatImage(); - m.images[idx].floatdata->allocate(4, width, height); - m.images[idx].floatdata->clear(); - } - - - } - else - { - m.images[idx].floatdata = NULL; - m.images[idx].uint8data = NULL; - return false; - } - - // Copy channel data to image. - if (m.inputFormat == InputFormat_BGRA_8UB) - { - // @@ TODO - } - else if (m.inputFormat == InputFormat_RGBA_32F) - { - const float * floatData = (const float *)data; - float * channelPtr = m.images[idx].floatdata->channel(channel); - - for (int i = 0; i < width * height; i++) - { - channelPtr[i] = floatData[i]; - } - } - - return true; -} - - /// Describe the format of the input. 
void InputOptions::setFormat(InputFormat format) { @@ -411,32 +301,8 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2, { nvCheck(channel >= 0 && channel < 4); - m.linearTransform(channel, 0) = w0; - m.linearTransform(channel, 1) = w1; - m.linearTransform(channel, 2) = w2; - m.linearTransform(channel, 3) = w3; -} - -void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset) -{ - nvCheck(channel >= 0 && channel < 4); - - setLinearTransform(channel, w0, w1, w2, w3); - - m.colorOffsets[channel] = offset; -} - -void InputOptions::setSwizzleTransform(int x, int y, int z, int w) -{ - nvCheck(x >= 0 && x <= 6); - nvCheck(y >= 0 && y <= 6); - nvCheck(z >= 0 && z <= 6); - nvCheck(w >= 0 && w <= 6); - - m.swizzleTransform[0] = x; - m.swizzleTransform[1] = y; - m.swizzleTransform[2] = z; - m.swizzleTransform[3] = w; + Vector4 w(w0, w1, w2, w3); + //m.linearTransform.setRow(channel, w); } void InputOptions::setMaxExtents(int e) @@ -450,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode) m.roundMode = mode; } -void InputOptions::setPremultiplyAlpha(bool b) -{ - m.premultiplyAlpha = b; -} void InputOptions::Private::computeTargetExtents() const { @@ -533,7 +395,7 @@ const Image * InputOptions::Private::image(uint face, uint mipmap) const nvDebugCheck(image.face == face); nvDebugCheck(image.mipLevel == mipmap); - return image.uint8data.ptr(); + return image.data.ptr(); } const Image * InputOptions::Private::image(uint idx) const @@ -542,14 +404,5 @@ const Image * InputOptions::Private::image(uint idx) const const InputImage & image = this->images[idx]; - return image.uint8data.ptr(); -} - -const FloatImage * InputOptions::Private::floatImage(uint idx) const -{ - nvDebugCheck(idx < faceCount * mipmapCount); - - const InputImage & image = this->images[idx]; - - return image.floatdata.ptr(); + return image.data.ptr(); } diff --git a/src/nvtt/InputOptions.h b/src/nvtt/InputOptions.h index 
444a7ca..2c922af 100644 --- a/src/nvtt/InputOptions.h +++ b/src/nvtt/InputOptions.h @@ -28,7 +28,6 @@ #include #include #include -#include #include "nvtt.h" namespace nvtt @@ -57,8 +56,6 @@ namespace nvtt // Color transform. ColorTransform colorTransform; nv::Matrix linearTransform; - float colorOffsets[4]; - uint swizzleTransform[4]; // Mipmap generation options. bool generateMipmaps; @@ -81,8 +78,6 @@ namespace nvtt uint maxExtent; RoundMode roundMode; - bool premultiplyAlpha; - // @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere... mutable uint targetWidth; mutable uint targetHeight; @@ -94,9 +89,7 @@ namespace nvtt int realMipmapCount() const; const nv::Image * image(uint face, uint mipmap) const; - const nv::Image * image(uint idx) const; - - const nv::FloatImage * floatImage(uint idx) const; + const nv::Image * image(uint idx) const; }; @@ -105,8 +98,6 @@ namespace nvtt { InputImage() {} - bool hasValidData() const { return uint8data != NULL || floatdata != NULL; } - int mipLevel; int face; @@ -114,8 +105,7 @@ namespace nvtt int height; int depth; - nv::AutoPtr uint8data; - nv::AutoPtr floatdata; + nv::AutoPtr data; }; } // nvtt namespace diff --git a/src/nvtt/OptimalCompressDXT.cpp b/src/nvtt/OptimalCompressDXT.cpp index 3070519..4460781 100644 --- a/src/nvtt/OptimalCompressDXT.cpp +++ b/src/nvtt/OptimalCompressDXT.cpp @@ -21,17 +21,16 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. 
-#include "OptimalCompressDXT.h" -#include "SingleColorLookup.h" +#include // swap + +#include #include #include -#include - -#include // swap +#include "OptimalCompressDXT.h" +#include "SingleColorLookup.h" -#include using namespace nv; using namespace OptimalCompress; @@ -40,37 +39,10 @@ using namespace OptimalCompress; namespace { - static int greenDistance(int g0, int g1) - { - //return abs(g0 - g1); - int d = g0 - g1; - return d * d; - } - - static int alphaDistance(int a0, int a1) - { - //return abs(a0 - a1); - int d = a0 - a1; - return d * d; - } - - static uint nearestGreen4(uint green, uint maxGreen, uint minGreen) - { - uint bias = maxGreen + (maxGreen - minGreen) / 6; - - uint index = 0; - if (maxGreen - minGreen != 0) index = clamp(3 * (bias - green) / (maxGreen - minGreen), 0U, 3U); - - return (index * minGreen + (3 - index) * maxGreen) / 3; - } - - static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block, int bestError = INT_MAX) + static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block) { nvDebugCheck(block != NULL); - // uint g0 = (block->col0.g << 2) | (block->col0.g >> 4); - // uint g1 = (block->col1.g << 2) | (block->col1.g >> 4); - int palette[4]; palette[0] = (block->col0.g << 2) | (block->col0.g >> 4); palette[1] = (block->col1.g << 2) | (block->col1.g >> 4); @@ -78,24 +50,17 @@ namespace palette[3] = (2 * palette[1] + palette[0]) / 3; int totalError = 0; + for (int i = 0; i < 16; i++) { const int green = rgba.color(i).g; - int error = greenDistance(green, palette[0]); - error = min(error, greenDistance(green, palette[1])); - error = min(error, greenDistance(green, palette[2])); - error = min(error, greenDistance(green, palette[3])); - + int error = abs(green - palette[0]); + error = min(error, abs(green - palette[1])); + error = min(error, abs(green - palette[2])); + error = min(error, abs(green - palette[3])); + totalError += error; - - // totalError += nearestGreen4(green, g0, g1); - - if 
(totalError > bestError) - { - // early out - return totalError; - } } return totalError; @@ -113,10 +78,10 @@ namespace { const int color = rgba.color(i).g; - uint d0 = greenDistance(color0, color); - uint d1 = greenDistance(color1, color); - uint d2 = greenDistance(color2, color); - uint d3 = greenDistance(color3, color); + uint d0 = abs(color0 - color); + uint d1 = abs(color1 - color); + uint d2 = abs(color2 - color); + uint d3 = abs(color3 - color); uint b0 = d0 > d3; uint b1 = d1 > d2; @@ -137,78 +102,49 @@ namespace // Choose quantized color that produces less error. Used by DXT3 compressor. inline static uint quantize4(uint8 a) { - int q0 = max(int(a >> 4) - 1, 0); + int q0 = (a >> 4) - 1; int q1 = (a >> 4); - int q2 = min(int(a >> 4) + 1, 0xF); + int q2 = (a >> 4) + 1; q0 = (q0 << 4) | q0; q1 = (q1 << 4) | q1; q2 = (q2 << 4) | q2; - int d0 = alphaDistance(q0, a); - int d1 = alphaDistance(q1, a); - int d2 = alphaDistance(q2, a); + int d0 = abs(q0 - a); + int d1 = abs(q1 - a); + int d2 = abs(q2 - a); if (d0 < d1 && d0 < d2) return q0 >> 4; if (d1 < d2) return q1 >> 4; return q2 >> 4; } - static uint nearestAlpha8(uint alpha, uint maxAlpha, uint minAlpha) - { - float bias = maxAlpha + float(maxAlpha - minAlpha) / (2.0f * 7.0f); - float scale = 7.0f / float(maxAlpha - minAlpha); - - uint index = (uint)clamp((bias - float(alpha)) * scale, 0.0f, 7.0f); - - return (index * minAlpha + (7 - index) * maxAlpha) / 7; - } - - static uint computeAlphaError8(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX) - { - int totalError = 0; - - for (uint i = 0; i < 16; i++) - { - uint8 alpha = rgba.color(i).a; - - totalError += alphaDistance(alpha, nearestAlpha8(alpha, block->alpha0, block->alpha1)); - - if (totalError > bestError) - { - // early out - return totalError; - } - } - - return totalError; - } - - static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX) + static uint 
computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block) { uint8 alphas[8]; block->evaluatePalette(alphas); - int totalError = 0; + uint totalError = 0; for (uint i = 0; i < 16; i++) { uint8 alpha = rgba.color(i).a; - int minDist = INT_MAX; + uint besterror = 256*256; + uint best; for (uint p = 0; p < 8; p++) { - int dist = alphaDistance(alpha, alphas[p]); - minDist = min(dist, minDist); - } - - totalError += minDist; + int d = alphas[p] - alpha; + uint error = d * d; - if (totalError > bestError) - { - // early out - return totalError; + if (error < besterror) + { + besterror = error; + best = p; + } } + + totalError += besterror; } return totalError; @@ -223,21 +159,22 @@ namespace { uint8 alpha = rgba.color(i).a; - int minDist = INT_MAX; - int bestIndex = 8; - for (uint p = 0; p < 8; p++) + uint besterror = 256*256; + uint best = 8; + for(uint p = 0; p < 8; p++) { - int dist = alphaDistance(alpha, alphas[p]); + int d = alphas[p] - alpha; + uint error = d * d; - if (dist < minDist) + if (error < besterror) { - minDist = dist; - bestIndex = p; + besterror = error; + best = p; } } - nvDebugCheck(bestIndex < 8); + nvDebugCheck(best < 8); - block->setIndex(i, bestIndex); + block->setIndex(i, best); } } @@ -280,23 +217,6 @@ void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock) } } -void OptimalCompress::compressDXT1G(uint8 g, BlockDXT1 * dxtBlock) -{ - dxtBlock->col0.r = 31; - dxtBlock->col0.g = OMatch6[g][0]; - dxtBlock->col0.b = 0; - dxtBlock->col1.r = 31; - dxtBlock->col1.g = OMatch6[g][1]; - dxtBlock->col1.b = 0; - dxtBlock->indices = 0xaaaaaaaa; - - if (dxtBlock->col0.u < dxtBlock->col1.u) - { - swap(dxtBlock->col0.u, dxtBlock->col1.u); - dxtBlock->indices ^= 0x55555555; - } -} - // Brute force green channel compressor void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block) @@ -306,23 +226,12 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block) uint8 ming = 63; uint8 maxg = 0; 
- bool isSingleColor = true; - uint8 singleColor = rgba.color(0).g; - // Get min/max green. for (uint i = 0; i < 16; i++) { - uint8 green = (rgba.color(i).g + 1) >> 2; + uint8 green = rgba.color(i).g >> 2; ming = min(ming, green); maxg = max(maxg, green); - - if (rgba.color(i).g != singleColor) isSingleColor = false; - } - - if (isSingleColor) - { - compressDXT1G(singleColor, block); - return; } block->col0.r = 31; @@ -332,38 +241,36 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block) block->col0.b = 0; block->col1.b = 0; - int bestError = computeGreenError(rgba, block); - int bestg0 = maxg; - int bestg1 = ming; - - // Expand search space a bit. - const int greenExpand = 4; - ming = (ming <= greenExpand) ? 0 : ming - greenExpand; - maxg = (maxg >= 63-greenExpand) ? 63 : maxg + greenExpand; - - for (int g0 = ming+1; g0 <= maxg; g0++) + if (maxg - ming > 4) { - for (int g1 = ming; g1 < g0; g1++) + int besterror = computeGreenError(rgba, block); + int bestg0 = maxg; + int bestg1 = ming; + + for (int g0 = ming+5; g0 < maxg; g0++) { - block->col0.g = g0; - block->col1.g = g1; - int error = computeGreenError(rgba, block, bestError); - - if (error < bestError) + for (int g1 = ming; g1 < g0-4; g1++) { - bestError = error; - bestg0 = g0; - bestg1 = g1; + if ((maxg-g0) + (g1-ming) > besterror) + continue; + + block->col0.g = g0; + block->col1.g = g1; + int error = computeGreenError(rgba, block); + + if (error < besterror) + { + besterror = error; + bestg0 = g0; + bestg1 = g1; + } } } + + block->col0.g = bestg0; + block->col1.g = bestg1; } - block->col0.g = bestg0; - block->col1.g = bestg1; - - nvDebugCheck(bestg0 == bestg1 || block->isFourColorMode()); - - Color32 palette[4]; block->evaluatePalette(palette); block->indices = computeGreenIndices(rgba, palette); @@ -406,26 +313,42 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx dxtBlock->alpha0 = maxa; dxtBlock->alpha1 = mina; + /*int centroidDist = 256; + int 
centroid; + + // Get the closest to the centroid. + for (uint i = 0; i < 16; i++) + { + uint8 alpha = rgba.color(i).a; + int dist = abs(alpha - (maxa + mina) / 2); + if (dist < centroidDist) + { + centroidDist = dist; + centroid = alpha; + } + }*/ + if (maxa - mina > 8) { int besterror = computeAlphaError(rgba, dxtBlock); int besta0 = maxa; int besta1 = mina; - // Expand search space a bit. - const int alphaExpand = 8; - mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand; - maxa = (maxa >= 255-alphaExpand) ? 255 : maxa + alphaExpand; - for (int a0 = mina+9; a0 < maxa; a0++) { for (int a1 = mina; a1 < a0-8; a1++) + //for (int a1 = mina; a1 < maxa; a1++) { - nvDebugCheck(a0 - a1 > 8); + //nvCheck(abs(a1-a0) > 8); + + //if (abs(a0 - a1) < 8) continue; + //if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror) + if ((maxa-a0) + (a1-mina) > besterror) + continue; dxtBlock->alpha0 = a0; dxtBlock->alpha1 = a1; - int error = computeAlphaError(rgba, dxtBlock, besterror); + int error = computeAlphaError(rgba, dxtBlock); if (error < besterror) { diff --git a/src/nvtt/OptimalCompressDXT.h b/src/nvtt/OptimalCompressDXT.h index f315082..6f987f2 100644 --- a/src/nvtt/OptimalCompressDXT.h +++ b/src/nvtt/OptimalCompressDXT.h @@ -26,8 +26,6 @@ #include -#include - namespace nv { struct ColorBlock; @@ -41,7 +39,6 @@ namespace nv { void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock); void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock); - void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock); void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block); void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock); diff --git a/src/nvtt/OutputOptions.cpp b/src/nvtt/OutputOptions.cpp index c02719e..7cf8420 100644 --- a/src/nvtt/OutputOptions.cpp +++ b/src/nvtt/OutputOptions.cpp @@ -33,9 +33,6 @@ OutputOptions::OutputOptions() : m(*new OutputOptions::Private()) OutputOptions::~OutputOptions() { - // Cleanup output handler. 
- setOutputHandler(NULL); - delete &m; } @@ -46,31 +43,20 @@ void OutputOptions::reset() m.outputHandler = NULL; m.errorHandler = NULL; m.outputHeader = true; - m.container = Container_DDS; } /// Set output file name. void OutputOptions::setFileName(const char * fileName) { - m.fileName = fileName; // @@ Do we need to record filename? + m.fileName = fileName; m.outputHandler = NULL; - - DefaultOutputHandler * oh = new DefaultOutputHandler(fileName); - if (!oh->stream.isError()) - { - m.outputHandler = oh; - } } /// Set output handler. void OutputOptions::setOutputHandler(OutputHandler * outputHandler) { - if (!m.fileName.isNull()) - { - delete m.outputHandler; - m.fileName.reset(); - } + m.fileName.reset(); m.outputHandler = outputHandler; } @@ -86,20 +72,31 @@ void OutputOptions::setOutputHeader(bool outputHeader) m.outputHeader = outputHeader; } -/// Set container. -void OutputOptions::setContainer(Container container) -{ - m.container = container; -} - -bool OutputOptions::Private::hasValidOutputHandler() const +bool OutputOptions::Private::openFile() const { if (!fileName.isNull()) { - return outputHandler != NULL; + nvCheck(outputHandler == NULL); + + DefaultOutputHandler * oh = new DefaultOutputHandler(fileName.str()); + if (oh->stream.isError()) + { + return false; + } + + outputHandler = oh; } return true; } +void OutputOptions::Private::closeFile() const +{ + if (!fileName.isNull()) + { + delete outputHandler; + outputHandler = NULL; + } +} + diff --git a/src/nvtt/OutputOptions.h b/src/nvtt/OutputOptions.h index d90e8bd..bddc28f 100644 --- a/src/nvtt/OutputOptions.h +++ b/src/nvtt/OutputOptions.h @@ -52,7 +52,7 @@ namespace nvtt //return !stream.isError(); return true; } - + nv::StdOutputStream stream; }; @@ -61,12 +61,12 @@ namespace nvtt { nv::Path fileName; - OutputHandler * outputHandler; + mutable OutputHandler * outputHandler; ErrorHandler * errorHandler; bool outputHeader; - Container container; - bool hasValidOutputHandler() const; + bool 
openFile() const; + void closeFile() const; }; diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp index 9328f69..a8e086f 100644 --- a/src/nvtt/QuickCompressDXT.cpp +++ b/src/nvtt/QuickCompressDXT.cpp @@ -21,10 +21,7 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#include // swap - #include -#include #include #include @@ -133,7 +130,7 @@ inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1) return dot(c0-c1, c0-c1); } -inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) +inline static uint computeIndices4(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) { Vector3 palette[4]; palette[0] = maxColor; @@ -165,28 +162,6 @@ inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColo return indices; } -inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) -{ - Vector3 palette[4]; - palette[0] = maxColor; - palette[1] = minColor; - palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f); - palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f); - - float total = 0.0f; - for (int i = 0; i < 16; i++) - { - float d0 = colorDistance(palette[0], block[i]); - float d1 = colorDistance(palette[1], block[i]); - float d2 = colorDistance(palette[2], block[i]); - float d3 = colorDistance(palette[3], block[i]); - - total += min(min(d0, d1), min(d2, d3)); - } - - return total; -} - inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColor, Vector3::Arg minColor) { Vector3 palette[4]; @@ -475,8 +450,7 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) // read block Vector3 block[16]; extractColorBlockRGB(rgba, block); - -#if 1 + // find min and max colors Vector3 maxColor, minColor; findMinMaxColorsBox(block, 16, &maxColor, &minColor); @@ -484,31 +458,7 @@ void QuickCompress::compressDXT1(const 
ColorBlock & rgba, BlockDXT1 * dxtBlock) selectDiagonal(block, 16, &maxColor, &minColor); insetBBox(&maxColor, &minColor); -#else - float weights[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - Vector3 cluster[4]; - int count = Compute4Means(16, block, weights, Vector3(1, 1, 1), cluster); - - Vector3 maxColor, minColor; - float bestError = FLT_MAX; - - for (int i = 1; i < 4; i++) - { - for (int j = 0; j < i; j++) - { - uint16 color0 = roundAndExpand(&cluster[i]); - uint16 color1 = roundAndExpand(&cluster[j]); - - float error = evaluatePaletteError4(block, cluster[i], cluster[j]); - if (error < bestError) { - bestError = error; - maxColor = cluster[i]; - minColor = cluster[j]; - } - } - } -#endif - + uint16 color0 = roundAndExpand(&maxColor); uint16 color1 = roundAndExpand(&minColor); diff --git a/src/nvtt/TexImage.cpp b/src/nvtt/TexImage.cpp deleted file mode 100644 index 122b81b..0000000 --- a/src/nvtt/TexImage.cpp +++ /dev/null @@ -1,1088 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "TexImage.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -using namespace nv; -using namespace nvtt; - -namespace -{ - // 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 4, 5 -> 4, ... - static uint previousPowerOfTwo(const uint v) - { - return nextPowerOfTwo(v + 1) / 2; - } - - static uint nearestPowerOfTwo(const uint v) - { - const uint np2 = nextPowerOfTwo(v); - const uint pp2 = previousPowerOfTwo(v); - - if (np2 - v <= v - pp2) - { - return np2; - } - else - { - return pp2; - } - } - -#pragma message(NV_FILE_LINE "TODO: Move these functions to a common location.") - - static int blockSize(Format format) - { - if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) { - return 8; - } - else if (format == Format_DXT3) { - return 16; - } - else if (format == Format_DXT5 || format == Format_DXT5n) { - return 16; - } - else if (format == Format_BC4) { - return 8; - } - else if (format == Format_BC5) { - return 16; - } - else if (format == Format_CTX1) { - return 8; - } - return 0; - } - - static uint countMipmaps(int w, int h, int d) - { - uint mipmap = 0; - - while (w != 1 || h != 1 || d != 1) { - w = max(1, w / 2); - h = max(1, h / 2); - d = max(1, d / 2); - mipmap++; - } - - return mipmap + 1; - } -} - - -TexImage::TexImage() : m(new TexImage::Private()) -{ -} - -TexImage::TexImage(const TexImage & tex) : m(tex.m) -{ - if (m != NULL) m->addRef(); -} - -TexImage::~TexImage() -{ - if (m != NULL) m->release(); - m = NULL; -} - -void TexImage::operator=(const TexImage & tex) -{ - if (tex.m != NULL) tex.m->addRef(); - if (m != NULL) m->release(); - m = tex.m; -} - -void TexImage::detach() -{ - if (m->refCount() > 1) - { - m = new 
TexImage::Private(*m); - m->addRef(); - nvDebugCheck(m->refCount() == 1); - } -} - -void TexImage::setTextureType(TextureType type) -{ - if (m->type != type) - { - detach(); - - m->type = type; - - int count = 0; - if (type == TextureType_2D) - { - count = 1; - } - else - { - nvCheck (type == TextureType_Cube); - count = 6; - } - - // Delete all but the first 'count' images. - const uint imageCount = m->imageArray.count(); - for (uint i = count; i < imageCount; i++) - { - delete m->imageArray[i]; - } - - m->imageArray.resize(count, NULL); - } -} - -void TexImage::setWrapMode(WrapMode wrapMode) -{ - if (m->wrapMode != wrapMode) - { - detach(); - m->wrapMode = wrapMode; - } -} - -void TexImage::setAlphaMode(AlphaMode alphaMode) -{ - if (m->alphaMode != alphaMode) - { - detach(); - m->alphaMode = alphaMode; - } -} - -void TexImage::setNormalMap(bool isNormalMap) -{ - if (m->isNormalMap != isNormalMap) - { - detach(); - m->isNormalMap = isNormalMap; - } -} - -int TexImage::width() const -{ - if (m->imageArray.count() > 0) - { - return m->imageArray[0]->width(); - } - return 0; -} - -int TexImage::height() const -{ - if (m->imageArray.count() > 0) - { - return m->imageArray[0]->height(); - } - return 0; -} - -int TexImage::depth() const -{ - return 1; -} - -int TexImage::faceCount() const -{ - return m->imageArray.count(); -} - -TextureType TexImage::textureType() const -{ - return m->type; -} - -WrapMode TexImage::wrapMode() const -{ - return m->wrapMode; -} - -AlphaMode TexImage::alphaMode() const -{ - return m->alphaMode; -} - -bool TexImage::isNormalMap() const -{ - return m->isNormalMap; -} - -int TexImage::countMipmaps() const -{ - return ::countMipmaps(width(), height(), depth()); -} - - -bool TexImage::load(const char * fileName) -{ -#pragma message(NV_FILE_LINE "TODO: Add support for DDS textures in TexImage::load().") - - AutoPtr img(ImageIO::loadFloat(fileName)); - - if (img == NULL) - { - return false; - } - - detach(); - - m->imageArray.resize(1); - 
m->imageArray[0] = img.release(); - - return true; -} - -bool TexImage::save(const char * fileName) const -{ -#pragma message(NV_FILE_LINE "TODO: Add support for DDS textures in TexImage::save") - - if (m->imageArray.count() == 0) - { - return false; - } - else - { - return ImageIO::saveFloat(fileName, m->imageArray[0], 0, 4); - } -} - -bool TexImage::setImage2D(nvtt::InputFormat format, int w, int h, int idx, const void * restrict data) -{ - if (idx < 0 || uint(idx) >= m->imageArray.count()) - { - return false; - } - - FloatImage * img = m->imageArray[idx]; - if (img->width() != w || img->height() != h) - { - return false; - } - - detach(); - - const int count = w * h; - - float * restrict rdst = img->channel(0); - float * restrict gdst = img->channel(1); - float * restrict bdst = img->channel(2); - float * restrict adst = img->channel(3); - - if (format == InputFormat_BGRA_8UB) - { - const Color32 * src = (const Color32 *)data; - - try { - for (int i = 0; i < count; i++) - { - rdst[i] = src[i].r; - gdst[i] = src[i].g; - bdst[i] = src[i].b; - adst[i] = src[i].a; - } - } - catch(...) { - return false; - } - } - else if (format == InputFormat_RGBA_32F) - { - const float * src = (const float *)data; - - try { - for (int i = 0; i < count; i++) - { - rdst[i] = src[4 * i + 0]; - gdst[i] = src[4 * i + 1]; - bdst[i] = src[4 * i + 2]; - adst[i] = src[4 * i + 3]; - } - } - catch(...) 
{ - return false; - } - } - - return true; -} - -bool TexImage::setImage2D(InputFormat format, int w, int h, int idx, const void * restrict r, const void * restrict g, const void * restrict b, const void * restrict a) -{ - if (idx < 0 || uint(idx) >= m->imageArray.count()) - { - return false; - } - - FloatImage * img = m->imageArray[idx]; - if (img->width() != w || img->height() != h) - { - return false; - } - - detach(); - - const int count = w * h; - - float * restrict rdst = img->channel(0); - float * restrict gdst = img->channel(1); - float * restrict bdst = img->channel(2); - float * restrict adst = img->channel(3); - - if (format == InputFormat_BGRA_8UB) - { - const uint8 * restrict rsrc = (const uint8 *)r; - const uint8 * restrict gsrc = (const uint8 *)g; - const uint8 * restrict bsrc = (const uint8 *)b; - const uint8 * restrict asrc = (const uint8 *)a; - - try { - for (int i = 0; i < count; i++) rdst[i] = float(rsrc[i]) / 255.0f; - for (int i = 0; i < count; i++) gdst[i] = float(gsrc[i]) / 255.0f; - for (int i = 0; i < count; i++) bdst[i] = float(bsrc[i]) / 255.0f; - for (int i = 0; i < count; i++) adst[i] = float(asrc[i]) / 255.0f; - } - catch(...) { - return false; - } - } - else if (format == InputFormat_RGBA_32F) - { - const float * rsrc = (const float *)r; - const float * gsrc = (const float *)g; - const float * bsrc = (const float *)b; - const float * asrc = (const float *)a; - - try { - memcpy(rdst, rsrc, count * sizeof(float)); - memcpy(gdst, gsrc, count * sizeof(float)); - memcpy(bdst, bsrc, count * sizeof(float)); - memcpy(adst, asrc, count * sizeof(float)); - } - catch(...) 
{ - return false; - } - } - - return true; -} - -bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, int idx, const void * data) -{ - if (idx < 0 || uint(idx) >= m->imageArray.count()) - { - return false; - } - -#pragma message(NV_FILE_LINE "TODO: Add support for all compressed formats in TexImage::setImage2D.") - - if (format != nvtt::Format_BC1 && format != nvtt::Format_BC2 && format != nvtt::Format_BC3) - { - return false; - } - - FloatImage * img = m->imageArray[idx]; - if (img->width() != w || img->height() != h) - { - return false; - } - - detach(); - - const int count = w * h; - - const int bw = (w + 3) / 4; - const int bh = (h + 3) / 4; - - const uint bs = blockSize(format); - - const uint8 * ptr = (const uint8 *)data; - - try { - for (int y = 0; y < bh; y++) - { - for (int x = 0; x < bw; x++) - { - ColorBlock colors; - - if (format == nvtt::Format_BC1) - { - const BlockDXT1 * block = (const BlockDXT1 *)ptr; - - if (decoder == Decoder_Reference) { - block->decodeBlock(&colors); - } - else if (decoder == Decoder_NV5x) { - block->decodeBlockNV5x(&colors); - } - } - else if (format == nvtt::Format_BC2) - { - const BlockDXT3 * block = (const BlockDXT3 *)ptr; - - if (decoder == Decoder_Reference) { - block->decodeBlock(&colors); - } - else if (decoder == Decoder_NV5x) { - block->decodeBlockNV5x(&colors); - } - } - else if (format == nvtt::Format_BC3) - { - const BlockDXT5 * block = (const BlockDXT5 *)ptr; - - if (decoder == Decoder_Reference) { - block->decodeBlock(&colors); - } - else if (decoder == Decoder_NV5x) { - block->decodeBlockNV5x(&colors); - } - } - - for (int yy = 0; yy < 4; yy++) - { - for (int xx = 0; xx < 4; xx++) - { - Color32 c = colors.color(xx, yy); - - if (x * 4 + xx < w && y * 4 + yy < h) - { - img->setPixel(float(c.r) * 1.0f/255.0f, x*4 + xx, y*4 + yy, 0); - img->setPixel(float(c.g) * 1.0f/255.0f, x*4 + xx, y*4 + yy, 1); - img->setPixel(float(c.b) * 1.0f/255.0f, x*4 + xx, y*4 + yy, 2); - img->setPixel(float(c.a) * 
1.0f/255.0f, x*4 + xx, y*4 + yy, 3); - } - } - } - - ptr += bs; - } - } - } - catch(...) { - return false; - } - - return true; -} - - -#pragma message(NV_FILE_LINE "TODO: provide a TexImage::resize that can override filter width and parameters.") - -void TexImage::resize(int w, int h, ResizeFilter filter) -{ - if (m->imageArray.count() > 0) - { - if (w == m->imageArray[0]->width() && h == m->imageArray[0]->height()) return; - } - - if (m->type == TextureType_Cube) - { -#pragma message(NV_FILE_LINE "TODO: Output error when image is cubemap and w != h.") - h = w; - } - - detach(); - - FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode; - - foreach (i, m->imageArray) - { - FloatImage * img = m->imageArray[i]; - - if (img == NULL) continue; - - if (m->alphaMode == AlphaMode_Transparency) - { - if (filter == ResizeFilter_Box) - { - BoxFilter filter; - m->imageArray[i]->resize(filter, w, h, wrapMode, 3); - } - else if (filter == ResizeFilter_Triangle) - { - TriangleFilter filter; - m->imageArray[i]->resize(filter, w, h, wrapMode, 3); - } - else if (filter == ResizeFilter_Kaiser) - { - //KaiserFilter filter(inputOptions.kaiserWidth); - //filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch); - KaiserFilter filter(3); - m->imageArray[i]->resize(filter, w, h, wrapMode, 3); - } - else //if (filter == ResizeFilter_Mitchell) - { - nvDebugCheck(filter == ResizeFilter_Mitchell); - MitchellFilter filter; - m->imageArray[i]->resize(filter, w, h, wrapMode, 3); - } - } - else - { - if (filter == ResizeFilter_Box) - { - BoxFilter filter; - m->imageArray[i]->resize(filter, w, h, wrapMode); - } - else if (filter == ResizeFilter_Triangle) - { - TriangleFilter filter; - m->imageArray[i]->resize(filter, w, h, wrapMode); - } - else if (filter == ResizeFilter_Kaiser) - { - //KaiserFilter filter(inputOptions.kaiserWidth); - //filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch); - KaiserFilter filter(3); - 
m->imageArray[i]->resize(filter, w, h, wrapMode); - } - else //if (filter == ResizeFilter_Mitchell) - { - nvDebugCheck(filter == ResizeFilter_Mitchell); - MitchellFilter filter; - m->imageArray[i]->resize(filter, w, h, wrapMode); - } - } - } -} - -void TexImage::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter) -{ - if (m->imageArray.count() > 0) - { - int w = m->imageArray[0]->width(); - int h = m->imageArray[0]->height(); - - nvDebugCheck(w > 0); - nvDebugCheck(h > 0); - - if (roundMode != RoundMode_None) - { - // rounded max extent should never be higher than original max extent. - maxExtent = previousPowerOfTwo(maxExtent); - } - - // Scale extents without changing aspect ratio. - int maxwh = max(w, h); - if (maxExtent != 0 && maxwh > maxExtent) - { - w = max((w * maxExtent) / maxwh, 1); - h = max((h * maxExtent) / maxwh, 1); - } - - // Round to power of two. - if (roundMode == RoundMode_ToNextPowerOfTwo) - { - w = nextPowerOfTwo(w); - h = nextPowerOfTwo(h); - } - else if (roundMode == RoundMode_ToNearestPowerOfTwo) - { - w = nearestPowerOfTwo(w); - h = nearestPowerOfTwo(h); - } - else if (roundMode == RoundMode_ToPreviousPowerOfTwo) - { - w = previousPowerOfTwo(w); - h = previousPowerOfTwo(h); - } - - // Make sure cube faces are square. 
- if (m->type == TextureType_Cube) - { - w = h = max(w, h); - } - - resize(w, h, filter); - } -} - -bool TexImage::buildNextMipmap(MipmapFilter filter) -{ - if (m->imageArray.count() > 0) - { - int w = m->imageArray[0]->width(); - int h = m->imageArray[0]->height(); - - nvDebugCheck(w > 0); - nvDebugCheck(h > 0); - - if (w == 1 && h == 1) - { - return false; - } - } - - detach(); - - FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode; - - foreach (i, m->imageArray) - { - if (m->imageArray[i] == NULL) continue; - - if (m->alphaMode == AlphaMode_Transparency) - { - if (filter == MipmapFilter_Box) - { - BoxFilter filter; - m->imageArray[i]->downSample(filter, wrapMode, 3); - } - else if (filter == MipmapFilter_Triangle) - { - TriangleFilter filter; - m->imageArray[i]->downSample(filter, wrapMode, 3); - } - else if (filter == MipmapFilter_Kaiser) - { - nvDebugCheck(filter == MipmapFilter_Kaiser); - //KaiserFilter filter(inputOptions.kaiserWidth); - //filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch); - KaiserFilter filter(3); - m->imageArray[i]->downSample(filter, wrapMode, 3); - } - } - else - { - if (filter == MipmapFilter_Box) - { - m->imageArray[i]->fastDownSample(); - } - else if (filter == MipmapFilter_Triangle) - { - TriangleFilter filter; - m->imageArray[i]->downSample(filter, wrapMode); - } - else //if (filter == MipmapFilter_Kaiser) - { - nvDebugCheck(filter == MipmapFilter_Kaiser); - //KaiserFilter filter(inputOptions.kaiserWidth); - //filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch); - KaiserFilter filter(3); - m->imageArray[i]->downSample(filter, wrapMode); - } - } - } - - return true; -} - -// Color transforms. 
-void TexImage::toLinear(float gamma) -{ - if (equal(gamma, 1.0f)) return; - - detach(); - - foreach (i, m->imageArray) - { - if (m->imageArray[i] == NULL) continue; - - m->imageArray[i]->toLinear(0, 3, gamma); - } -} - -void TexImage::toGamma(float gamma) -{ - if (equal(gamma, 1.0f)) return; - - detach(); - - foreach (i, m->imageArray) - { - if (m->imageArray[i] == NULL) continue; - - m->imageArray[i]->toGamma(0, 3, gamma); - } -} - -void TexImage::transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]) -{ - detach(); - - Matrix xform( - Vector4(w0[0], w0[1], w0[2], w0[3]), - Vector4(w1[0], w1[1], w1[2], w1[3]), - Vector4(w2[0], w2[1], w2[2], w2[3]), - Vector4(w3[0], w3[1], w3[2], w3[3])); - - Vector4 voffset(offset[0], offset[1], offset[2], offset[3]); - - foreach (i, m->imageArray) - { - if (m->imageArray[i] == NULL) continue; - - m->imageArray[i]->transform(0, xform, voffset); - } -} - -void TexImage::swizzle(int r, int g, int b, int a) -{ - if (r == 0 && g == 1 && b == 2 && a == 3) return; - - detach(); - - foreach (i, m->imageArray) - { - if (m->imageArray[i] == NULL) continue; - - m->imageArray[i]->swizzle(0, r, g, b, a); - } -} - -void TexImage::scaleBias(int channel, float scale, float bias) -{ - if (equal(scale, 1.0f) && equal(bias, 0.0f)) return; - - detach(); - - foreach (i, m->imageArray) - { - if (m->imageArray[i] == NULL) continue; - - m->imageArray[i]->scaleBias(channel, 1, scale, bias); - } -} - -void TexImage::packNormal() -{ - scaleBias(0, 0.5f, 0.5f); - scaleBias(1, 0.5f, 0.5f); - scaleBias(2, 0.5f, 0.5f); -} - -void TexImage::expandNormal() -{ - scaleBias(0, 2.0f, -1.0f); - scaleBias(1, 2.0f, -1.0f); - scaleBias(2, 2.0f, -1.0f); -} - - -void TexImage::blend(float red, float green, float blue, float alpha, float t) -{ - detach(); - - foreach (i, m->imageArray) - { - FloatImage * img = m->imageArray[i]; - if (img == NULL) continue; - - float * restrict r = img->channel(0); - float * restrict g 
= img->channel(1); - float * restrict b = img->channel(2); - float * restrict a = img->channel(3); - - const int count = img->width() * img->height(); - for (int i = 0; i < count; i++) - { - r[i] = lerp(r[i], red, t); - g[i] = lerp(g[i], green, t); - b[i] = lerp(b[i], blue, t); - a[i] = lerp(a[i], alpha, t); - } - } -} - -void TexImage::premultiplyAlpha() -{ - detach(); - - foreach (i, m->imageArray) - { - FloatImage * img = m->imageArray[i]; - if (img == NULL) continue; - - float * restrict r = img->channel(0); - float * restrict g = img->channel(1); - float * restrict b = img->channel(2); - float * restrict a = img->channel(3); - - const int count = img->width() * img->height(); - for (int i = 0; i < count; i++) - { - r[i] *= a[i]; - g[i] *= a[i]; - b[i] *= a[i]; - } - } -} - - -void TexImage::toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale) -{ - detach(); - - foreach (i, m->imageArray) - { - FloatImage * img = m->imageArray[i]; - if (img == NULL) continue; - - float sum = redScale + greenScale + blueScale + alphaScale; - redScale /= sum; - greenScale /= sum; - blueScale /= sum; - alphaScale /= sum; - - float * restrict r = img->channel(0); - float * restrict g = img->channel(1); - float * restrict b = img->channel(2); - float * restrict a = img->channel(3); - - const int count = img->width() * img->height(); - for (int i = 0; i < count; i++) - { - float grey = r[i] * redScale + g[i] * greenScale + b[i] * blueScale + a[i] * alphaScale; - a[i] = b[i] = g[i] = r[i] = grey; - } - } -} - -// Draw colored border. 
-void TexImage::setBorder(float r, float g, float b, float a) -{ - detach(); - - foreach (i, m->imageArray) - { - FloatImage * img = m->imageArray[i]; - if (img == NULL) continue; - - const int w = img->width(); - const int h = img->height(); - - for (int i = 0; i < w; i++) - { - img->setPixel(r, i, 0, 0); - img->setPixel(g, i, 0, 1); - img->setPixel(b, i, 0, 2); - img->setPixel(a, i, 0, 3); - - img->setPixel(r, i, h-1, 0); - img->setPixel(g, i, h-1, 1); - img->setPixel(b, i, h-1, 2); - img->setPixel(a, i, h-1, 3); - } - - for (int i = 0; i < h; i++) - { - img->setPixel(r, 0, i, 0); - img->setPixel(g, 0, i, 1); - img->setPixel(b, 0, i, 2); - img->setPixel(a, 0, i, 3); - - img->setPixel(r, w-1, i, 0); - img->setPixel(g, w-1, i, 1); - img->setPixel(b, w-1, i, 2); - img->setPixel(a, w-1, i, 3); - } - } -} - -// Fill image with the given color. -void TexImage::fill(float red, float green, float blue, float alpha) -{ - detach(); - - foreach (i, m->imageArray) - { - FloatImage * img = m->imageArray[i]; - if (img == NULL) continue; - - float * restrict r = img->channel(0); - float * restrict g = img->channel(1); - float * restrict b = img->channel(2); - float * restrict a = img->channel(3); - - const int count = img->width() * img->height(); - for (int i = 0; i < count; i++) - { - r[i] = red; - g[i] = green; - b[i] = blue; - a[i] = alpha; - } - } -} - -// Set normal map options. 
-void TexImage::toNormalMap(float sm, float medium, float big, float large) -{ - detach(); - - const Vector4 filterWeights(sm, medium, big, large); - - foreach (i, m->imageArray) - { - if (m->imageArray[i] == NULL) continue; - - const FloatImage * img = m->imageArray[i]; - m->imageArray[i] = nv::createNormalMap(img, (FloatImage::WrapMode)m->wrapMode, filterWeights); - -#pragma message(NV_FILE_LINE "TODO: Pack and expand normals explicitly") - m->imageArray[i]->packNormals(0); - - delete img; - } - - m->isNormalMap = true; -} - -void TexImage::toHeightMap() -{ - detach(); - - foreach (i, m->imageArray) - { - if (m->imageArray[i] == NULL) continue; - -#pragma message(NV_FILE_LINE "TODO: Implement TexImage::toHeightMap") - } - - m->isNormalMap = false; -} - -void TexImage::normalizeNormalMap() -{ - //nvCheck(m->isNormalMap); - - detach(); - - foreach (i, m->imageArray) - { - if (m->imageArray[i] == NULL) continue; - - nv::normalizeNormalMap(m->imageArray[i]); - } -} - -float TexImage::rootMeanSquaredError_rgb(const TexImage & reference) const -{ - int totalCount = 0; - double mse = 0; - - const int faceCount = this->faceCount(); - if (faceCount != reference.faceCount()) { - return FLT_MAX; - } - - for (int f = 0; f < faceCount; f++) - { - const FloatImage * img = m->imageArray[f]; - const FloatImage * ref = reference.m->imageArray[f]; - - if (img == NULL || ref == NULL) { - return FLT_MAX; - } - - nvCheck(img->componentNum() == 4); - nvCheck(ref->componentNum() == 4); - - const uint count = img->width() * img->height(); - totalCount += count; - - for (uint i = 0; i < count; i++) - { - float r0 = img->pixel(4 * i + count * 0); - float g0 = img->pixel(4 * i + count * 1); - float b0 = img->pixel(4 * i + count * 2); - float a0 = img->pixel(4 * i + count * 3); - float r1 = ref->pixel(4 * i + count * 0); - float g1 = ref->pixel(4 * i + count * 1); - float b1 = ref->pixel(4 * i + count * 2); - float a1 = ref->pixel(4 * i + count * 3); - - float r = r0 - r1; - float g = g0 - 
g1; - float b = b0 - b1; - float a = a0 - a1; - - if (reference.alphaMode() == nvtt::AlphaMode_Transparency) - { - mse += double(r * r * a1) / 255.0; - mse += double(g * g * a1) / 255.0; - mse += double(b * b * a1) / 255.0; - } - else - { - mse += r * r; - mse += g * g; - mse += b * b; - } - } - } - - return float(sqrt(mse / totalCount)); -} - -float TexImage::rootMeanSquaredError_alpha(const TexImage & reference) const -{ - int totalCount = 0; - double mse = 0; - - const int faceCount = this->faceCount(); - if (faceCount != reference.faceCount()) { - return FLT_MAX; - } - - for (int f = 0; f < faceCount; f++) - { - const FloatImage * img = m->imageArray[f]; - const FloatImage * ref = reference.m->imageArray[f]; - - if (img == NULL || ref == NULL) { - return FLT_MAX; - } - - nvCheck(img->componentNum() == 4); - nvCheck(ref->componentNum() == 4); - - const uint count = img->width() * img->height(); - totalCount += count; - - for (uint i = 0; i < count; i++) - { - float a0 = img->pixel(4 * i + count * 3); - float a1 = ref->pixel(4 * i + count * 3); - - float a = a0 - a1; - - mse += a * a; - } - } - - return float(sqrt(mse / totalCount)); -} - diff --git a/src/nvtt/TexImage.h b/src/nvtt/TexImage.h deleted file mode 100644 index c2aa017..0000000 --- a/src/nvtt/TexImage.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#ifndef NV_TT_TEXIMAGE_H -#define NV_TT_TEXIMAGE_H - -#include "nvtt.h" - -#include -#include -#include - -#include -#include - -namespace nvtt -{ - - struct TexImage::Private : public nv::RefCounted - { - Private() - { - type = TextureType_2D; - wrapMode = WrapMode_Mirror; - alphaMode = AlphaMode_None; - isNormalMap = false; - - imageArray.resize(1, NULL); - } - Private(const Private & p) // Copy ctor. inits refcount to 0. - { - type = p.type; - wrapMode = p.wrapMode; - alphaMode = p.alphaMode; - isNormalMap = p.isNormalMap; - - imageArray = p.imageArray; - } - ~Private() - { - const uint count = imageArray.count(); - for (uint i = 0; i < count; i++) { - delete imageArray[i]; - } - } - - TextureType type; - WrapMode wrapMode; - AlphaMode alphaMode; - bool isNormalMap; - - nv::Array imageArray; - }; - - -} // nvtt namespace - - -#endif // NV_TT_TEXIMAGE_H diff --git a/src/nvtt/cuda/Bitmaps.h b/src/nvtt/cuda/Bitmaps.h index d05e9f3..1494092 100644 --- a/src/nvtt/cuda/Bitmaps.h +++ b/src/nvtt/cuda/Bitmaps.h @@ -1117,770 +1117,3 @@ const static uint s_bitmapTable[992] = 0x55555557, 0x55555557, }; - - -/* -void precomp() -{ - unsigned int bitmaps[1024]; - - int num = 0; - - printf("const static uint s_bitmapTableCTX[704] =\n{\n"); - - for (int a = 1; a <= 15; a++) - { - for (int b = a; b <= 15; b++) - { - for (int c = b; c <= 15; c++) - { - int indices[16]; - - int i = 0; - for(; i < a; i++) { - indices[i] = 0; - } - for(; i < a+b; i++) { - indices[i] = 2; - } - for(; i < a+b+c; 
i++) { - indices[i] = 3; - } - for(; i < 16; i++) { - indices[i] = 1; - } - - unsigned int bm = 0; - for(i = 0; i < 16; i++) { - bm |= indices[i] << (i * 2); - } - - printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c); - - bitmaps[num] = bm; - num++; - } - } - } - - // Align to 32: 680 -> 704 - while (num < 704) - { - printf("\t0x80000000,\n"); - - bitmaps[num] = 0x80000000; // 15 0 0 1; - num++; - } - - printf("}; // num = %d\n", num); -} -*/ - -const static uint s_bitmapTableCTX[704] = -{ - 0x55555578, // 1 0 0 15 - 0x555555F8, // 1 0 1 14 - 0x555557F8, // 1 0 2 13 - 0x55555FF8, // 1 0 3 12 - 0x55557FF8, // 1 0 4 11 - 0x5555FFF8, // 1 0 5 10 - 0x5557FFF8, // 1 0 6 9 - 0x555FFFF8, // 1 0 7 8 - 0x557FFFF8, // 1 0 8 7 - 0x55FFFFF8, // 1 0 9 6 - 0x57FFFFF8, // 1 0 10 5 - 0x5FFFFFF8, // 1 0 11 4 - 0x7FFFFFF8, // 1 0 12 3 - 0xFFFFFFF8, // 1 0 13 2 - 0xFFFFFFF8, // 1 0 14 1 - 0x555557E8, // 1 1 0 14 - 0x55555FE8, // 1 1 1 13 - 0x55557FE8, // 1 1 2 12 - 0x5555FFE8, // 1 1 3 11 - 0x5557FFE8, // 1 1 4 10 - 0x555FFFE8, // 1 1 5 9 - 0x557FFFE8, // 1 1 6 8 - 0x55FFFFE8, // 1 1 7 7 - 0x57FFFFE8, // 1 1 8 6 - 0x5FFFFFE8, // 1 1 9 5 - 0x7FFFFFE8, // 1 1 10 4 - 0xFFFFFFE8, // 1 1 11 3 - 0xFFFFFFE8, // 1 1 12 2 - 0xFFFFFFE8, // 1 1 13 1 - 0x55557FA8, // 1 2 0 13 - 0x5555FFA8, // 1 2 1 12 - 0x5557FFA8, // 1 2 2 11 - 0x555FFFA8, // 1 2 3 10 - 0x557FFFA8, // 1 2 4 9 - 0x55FFFFA8, // 1 2 5 8 - 0x57FFFFA8, // 1 2 6 7 - 0x5FFFFFA8, // 1 2 7 6 - 0x7FFFFFA8, // 1 2 8 5 - 0xFFFFFFA8, // 1 2 9 4 - 0xFFFFFFA8, // 1 2 10 3 - 0xFFFFFFA8, // 1 2 11 2 - 0xFFFFFFA8, // 1 2 12 1 - 0x5557FEA8, // 1 3 0 12 - 0x555FFEA8, // 1 3 1 11 - 0x557FFEA8, // 1 3 2 10 - 0x55FFFEA8, // 1 3 3 9 - 0x57FFFEA8, // 1 3 4 8 - 0x5FFFFEA8, // 1 3 5 7 - 0x7FFFFEA8, // 1 3 6 6 - 0xFFFFFEA8, // 1 3 7 5 - 0xFFFFFEA8, // 1 3 8 4 - 0xFFFFFEA8, // 1 3 9 3 - 0xFFFFFEA8, // 1 3 10 2 - 0xFFFFFEA8, // 1 3 11 1 - 0x557FFAA8, // 1 4 0 11 - 0x55FFFAA8, // 1 4 1 10 - 0x57FFFAA8, // 1 4 2 9 - 0x5FFFFAA8, // 1 4 3 8 - 
0x7FFFFAA8, // 1 4 4 7 - 0xFFFFFAA8, // 1 4 5 6 - 0xFFFFFAA8, // 1 4 6 5 - 0xFFFFFAA8, // 1 4 7 4 - 0xFFFFFAA8, // 1 4 8 3 - 0xFFFFFAA8, // 1 4 9 2 - 0xFFFFFAA8, // 1 4 10 1 - 0x57FFEAA8, // 1 5 0 10 - 0x5FFFEAA8, // 1 5 1 9 - 0x7FFFEAA8, // 1 5 2 8 - 0xFFFFEAA8, // 1 5 3 7 - 0xFFFFEAA8, // 1 5 4 6 - 0xFFFFEAA8, // 1 5 5 5 - 0xFFFFEAA8, // 1 5 6 4 - 0xFFFFEAA8, // 1 5 7 3 - 0xFFFFEAA8, // 1 5 8 2 - 0xFFFFEAA8, // 1 5 9 1 - 0x7FFFAAA8, // 1 6 0 9 - 0xFFFFAAA8, // 1 6 1 8 - 0xFFFFAAA8, // 1 6 2 7 - 0xFFFFAAA8, // 1 6 3 6 - 0xFFFFAAA8, // 1 6 4 5 - 0xFFFFAAA8, // 1 6 5 4 - 0xFFFFAAA8, // 1 6 6 3 - 0xFFFFAAA8, // 1 6 7 2 - 0xFFFFAAA8, // 1 6 8 1 - 0xFFFEAAA8, // 1 7 0 8 - 0xFFFEAAA8, // 1 7 1 7 - 0xFFFEAAA8, // 1 7 2 6 - 0xFFFEAAA8, // 1 7 3 5 - 0xFFFEAAA8, // 1 7 4 4 - 0xFFFEAAA8, // 1 7 5 3 - 0xFFFEAAA8, // 1 7 6 2 - 0xFFFEAAA8, // 1 7 7 1 - 0xFFFAAAA8, // 1 8 0 7 - 0xFFFAAAA8, // 1 8 1 6 - 0xFFFAAAA8, // 1 8 2 5 - 0xFFFAAAA8, // 1 8 3 4 - 0xFFFAAAA8, // 1 8 4 3 - 0xFFFAAAA8, // 1 8 5 2 - 0xFFFAAAA8, // 1 8 6 1 - 0xFFEAAAA8, // 1 9 0 6 - 0xFFEAAAA8, // 1 9 1 5 - 0xFFEAAAA8, // 1 9 2 4 - 0xFFEAAAA8, // 1 9 3 3 - 0xFFEAAAA8, // 1 9 4 2 - 0xFFEAAAA8, // 1 9 5 1 - 0xFFAAAAA8, // 1 10 0 5 - 0xFFAAAAA8, // 1 10 1 4 - 0xFFAAAAA8, // 1 10 2 3 - 0xFFAAAAA8, // 1 10 3 2 - 0xFFAAAAA8, // 1 10 4 1 - 0xFEAAAAA8, // 1 11 0 4 - 0xFEAAAAA8, // 1 11 1 3 - 0xFEAAAAA8, // 1 11 2 2 - 0xFEAAAAA8, // 1 11 3 1 - 0xFAAAAAA8, // 1 12 0 3 - 0xFAAAAAA8, // 1 12 1 2 - 0xFAAAAAA8, // 1 12 2 1 - 0xEAAAAAA8, // 1 13 0 2 - 0xEAAAAAA8, // 1 13 1 1 - 0xAAAAAAA8, // 1 14 0 1 - 0x55555FA0, // 2 0 0 14 - 0x55557FA0, // 2 0 1 13 - 0x5555FFA0, // 2 0 2 12 - 0x5557FFA0, // 2 0 3 11 - 0x555FFFA0, // 2 0 4 10 - 0x557FFFA0, // 2 0 5 9 - 0x55FFFFA0, // 2 0 6 8 - 0x57FFFFA0, // 2 0 7 7 - 0x5FFFFFA0, // 2 0 8 6 - 0x7FFFFFA0, // 2 0 9 5 - 0xFFFFFFA0, // 2 0 10 4 - 0xFFFFFFA0, // 2 0 11 3 - 0xFFFFFFA0, // 2 0 12 2 - 0xFFFFFFA0, // 2 0 13 1 - 0x5555FEA0, // 2 1 0 13 - 0x5557FEA0, // 2 1 1 12 - 0x555FFEA0, // 2 1 2 
11 - 0x557FFEA0, // 2 1 3 10 - 0x55FFFEA0, // 2 1 4 9 - 0x57FFFEA0, // 2 1 5 8 - 0x5FFFFEA0, // 2 1 6 7 - 0x7FFFFEA0, // 2 1 7 6 - 0xFFFFFEA0, // 2 1 8 5 - 0xFFFFFEA0, // 2 1 9 4 - 0xFFFFFEA0, // 2 1 10 3 - 0xFFFFFEA0, // 2 1 11 2 - 0xFFFFFEA0, // 2 1 12 1 - 0x555FFAA0, // 2 2 0 12 - 0x557FFAA0, // 2 2 1 11 - 0x55FFFAA0, // 2 2 2 10 - 0x57FFFAA0, // 2 2 3 9 - 0x5FFFFAA0, // 2 2 4 8 - 0x7FFFFAA0, // 2 2 5 7 - 0xFFFFFAA0, // 2 2 6 6 - 0xFFFFFAA0, // 2 2 7 5 - 0xFFFFFAA0, // 2 2 8 4 - 0xFFFFFAA0, // 2 2 9 3 - 0xFFFFFAA0, // 2 2 10 2 - 0xFFFFFAA0, // 2 2 11 1 - 0x55FFEAA0, // 2 3 0 11 - 0x57FFEAA0, // 2 3 1 10 - 0x5FFFEAA0, // 2 3 2 9 - 0x7FFFEAA0, // 2 3 3 8 - 0xFFFFEAA0, // 2 3 4 7 - 0xFFFFEAA0, // 2 3 5 6 - 0xFFFFEAA0, // 2 3 6 5 - 0xFFFFEAA0, // 2 3 7 4 - 0xFFFFEAA0, // 2 3 8 3 - 0xFFFFEAA0, // 2 3 9 2 - 0xFFFFEAA0, // 2 3 10 1 - 0x5FFFAAA0, // 2 4 0 10 - 0x7FFFAAA0, // 2 4 1 9 - 0xFFFFAAA0, // 2 4 2 8 - 0xFFFFAAA0, // 2 4 3 7 - 0xFFFFAAA0, // 2 4 4 6 - 0xFFFFAAA0, // 2 4 5 5 - 0xFFFFAAA0, // 2 4 6 4 - 0xFFFFAAA0, // 2 4 7 3 - 0xFFFFAAA0, // 2 4 8 2 - 0xFFFFAAA0, // 2 4 9 1 - 0xFFFEAAA0, // 2 5 0 9 - 0xFFFEAAA0, // 2 5 1 8 - 0xFFFEAAA0, // 2 5 2 7 - 0xFFFEAAA0, // 2 5 3 6 - 0xFFFEAAA0, // 2 5 4 5 - 0xFFFEAAA0, // 2 5 5 4 - 0xFFFEAAA0, // 2 5 6 3 - 0xFFFEAAA0, // 2 5 7 2 - 0xFFFEAAA0, // 2 5 8 1 - 0xFFFAAAA0, // 2 6 0 8 - 0xFFFAAAA0, // 2 6 1 7 - 0xFFFAAAA0, // 2 6 2 6 - 0xFFFAAAA0, // 2 6 3 5 - 0xFFFAAAA0, // 2 6 4 4 - 0xFFFAAAA0, // 2 6 5 3 - 0xFFFAAAA0, // 2 6 6 2 - 0xFFFAAAA0, // 2 6 7 1 - 0xFFEAAAA0, // 2 7 0 7 - 0xFFEAAAA0, // 2 7 1 6 - 0xFFEAAAA0, // 2 7 2 5 - 0xFFEAAAA0, // 2 7 3 4 - 0xFFEAAAA0, // 2 7 4 3 - 0xFFEAAAA0, // 2 7 5 2 - 0xFFEAAAA0, // 2 7 6 1 - 0xFFAAAAA0, // 2 8 0 6 - 0xFFAAAAA0, // 2 8 1 5 - 0xFFAAAAA0, // 2 8 2 4 - 0xFFAAAAA0, // 2 8 3 3 - 0xFFAAAAA0, // 2 8 4 2 - 0xFFAAAAA0, // 2 8 5 1 - 0xFEAAAAA0, // 2 9 0 5 - 0xFEAAAAA0, // 2 9 1 4 - 0xFEAAAAA0, // 2 9 2 3 - 0xFEAAAAA0, // 2 9 3 2 - 0xFEAAAAA0, // 2 9 4 1 - 0xFAAAAAA0, // 2 10 0 4 - 
0xFAAAAAA0, // 2 10 1 3 - 0xFAAAAAA0, // 2 10 2 2 - 0xFAAAAAA0, // 2 10 3 1 - 0xEAAAAAA0, // 2 11 0 3 - 0xEAAAAAA0, // 2 11 1 2 - 0xEAAAAAA0, // 2 11 2 1 - 0xAAAAAAA0, // 2 12 0 2 - 0xAAAAAAA0, // 2 12 1 1 - 0xAAAAAAA0, // 2 13 0 1 - 0x5557FA80, // 3 0 0 13 - 0x555FFA80, // 3 0 1 12 - 0x557FFA80, // 3 0 2 11 - 0x55FFFA80, // 3 0 3 10 - 0x57FFFA80, // 3 0 4 9 - 0x5FFFFA80, // 3 0 5 8 - 0x7FFFFA80, // 3 0 6 7 - 0xFFFFFA80, // 3 0 7 6 - 0xFFFFFA80, // 3 0 8 5 - 0xFFFFFA80, // 3 0 9 4 - 0xFFFFFA80, // 3 0 10 3 - 0xFFFFFA80, // 3 0 11 2 - 0xFFFFFA80, // 3 0 12 1 - 0x557FEA80, // 3 1 0 12 - 0x55FFEA80, // 3 1 1 11 - 0x57FFEA80, // 3 1 2 10 - 0x5FFFEA80, // 3 1 3 9 - 0x7FFFEA80, // 3 1 4 8 - 0xFFFFEA80, // 3 1 5 7 - 0xFFFFEA80, // 3 1 6 6 - 0xFFFFEA80, // 3 1 7 5 - 0xFFFFEA80, // 3 1 8 4 - 0xFFFFEA80, // 3 1 9 3 - 0xFFFFEA80, // 3 1 10 2 - 0xFFFFEA80, // 3 1 11 1 - 0x57FFAA80, // 3 2 0 11 - 0x5FFFAA80, // 3 2 1 10 - 0x7FFFAA80, // 3 2 2 9 - 0xFFFFAA80, // 3 2 3 8 - 0xFFFFAA80, // 3 2 4 7 - 0xFFFFAA80, // 3 2 5 6 - 0xFFFFAA80, // 3 2 6 5 - 0xFFFFAA80, // 3 2 7 4 - 0xFFFFAA80, // 3 2 8 3 - 0xFFFFAA80, // 3 2 9 2 - 0xFFFFAA80, // 3 2 10 1 - 0x7FFEAA80, // 3 3 0 10 - 0xFFFEAA80, // 3 3 1 9 - 0xFFFEAA80, // 3 3 2 8 - 0xFFFEAA80, // 3 3 3 7 - 0xFFFEAA80, // 3 3 4 6 - 0xFFFEAA80, // 3 3 5 5 - 0xFFFEAA80, // 3 3 6 4 - 0xFFFEAA80, // 3 3 7 3 - 0xFFFEAA80, // 3 3 8 2 - 0xFFFEAA80, // 3 3 9 1 - 0xFFFAAA80, // 3 4 0 9 - 0xFFFAAA80, // 3 4 1 8 - 0xFFFAAA80, // 3 4 2 7 - 0xFFFAAA80, // 3 4 3 6 - 0xFFFAAA80, // 3 4 4 5 - 0xFFFAAA80, // 3 4 5 4 - 0xFFFAAA80, // 3 4 6 3 - 0xFFFAAA80, // 3 4 7 2 - 0xFFFAAA80, // 3 4 8 1 - 0xFFEAAA80, // 3 5 0 8 - 0xFFEAAA80, // 3 5 1 7 - 0xFFEAAA80, // 3 5 2 6 - 0xFFEAAA80, // 3 5 3 5 - 0xFFEAAA80, // 3 5 4 4 - 0xFFEAAA80, // 3 5 5 3 - 0xFFEAAA80, // 3 5 6 2 - 0xFFEAAA80, // 3 5 7 1 - 0xFFAAAA80, // 3 6 0 7 - 0xFFAAAA80, // 3 6 1 6 - 0xFFAAAA80, // 3 6 2 5 - 0xFFAAAA80, // 3 6 3 4 - 0xFFAAAA80, // 3 6 4 3 - 0xFFAAAA80, // 3 6 5 2 - 0xFFAAAA80, // 3 6 6 1 - 
0xFEAAAA80, // 3 7 0 6 - 0xFEAAAA80, // 3 7 1 5 - 0xFEAAAA80, // 3 7 2 4 - 0xFEAAAA80, // 3 7 3 3 - 0xFEAAAA80, // 3 7 4 2 - 0xFEAAAA80, // 3 7 5 1 - 0xFAAAAA80, // 3 8 0 5 - 0xFAAAAA80, // 3 8 1 4 - 0xFAAAAA80, // 3 8 2 3 - 0xFAAAAA80, // 3 8 3 2 - 0xFAAAAA80, // 3 8 4 1 - 0xEAAAAA80, // 3 9 0 4 - 0xEAAAAA80, // 3 9 1 3 - 0xEAAAAA80, // 3 9 2 2 - 0xEAAAAA80, // 3 9 3 1 - 0xAAAAAA80, // 3 10 0 3 - 0xAAAAAA80, // 3 10 1 2 - 0xAAAAAA80, // 3 10 2 1 - 0xAAAAAA80, // 3 11 0 2 - 0xAAAAAA80, // 3 11 1 1 - 0xAAAAAA80, // 3 12 0 1 - 0x55FFAA00, // 4 0 0 12 - 0x57FFAA00, // 4 0 1 11 - 0x5FFFAA00, // 4 0 2 10 - 0x7FFFAA00, // 4 0 3 9 - 0xFFFFAA00, // 4 0 4 8 - 0xFFFFAA00, // 4 0 5 7 - 0xFFFFAA00, // 4 0 6 6 - 0xFFFFAA00, // 4 0 7 5 - 0xFFFFAA00, // 4 0 8 4 - 0xFFFFAA00, // 4 0 9 3 - 0xFFFFAA00, // 4 0 10 2 - 0xFFFFAA00, // 4 0 11 1 - 0x5FFEAA00, // 4 1 0 11 - 0x7FFEAA00, // 4 1 1 10 - 0xFFFEAA00, // 4 1 2 9 - 0xFFFEAA00, // 4 1 3 8 - 0xFFFEAA00, // 4 1 4 7 - 0xFFFEAA00, // 4 1 5 6 - 0xFFFEAA00, // 4 1 6 5 - 0xFFFEAA00, // 4 1 7 4 - 0xFFFEAA00, // 4 1 8 3 - 0xFFFEAA00, // 4 1 9 2 - 0xFFFEAA00, // 4 1 10 1 - 0xFFFAAA00, // 4 2 0 10 - 0xFFFAAA00, // 4 2 1 9 - 0xFFFAAA00, // 4 2 2 8 - 0xFFFAAA00, // 4 2 3 7 - 0xFFFAAA00, // 4 2 4 6 - 0xFFFAAA00, // 4 2 5 5 - 0xFFFAAA00, // 4 2 6 4 - 0xFFFAAA00, // 4 2 7 3 - 0xFFFAAA00, // 4 2 8 2 - 0xFFFAAA00, // 4 2 9 1 - 0xFFEAAA00, // 4 3 0 9 - 0xFFEAAA00, // 4 3 1 8 - 0xFFEAAA00, // 4 3 2 7 - 0xFFEAAA00, // 4 3 3 6 - 0xFFEAAA00, // 4 3 4 5 - 0xFFEAAA00, // 4 3 5 4 - 0xFFEAAA00, // 4 3 6 3 - 0xFFEAAA00, // 4 3 7 2 - 0xFFEAAA00, // 4 3 8 1 - 0xFFAAAA00, // 4 4 0 8 - 0xFFAAAA00, // 4 4 1 7 - 0xFFAAAA00, // 4 4 2 6 - 0xFFAAAA00, // 4 4 3 5 - 0xFFAAAA00, // 4 4 4 4 - 0xFFAAAA00, // 4 4 5 3 - 0xFFAAAA00, // 4 4 6 2 - 0xFFAAAA00, // 4 4 7 1 - 0xFEAAAA00, // 4 5 0 7 - 0xFEAAAA00, // 4 5 1 6 - 0xFEAAAA00, // 4 5 2 5 - 0xFEAAAA00, // 4 5 3 4 - 0xFEAAAA00, // 4 5 4 3 - 0xFEAAAA00, // 4 5 5 2 - 0xFEAAAA00, // 4 5 6 1 - 0xFAAAAA00, // 4 6 0 6 - 
0xFAAAAA00, // 4 6 1 5 - 0xFAAAAA00, // 4 6 2 4 - 0xFAAAAA00, // 4 6 3 3 - 0xFAAAAA00, // 4 6 4 2 - 0xFAAAAA00, // 4 6 5 1 - 0xEAAAAA00, // 4 7 0 5 - 0xEAAAAA00, // 4 7 1 4 - 0xEAAAAA00, // 4 7 2 3 - 0xEAAAAA00, // 4 7 3 2 - 0xEAAAAA00, // 4 7 4 1 - 0xAAAAAA00, // 4 8 0 4 - 0xAAAAAA00, // 4 8 1 3 - 0xAAAAAA00, // 4 8 2 2 - 0xAAAAAA00, // 4 8 3 1 - 0xAAAAAA00, // 4 9 0 3 - 0xAAAAAA00, // 4 9 1 2 - 0xAAAAAA00, // 4 9 2 1 - 0xAAAAAA00, // 4 10 0 2 - 0xAAAAAA00, // 4 10 1 1 - 0xAAAAAA00, // 4 11 0 1 - 0x7FFAA800, // 5 0 0 11 - 0xFFFAA800, // 5 0 1 10 - 0xFFFAA800, // 5 0 2 9 - 0xFFFAA800, // 5 0 3 8 - 0xFFFAA800, // 5 0 4 7 - 0xFFFAA800, // 5 0 5 6 - 0xFFFAA800, // 5 0 6 5 - 0xFFFAA800, // 5 0 7 4 - 0xFFFAA800, // 5 0 8 3 - 0xFFFAA800, // 5 0 9 2 - 0xFFFAA800, // 5 0 10 1 - 0xFFEAA800, // 5 1 0 10 - 0xFFEAA800, // 5 1 1 9 - 0xFFEAA800, // 5 1 2 8 - 0xFFEAA800, // 5 1 3 7 - 0xFFEAA800, // 5 1 4 6 - 0xFFEAA800, // 5 1 5 5 - 0xFFEAA800, // 5 1 6 4 - 0xFFEAA800, // 5 1 7 3 - 0xFFEAA800, // 5 1 8 2 - 0xFFEAA800, // 5 1 9 1 - 0xFFAAA800, // 5 2 0 9 - 0xFFAAA800, // 5 2 1 8 - 0xFFAAA800, // 5 2 2 7 - 0xFFAAA800, // 5 2 3 6 - 0xFFAAA800, // 5 2 4 5 - 0xFFAAA800, // 5 2 5 4 - 0xFFAAA800, // 5 2 6 3 - 0xFFAAA800, // 5 2 7 2 - 0xFFAAA800, // 5 2 8 1 - 0xFEAAA800, // 5 3 0 8 - 0xFEAAA800, // 5 3 1 7 - 0xFEAAA800, // 5 3 2 6 - 0xFEAAA800, // 5 3 3 5 - 0xFEAAA800, // 5 3 4 4 - 0xFEAAA800, // 5 3 5 3 - 0xFEAAA800, // 5 3 6 2 - 0xFEAAA800, // 5 3 7 1 - 0xFAAAA800, // 5 4 0 7 - 0xFAAAA800, // 5 4 1 6 - 0xFAAAA800, // 5 4 2 5 - 0xFAAAA800, // 5 4 3 4 - 0xFAAAA800, // 5 4 4 3 - 0xFAAAA800, // 5 4 5 2 - 0xFAAAA800, // 5 4 6 1 - 0xEAAAA800, // 5 5 0 6 - 0xEAAAA800, // 5 5 1 5 - 0xEAAAA800, // 5 5 2 4 - 0xEAAAA800, // 5 5 3 3 - 0xEAAAA800, // 5 5 4 2 - 0xEAAAA800, // 5 5 5 1 - 0xAAAAA800, // 5 6 0 5 - 0xAAAAA800, // 5 6 1 4 - 0xAAAAA800, // 5 6 2 3 - 0xAAAAA800, // 5 6 3 2 - 0xAAAAA800, // 5 6 4 1 - 0xAAAAA800, // 5 7 0 4 - 0xAAAAA800, // 5 7 1 3 - 0xAAAAA800, // 5 7 2 2 - 0xAAAAA800, // 5 
7 3 1 - 0xAAAAA800, // 5 8 0 3 - 0xAAAAA800, // 5 8 1 2 - 0xAAAAA800, // 5 8 2 1 - 0xAAAAA800, // 5 9 0 2 - 0xAAAAA800, // 5 9 1 1 - 0xAAAAA800, // 5 10 0 1 - 0xFFAAA000, // 6 0 0 10 - 0xFFAAA000, // 6 0 1 9 - 0xFFAAA000, // 6 0 2 8 - 0xFFAAA000, // 6 0 3 7 - 0xFFAAA000, // 6 0 4 6 - 0xFFAAA000, // 6 0 5 5 - 0xFFAAA000, // 6 0 6 4 - 0xFFAAA000, // 6 0 7 3 - 0xFFAAA000, // 6 0 8 2 - 0xFFAAA000, // 6 0 9 1 - 0xFEAAA000, // 6 1 0 9 - 0xFEAAA000, // 6 1 1 8 - 0xFEAAA000, // 6 1 2 7 - 0xFEAAA000, // 6 1 3 6 - 0xFEAAA000, // 6 1 4 5 - 0xFEAAA000, // 6 1 5 4 - 0xFEAAA000, // 6 1 6 3 - 0xFEAAA000, // 6 1 7 2 - 0xFEAAA000, // 6 1 8 1 - 0xFAAAA000, // 6 2 0 8 - 0xFAAAA000, // 6 2 1 7 - 0xFAAAA000, // 6 2 2 6 - 0xFAAAA000, // 6 2 3 5 - 0xFAAAA000, // 6 2 4 4 - 0xFAAAA000, // 6 2 5 3 - 0xFAAAA000, // 6 2 6 2 - 0xFAAAA000, // 6 2 7 1 - 0xEAAAA000, // 6 3 0 7 - 0xEAAAA000, // 6 3 1 6 - 0xEAAAA000, // 6 3 2 5 - 0xEAAAA000, // 6 3 3 4 - 0xEAAAA000, // 6 3 4 3 - 0xEAAAA000, // 6 3 5 2 - 0xEAAAA000, // 6 3 6 1 - 0xAAAAA000, // 6 4 0 6 - 0xAAAAA000, // 6 4 1 5 - 0xAAAAA000, // 6 4 2 4 - 0xAAAAA000, // 6 4 3 3 - 0xAAAAA000, // 6 4 4 2 - 0xAAAAA000, // 6 4 5 1 - 0xAAAAA000, // 6 5 0 5 - 0xAAAAA000, // 6 5 1 4 - 0xAAAAA000, // 6 5 2 3 - 0xAAAAA000, // 6 5 3 2 - 0xAAAAA000, // 6 5 4 1 - 0xAAAAA000, // 6 6 0 4 - 0xAAAAA000, // 6 6 1 3 - 0xAAAAA000, // 6 6 2 2 - 0xAAAAA000, // 6 6 3 1 - 0xAAAAA000, // 6 7 0 3 - 0xAAAAA000, // 6 7 1 2 - 0xAAAAA000, // 6 7 2 1 - 0xAAAAA000, // 6 8 0 2 - 0xAAAAA000, // 6 8 1 1 - 0xAAAAA000, // 6 9 0 1 - 0xFAAA8000, // 7 0 0 9 - 0xFAAA8000, // 7 0 1 8 - 0xFAAA8000, // 7 0 2 7 - 0xFAAA8000, // 7 0 3 6 - 0xFAAA8000, // 7 0 4 5 - 0xFAAA8000, // 7 0 5 4 - 0xFAAA8000, // 7 0 6 3 - 0xFAAA8000, // 7 0 7 2 - 0xFAAA8000, // 7 0 8 1 - 0xEAAA8000, // 7 1 0 8 - 0xEAAA8000, // 7 1 1 7 - 0xEAAA8000, // 7 1 2 6 - 0xEAAA8000, // 7 1 3 5 - 0xEAAA8000, // 7 1 4 4 - 0xEAAA8000, // 7 1 5 3 - 0xEAAA8000, // 7 1 6 2 - 0xEAAA8000, // 7 1 7 1 - 0xAAAA8000, // 7 2 0 7 - 0xAAAA8000, // 
7 2 1 6 - 0xAAAA8000, // 7 2 2 5 - 0xAAAA8000, // 7 2 3 4 - 0xAAAA8000, // 7 2 4 3 - 0xAAAA8000, // 7 2 5 2 - 0xAAAA8000, // 7 2 6 1 - 0xAAAA8000, // 7 3 0 6 - 0xAAAA8000, // 7 3 1 5 - 0xAAAA8000, // 7 3 2 4 - 0xAAAA8000, // 7 3 3 3 - 0xAAAA8000, // 7 3 4 2 - 0xAAAA8000, // 7 3 5 1 - 0xAAAA8000, // 7 4 0 5 - 0xAAAA8000, // 7 4 1 4 - 0xAAAA8000, // 7 4 2 3 - 0xAAAA8000, // 7 4 3 2 - 0xAAAA8000, // 7 4 4 1 - 0xAAAA8000, // 7 5 0 4 - 0xAAAA8000, // 7 5 1 3 - 0xAAAA8000, // 7 5 2 2 - 0xAAAA8000, // 7 5 3 1 - 0xAAAA8000, // 7 6 0 3 - 0xAAAA8000, // 7 6 1 2 - 0xAAAA8000, // 7 6 2 1 - 0xAAAA8000, // 7 7 0 2 - 0xAAAA8000, // 7 7 1 1 - 0xAAAA8000, // 7 8 0 1 - 0xAAAA0000, // 8 0 0 8 - 0xAAAA0000, // 8 0 1 7 - 0xAAAA0000, // 8 0 2 6 - 0xAAAA0000, // 8 0 3 5 - 0xAAAA0000, // 8 0 4 4 - 0xAAAA0000, // 8 0 5 3 - 0xAAAA0000, // 8 0 6 2 - 0xAAAA0000, // 8 0 7 1 - 0xAAAA0000, // 8 1 0 7 - 0xAAAA0000, // 8 1 1 6 - 0xAAAA0000, // 8 1 2 5 - 0xAAAA0000, // 8 1 3 4 - 0xAAAA0000, // 8 1 4 3 - 0xAAAA0000, // 8 1 5 2 - 0xAAAA0000, // 8 1 6 1 - 0xAAAA0000, // 8 2 0 6 - 0xAAAA0000, // 8 2 1 5 - 0xAAAA0000, // 8 2 2 4 - 0xAAAA0000, // 8 2 3 3 - 0xAAAA0000, // 8 2 4 2 - 0xAAAA0000, // 8 2 5 1 - 0xAAAA0000, // 8 3 0 5 - 0xAAAA0000, // 8 3 1 4 - 0xAAAA0000, // 8 3 2 3 - 0xAAAA0000, // 8 3 3 2 - 0xAAAA0000, // 8 3 4 1 - 0xAAAA0000, // 8 4 0 4 - 0xAAAA0000, // 8 4 1 3 - 0xAAAA0000, // 8 4 2 2 - 0xAAAA0000, // 8 4 3 1 - 0xAAAA0000, // 8 5 0 3 - 0xAAAA0000, // 8 5 1 2 - 0xAAAA0000, // 8 5 2 1 - 0xAAAA0000, // 8 6 0 2 - 0xAAAA0000, // 8 6 1 1 - 0xAAAA0000, // 8 7 0 1 - 0xAAA80000, // 9 0 0 7 - 0xAAA80000, // 9 0 1 6 - 0xAAA80000, // 9 0 2 5 - 0xAAA80000, // 9 0 3 4 - 0xAAA80000, // 9 0 4 3 - 0xAAA80000, // 9 0 5 2 - 0xAAA80000, // 9 0 6 1 - 0xAAA80000, // 9 1 0 6 - 0xAAA80000, // 9 1 1 5 - 0xAAA80000, // 9 1 2 4 - 0xAAA80000, // 9 1 3 3 - 0xAAA80000, // 9 1 4 2 - 0xAAA80000, // 9 1 5 1 - 0xAAA80000, // 9 2 0 5 - 0xAAA80000, // 9 2 1 4 - 0xAAA80000, // 9 2 2 3 - 0xAAA80000, // 9 2 3 2 - 0xAAA80000, // 
9 2 4 1 - 0xAAA80000, // 9 3 0 4 - 0xAAA80000, // 9 3 1 3 - 0xAAA80000, // 9 3 2 2 - 0xAAA80000, // 9 3 3 1 - 0xAAA80000, // 9 4 0 3 - 0xAAA80000, // 9 4 1 2 - 0xAAA80000, // 9 4 2 1 - 0xAAA80000, // 9 5 0 2 - 0xAAA80000, // 9 5 1 1 - 0xAAA80000, // 9 6 0 1 - 0xAAA00000, // 10 0 0 6 - 0xAAA00000, // 10 0 1 5 - 0xAAA00000, // 10 0 2 4 - 0xAAA00000, // 10 0 3 3 - 0xAAA00000, // 10 0 4 2 - 0xAAA00000, // 10 0 5 1 - 0xAAA00000, // 10 1 0 5 - 0xAAA00000, // 10 1 1 4 - 0xAAA00000, // 10 1 2 3 - 0xAAA00000, // 10 1 3 2 - 0xAAA00000, // 10 1 4 1 - 0xAAA00000, // 10 2 0 4 - 0xAAA00000, // 10 2 1 3 - 0xAAA00000, // 10 2 2 2 - 0xAAA00000, // 10 2 3 1 - 0xAAA00000, // 10 3 0 3 - 0xAAA00000, // 10 3 1 2 - 0xAAA00000, // 10 3 2 1 - 0xAAA00000, // 10 4 0 2 - 0xAAA00000, // 10 4 1 1 - 0xAAA00000, // 10 5 0 1 - 0xAA800000, // 11 0 0 5 - 0xAA800000, // 11 0 1 4 - 0xAA800000, // 11 0 2 3 - 0xAA800000, // 11 0 3 2 - 0xAA800000, // 11 0 4 1 - 0xAA800000, // 11 1 0 4 - 0xAA800000, // 11 1 1 3 - 0xAA800000, // 11 1 2 2 - 0xAA800000, // 11 1 3 1 - 0xAA800000, // 11 2 0 3 - 0xAA800000, // 11 2 1 2 - 0xAA800000, // 11 2 2 1 - 0xAA800000, // 11 3 0 2 - 0xAA800000, // 11 3 1 1 - 0xAA800000, // 11 4 0 1 - 0xAA000000, // 12 0 0 4 - 0xAA000000, // 12 0 1 3 - 0xAA000000, // 12 0 2 2 - 0xAA000000, // 12 0 3 1 - 0xAA000000, // 12 1 0 3 - 0xAA000000, // 12 1 1 2 - 0xAA000000, // 12 1 2 1 - 0xAA000000, // 12 2 0 2 - 0xAA000000, // 12 2 1 1 - 0xAA000000, // 12 3 0 1 - 0xA8000000, // 13 0 0 3 - 0xA8000000, // 13 0 1 2 - 0xA8000000, // 13 0 2 1 - 0xA8000000, // 13 1 0 2 - 0xA8000000, // 13 1 1 1 - 0xA8000000, // 13 2 0 1 - 0xA0000000, // 14 0 0 2 - 0xA0000000, // 14 0 1 1 - 0xA0000000, // 14 1 0 1 - 0x80000000, // 15 0 0 1 - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, - 
0x80000000, - 0x80000000, - 0x80000000, - 0x80000000, -}; - diff --git a/src/nvtt/cuda/CompressKernel.cu b/src/nvtt/cuda/CompressKernel.cu index cbdf492..ce332d0 100644 --- a/src/nvtt/cuda/CompressKernel.cu +++ b/src/nvtt/cuda/CompressKernel.cu @@ -21,8 +21,10 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. +#include +#include +#include #include -#include // FLT_MAX #include "CudaMath.h" @@ -51,397 +53,73 @@ __device__ inline void swap(T & a, T & b) __constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f }; __constant__ float3 kColorMetricSqr = { 1.0f, 1.0f, 1.0f }; -// Some kernels read the input through texture. -texture tex; //////////////////////////////////////////////////////////////////////////////// -// Color helpers -//////////////////////////////////////////////////////////////////////////////// - -__device__ inline uint float_to_u8(float value) -{ - return min(max(__float2int_rn((255 * value + 0.5f) / (1.0f + 1.0f/255.0f)), 0), 255); -} - -__device__ inline uint float_to_u6(float value) -{ - return min(max(__float2int_rn((63 * value + 0.5f) / (1.0f + 1.0f/63.0f)), 0), 63); -} - -__device__ inline uint float_to_u5(float value) -{ - return min(max(__float2int_rn((31 * value + 0.5f) / (1.0f + 1.0f/31.0f)), 0), 31); -} - -__device__ inline float u8_to_float(uint value) -{ - return __saturatef(__uint2float_rn(value) / 255.0f); - //return (value) / 255.0f; -} - -__device__ float3 color32ToFloat3(uint c) -{ - float3 color; - color.z = u8_to_float((c >> 0) & 0xFF); - color.y = u8_to_float((c >> 8) & 0xFF); - color.x = u8_to_float((c >> 16) & 0xFF); - return color; -} - -__device__ int3 color16ToInt3(ushort c) -{ - int3 color; - - color.z = ((c >> 0) & 0x1F); - color.z = (color.z << 3) | (color.z >> 2); - - color.y = ((c >> 5) & 0x3F); - color.y = (color.y << 2) | (color.y >> 4); - - color.x = ((c >> 11) & 0x1F); - color.x = (color.x << 3) | (color.x >> 2); - - return color; -} - -__device__ float3 
color16ToFloat3(ushort c) -{ - int3 color = color16ToInt3(c); - return make_float3(color.x, color.y, color.z) * (1.0f / 255.0f); -} - -__device__ int3 float3ToInt3(float3 c) -{ - return make_int3(c.x * 255, c.y * 255, c.z * 255); -} - -__device__ float3 int3ToFloat3(int3 c) -{ - return make_float3(float_to_u8(c.x), float_to_u8(c.y), float_to_u8(c.z)); -} - - -__device__ int colorDistance(int3 c0, int3 c1) -{ - int dx = c0.x-c1.x; - int dy = c0.y-c1.y; - int dz = c0.z-c1.z; - return __mul24(dx, dx) + __mul24(dy, dy) + __mul24(dz, dz); -} - - -//////////////////////////////////////////////////////////////////////////////// -// Round color to RGB565 and expand +// Sort colors //////////////////////////////////////////////////////////////////////////////// - - -#if 0 -__device__ inline uint float_to_u8(float value) -{ - //uint result; - //asm("cvt.sat.rni.u8.f32 %0, %1;" : "=r" (result) : "f" (value)); - //return result; - //return __float2uint_rn(__saturatef(value) * 255.0f); - - int result = __float2int_rn((255 * value + 0.5f) / (1.0f + 1.0f/255.0f)); - result = max(result, 0); - result = min(result, 255); - return result; -} - -__device__ inline float u8_to_float(uint value) -{ - //float result; - //asm("cvt.sat.rn.f32.u8 %0, %1;" : "=f" (result) : "r" (value)); // this is wrong! 
- //return result; - return __saturatef(__uint2float_rn(value) / 255.0f); -} - -inline __device__ float3 roundAndExpand565(float3 v, ushort * w) -{ - uint x = float_to_u8(v.x) >> 3; - uint y = float_to_u8(v.y) >> 2; - uint z = float_to_u8(v.z) >> 3; - *w = (x << 11) | (y << 5) | z; - v.x = u8_to_float((x << 3) | (x >> 2)); - v.y = u8_to_float((y << 2) | (y >> 4)); - v.z = u8_to_float((z << 3) | (z >> 2)); -// v.x = u8_to_float(x) * 255.0f / 31.0f; -// v.y = u8_to_float(y) * 255.0f / 63.0f; -// v.z = u8_to_float(z) * 255.0f / 31.0f; - return v; -} -#else - -inline __device__ float3 roundAndExpand565(float3 v, ushort * w) -{ - uint x = __float2uint_rn(__saturatef(v.x) * 31.0f); - uint y = __float2uint_rn(__saturatef(v.y) * 63.0f); - uint z = __float2uint_rn(__saturatef(v.z) * 31.0f); - - //uint x = float_to_u5(v.x); - //uint y = float_to_u6(v.y); - //uint z = float_to_u5(v.z); - - *w = (x << 11) | (y << 5) | z; +__device__ void sortColors(const float * values, int * cmp) +{ + int tid = threadIdx.x; + +#if 1 + cmp[tid] = (values[0] < values[tid]); + cmp[tid] += (values[1] < values[tid]); + cmp[tid] += (values[2] < values[tid]); + cmp[tid] += (values[3] < values[tid]); + cmp[tid] += (values[4] < values[tid]); + cmp[tid] += (values[5] < values[tid]); + cmp[tid] += (values[6] < values[tid]); + cmp[tid] += (values[7] < values[tid]); + cmp[tid] += (values[8] < values[tid]); + cmp[tid] += (values[9] < values[tid]); + cmp[tid] += (values[10] < values[tid]); + cmp[tid] += (values[11] < values[tid]); + cmp[tid] += (values[12] < values[tid]); + cmp[tid] += (values[13] < values[tid]); + cmp[tid] += (values[14] < values[tid]); + cmp[tid] += (values[15] < values[tid]); - v.x = __uint2float_rn(x) * 1.0f / 31.0f; - v.y = __uint2float_rn(y) * 1.0f / 63.0f; - v.z = __uint2float_rn(z) * 1.0f / 31.0f; - - //v.x = u8_to_float((x << 3) | (x >> 2)); - //v.y = u8_to_float((y << 2) | (y >> 4)); - //v.z = u8_to_float((z << 3) | (z >> 2)); - - return v; -} -#endif -inline __device__ float2 
roundAndExpand56(float2 v, ushort * w) -{ - uint x = __float2uint_rn(__saturatef(v.x) * 31.0f); - uint y = __float2uint_rn(__saturatef(v.y) * 63.0f); - *w = (x << 11) | (y << 5); - v.x = __uint2float_rn(x) * 1.0f / 31.0f; - v.y = __uint2float_rn(y) * 1.0f / 63.0f; - return v; -} - -inline __device__ float2 roundAndExpand88(float2 v, ushort * w) -{ - uint x = __float2uint_rn(__saturatef(v.x) * 255.0f); - uint y = __float2uint_rn(__saturatef(v.y) * 255.0f); - *w = (x << 8) | y; - v.x = __uint2float_rn(x) * 1.0f / 255.0f; - v.y = __uint2float_rn(y) * 1.0f / 255.0f; - return v; -} - - -//////////////////////////////////////////////////////////////////////////////// -// Block errors -//////////////////////////////////////////////////////////////////////////////// - -__device__ float3 blockError4(const float3 * colors, uint permutation, float3 a, float3 b) -{ - float3 error = make_float3(0.0f, 0.0f, 0.0f); - - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - float beta = (bits & 1); - if (bits & 2) beta = (1 + beta) / 3.0f; - float alpha = 1.0f - beta; - - float3 diff = colors[i] - (a*alpha + b*beta); - - error += diff*diff; - } - - return error; -} - -__device__ float3 blockError4(const float3 * colors, uint permutation, ushort c0, ushort c1) -{ - float3 error = make_float3(0.0f, 0.0f, 0.0f); - - int3 color0 = color16ToInt3(c0); - int3 color1 = color16ToInt3(c1); - - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - int beta = (bits & 1); - if (bits & 2) beta = (1 + beta); - float alpha = 3 - beta; - - int3 color; - color.x = (color0.x * alpha + color1.x * beta) / 3; - color.y = (color0.y * alpha + color1.y * beta) / 3; - color.z = (color0.z * alpha + color1.z * beta) / 3; - - float3 diff = colors[i] - int3ToFloat3(color); - - error += diff*diff; - } - - return error; -} - + // Resolve elements with the same index. 
+ if (tid > 0 && cmp[tid] == cmp[0]) ++cmp[tid]; + if (tid > 1 && cmp[tid] == cmp[1]) ++cmp[tid]; + if (tid > 2 && cmp[tid] == cmp[2]) ++cmp[tid]; + if (tid > 3 && cmp[tid] == cmp[3]) ++cmp[tid]; + if (tid > 4 && cmp[tid] == cmp[4]) ++cmp[tid]; + if (tid > 5 && cmp[tid] == cmp[5]) ++cmp[tid]; + if (tid > 6 && cmp[tid] == cmp[6]) ++cmp[tid]; + if (tid > 7 && cmp[tid] == cmp[7]) ++cmp[tid]; + if (tid > 8 && cmp[tid] == cmp[8]) ++cmp[tid]; + if (tid > 9 && cmp[tid] == cmp[9]) ++cmp[tid]; + if (tid > 10 && cmp[tid] == cmp[10]) ++cmp[tid]; + if (tid > 11 && cmp[tid] == cmp[11]) ++cmp[tid]; + if (tid > 12 && cmp[tid] == cmp[12]) ++cmp[tid]; + if (tid > 13 && cmp[tid] == cmp[13]) ++cmp[tid]; + if (tid > 14 && cmp[tid] == cmp[14]) ++cmp[tid]; +#else -__device__ float3 blockError3(const float3 * colors, uint permutation, float3 a, float3 b) -{ - float3 error = make_float3(0.0f, 0.0f, 0.0f); + cmp[tid] = 0; + #pragma unroll for (int i = 0; i < 16; i++) { - const uint bits = permutation >> (2*i); - - float beta = (bits & 1); - if (bits & 2) beta = 0.5f; - float alpha = 1.0f - beta; - - float3 diff = colors[i] - (a*alpha + b*beta); - - error += diff*diff; - } - - return error; -} - - -//////////////////////////////////////////////////////////////////////////////// -// Sort colors -//////////////////////////////////////////////////////////////////////////////// - -// @@ Experimental code to avoid duplicate colors for faster compression. -// We could first sort along the best fit line and only compare colors that have the same projection. -// The hardest part is to maintain the indices to map packed/sorted colors to the input colors. -// We also need to update several functions that assume the number of colors is fixed to 16. -// And compute different bit maps for the different color counts. -// This is a fairly high amount of work. 
-__device__ int packColors(float3 * values, float * weights, int * ranks) -{ - const int tid = threadIdx.x; - - __shared__ int count; - count = 0; - - bool alive = true; + cmp[tid] += (values[i] < values[tid]); + } - // Append this - for (int i = 0; i < 16; i++) + // Resolve elements with the same index. + #pragma unroll + for (int i = 0; i < 15; i++) { - // One thread leads on each iteration. - if (tid == i) { - - // If thread alive, then append element. - if (alive) { - values[count] = values[i]; - weights[count] = weights[i]; - count++; - } - - // Otherwise update weight. - else { - weights[ranks[i]] += weights[i]; - } - } - - // Kill all threads that have the same element and record rank. - if (values[i] == values[tid]) { - alive = false; - ranks[tid] = count - 1; - } + if (tid > 0 && cmp[tid] == cmp[i]) ++cmp[tid]; } - - return count; -} - - -__device__ void sortColors(const float * values, int * ranks) -{ -#if __DEVICE_EMULATION__ - if (threadIdx.x == 0) - { - for (int tid = 0; tid < 16; tid++) - { - int rank = 0; - for (int i = 0; i < 16; i++) - { - rank += (values[i] < values[tid]); - } - - ranks[tid] = rank; - } - - // Resolve elements with the same index. - for (int i = 0; i < 15; i++) - { - for (int tid = 0; tid < 16; tid++) - { - if (tid > i && ranks[tid] == ranks[i]) ++ranks[tid]; - } - } - } -#else - const int tid = threadIdx.x; - - int rank = 0; - - #pragma unroll - for (int i = 0; i < 16; i++) - { - rank += (values[i] < values[tid]); - } - - ranks[tid] = rank; - - // Resolve elements with the same index. 
- #pragma unroll - for (int i = 0; i < 15; i++) - { - if ((tid > i) & (ranks[tid] == ranks[i])) ++ranks[tid]; - } -#endif -} - -__device__ void sortColors(const float * values, int * ranks, int count) -{ -#if __DEVICE_EMULATION__ - if (threadIdx.x == 0) - { - for (int tid = 0; tid < count; tid++) - { - int rank = 0; - for (int i = 0; i < count; i++) - { - rank += (values[i] < values[tid]); - } - - ranks[tid] = rank; - } - - // Resolve elements with the same index. - for (int i = 0; i < count-1; i++) - { - for (int tid = 0; tid < count; tid++) - { - if (tid > i && ranks[tid] == ranks[i]) ++ranks[tid]; - } - } - } -#else - const int tid = threadIdx.x; - - int rank = 0; - - #pragma unroll - for (int i = 0; i < count; i++) - { - rank += (values[i] < values[tid]); - } - - ranks[tid] = rank; - - // Resolve elements with the same index. - #pragma unroll - for (int i = 0; i < count-1; i++) - { - if ((tid > i) & (ranks[tid] == ranks[i])) ++ranks[tid]; - } #endif } - //////////////////////////////////////////////////////////////////////////////// // Load color block to shared mem //////////////////////////////////////////////////////////////////////////////// - -/*__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor) +__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor) { const int bid = blockIdx.x; const int idx = threadIdx.x; @@ -453,58 +131,14 @@ __device__ void sortColors(const float * values, int * ranks, int count) // Read color and copy to shared mem. uint c = image[(bid) * 16 + idx]; - colors[idx] = color32ToFloat3(c); + colors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f); + colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f); + colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f); // No need to synchronize, 16 < warp size. - __debugsync(); - - // Sort colors along the best fit line. 
- colorSums(colors, sums); - float3 axis = bestFitLine(colors, sums[0], kColorMetric); - - *sameColor = (axis == make_float3(0, 0, 0)); - - dps[idx] = dot(colors[idx], axis); - - __debugsync(); - - sortColors(dps, xrefs); - - float3 tmp = colors[idx]; - __debugsync(); - colors[xrefs[idx]] = tmp; - } #if __DEVICE_EMULATION__ - else - { - __debugsync(); - __debugsync(); - __debugsync(); - } + } __debugsync(); if (idx < 16) { #endif -}*/ - -__device__ void loadColorBlockTex(uint firstBlock, uint width, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor) -{ - const int bid = blockIdx.x; - const int idx = threadIdx.x; - - __shared__ float dps[16]; - - if (idx < 16) - { - float x = 4 * ((firstBlock + bid) % width) + idx % 4; // @@ Avoid mod and div by using 2D grid? - float y = 4 * ((firstBlock + bid) / width) + idx / 4; - - // Read color and copy to shared mem. - float4 c = tex2D(tex, x, y); - - colors[idx].x = c.z; - colors[idx].y = c.y; - colors[idx].z = c.x; - - // No need to synchronize, 16 < warp size. - __debugsync(); // Sort colors along the best fit line. colorSums(colors, sums); @@ -514,123 +148,17 @@ __device__ void loadColorBlockTex(uint firstBlock, uint width, float3 colors[16] dps[idx] = dot(colors[idx], axis); - __debugsync(); - - sortColors(dps, xrefs); - - float3 tmp = colors[idx]; - __debugsync(); - colors[xrefs[idx]] = tmp; - } #if __DEVICE_EMULATION__ - else - { - __debugsync(); - __debugsync(); - __debugsync(); - } + } __debugsync(); if (idx < 16) { #endif -} - -/* -__device__ void loadColorBlockTex(uint firstBlock, uint w, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor) -{ - const int bid = blockIdx.x; - const int idx = threadIdx.x; - - __shared__ float dps[16]; - - if (idx < 16) - { - float x = 4 * ((firstBlock + bid) % w) + idx % 4; // @@ Avoid mod and div by using 2D grid? - float y = 4 * ((firstBlock + bid) / w) + idx / 4; - - // Read color and copy to shared mem. 
- float4 c = tex2D(tex, x, y); - - colors[idx].x = c.z; - colors[idx].y = c.y; - colors[idx].z = c.x; - weights[idx] = 1; - - int count = packColors(colors, weights); - if (idx < count) - { - // Sort colors along the best fit line. - colorSums(colors, sums); - float3 axis = bestFitLine(colors, sums[0], kColorMetric); - - *sameColor = (axis == make_float3(0, 0, 0)); - - dps[idx] = dot(colors[idx], axis); - - sortColors(dps, xrefs); - - float3 tmp = colors[idx]; - colors[xrefs[idx]] = tmp; - } - } -} -*/ - -__device__ void loadColorBlockTex(uint firstBlock, uint width, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor) -{ - const int bid = blockIdx.x; - const int idx = threadIdx.x; - - __shared__ float3 rawColors[16]; - __shared__ float dps[16]; - - if (idx < 16) - { - float x = 4 * ((firstBlock + bid) % width) + idx % 4; // @@ Avoid mod and div by using 2D grid? - float y = 4 * ((firstBlock + bid) / width) + idx / 4; - - // Read color and copy to shared mem. - float4 c = tex2D(tex, x, y); - - rawColors[idx].x = c.z; - rawColors[idx].y = c.y; - rawColors[idx].z = c.x; - weights[idx] = c.w; - - colors[idx] = rawColors[idx] * weights[idx]; - - // No need to synchronize, 16 < warp size. - __debugsync(); - - // Sort colors along the best fit line. - colorSums(colors, sums); - float3 axis = bestFitLine(colors, sums[0], kColorMetric); - - *sameColor = (axis == make_float3(0, 0, 0)); - - // Single color compressor needs unweighted colors. 
- if (*sameColor) colors[idx] = rawColors[idx]; - - dps[idx] = dot(colors[idx], axis); - - __debugsync(); sortColors(dps, xrefs); float3 tmp = colors[idx]; - float w = weights[idx]; - __debugsync(); colors[xrefs[idx]] = tmp; - weights[xrefs[idx]] = w; } -#if __DEVICE_EMULATION__ - else - { - __debugsync(); - __debugsync(); - __debugsync(); - } -#endif } -/* __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor) { const int bid = blockIdx.x; @@ -651,8 +179,11 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum colors[idx] = rawColors[idx] * weights[idx]; + // No need to synchronize, 16 < warp size. - __debugsync(); +#if __DEVICE_EMULATION__ + } __debugsync(); if (idx < 16) { +#endif // Sort colors along the best fit line. colorSums(colors, sums); @@ -665,69 +196,34 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum dps[idx] = dot(rawColors[idx], axis); - __debugsync(); +#if __DEVICE_EMULATION__ + } __debugsync(); if (idx < 16) { +#endif sortColors(dps, xrefs); float3 tmp = colors[idx]; - float w = weights[idx]; - __debugsync(); colors[xrefs[idx]] = tmp; + + float w = weights[idx]; weights[xrefs[idx]] = w; } -#if __DEVICE_EMULATION__ - else - { - __debugsync(); - __debugsync(); - __debugsync(); - } -#endif } -*/ - -__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16], int * sameColor) -{ - const int bid = blockIdx.x; - const int idx = threadIdx.x; - __shared__ float dps[16]; - - if (idx < 16) - { - // Read color and copy to shared mem. - uint c = image[(bid) * 16 + idx]; - - colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f); - colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f); - - // No need to synchronize, 16 < warp size. - __debugsync(); - // Sort colors along the best fit line. 
- colorSums(colors, sums); - float2 axis = bestFitLine(colors, sums[0]); - - *sameColor = (axis == make_float2(0, 0)); - - dps[idx] = dot(colors[idx], axis); - - __debugsync(); - - sortColors(dps, xrefs); - - float2 tmp = colors[idx]; - __debugsync(); - colors[xrefs[idx]] = tmp; - } -#if __DEVICE_EMULATION__ - else - { - __debugsync(); - __debugsync(); - __debugsync(); - } -#endif +//////////////////////////////////////////////////////////////////////////////// +// Round color to RGB565 and expand +//////////////////////////////////////////////////////////////////////////////// +inline __device__ float3 roundAndExpand565(float3 v, ushort * w) +{ + v.x = rintf(__saturatef(v.x) * 31.0f); + v.y = rintf(__saturatef(v.y) * 63.0f); + v.z = rintf(__saturatef(v.z) * 31.0f); + *w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z; + v.x *= 0.03227752766457f; // approximate integer bit expansion. + v.y *= 0.01583151765563f; + v.z *= 0.03227752766457f; + return v; } @@ -852,8 +348,6 @@ __device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint // compute the error float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - //float3 e = blockError4(colors, permutation, *start, *end); - return (1.0f / 9.0f) * dot(e, kColorMetricSqr); } @@ -890,8 +384,6 @@ __device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint // compute the error float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - //float3 e = blockError3(colors, permutation, a, b); - return (1.0f / 4.0f) * dot(e, kColorMetricSqr); } @@ -977,114 +469,6 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights, } */ -__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end) -{ - // Compute endpoints using least squares. 
- float2 alphax_sum = make_float2(0.0f, 0.0f); - uint akku = 0; - - // Compute alpha & beta for this permutation. - #pragma unroll - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - alphax_sum += alphaTable4[bits & 3] * colors[i]; - akku += prods4[bits & 3]; - } - - float alpha2_sum = float(akku >> 16); - float beta2_sum = float((akku >> 8) & 0xff); - float alphabeta_sum = float(akku & 0xff); - float2 betax_sum = 9.0f * color_sum - alphax_sum; - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - // Round a, b to the closest 5-6 color and expand... - a = roundAndExpand56(a, start); - b = roundAndExpand56(b, end); - - // compute the error - float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - - return (1.0f / 9.0f) * (e.x + e.y); -} - -__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end) -{ - // Compute endpoints using least squares. - float2 alphax_sum = make_float2(0.0f, 0.0f); - uint akku = 0; - - // Compute alpha & beta for this permutation. - #pragma unroll - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - alphax_sum += alphaTable3[bits & 3] * colors[i]; - akku += prods3[bits & 3]; - } - - float alpha2_sum = float(akku >> 16); - float beta2_sum = float((akku >> 8) & 0xff); - float alphabeta_sum = float(akku & 0xff); - float2 betax_sum = 4.0f * color_sum - alphax_sum; - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - // Round a, b to the closest 5-6 color and expand... 
- a = roundAndExpand56(a, start); - b = roundAndExpand56(b, end); - - // compute the error - float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - - return (1.0f / 4.0f) * (e.x + e.y); -} - -__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end) -{ - // Compute endpoints using least squares. - float2 alphax_sum = make_float2(0.0f, 0.0f); - uint akku = 0; - - // Compute alpha & beta for this permutation. - #pragma unroll - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - alphax_sum += alphaTable4[bits & 3] * colors[i]; - akku += prods4[bits & 3]; - } - - float alpha2_sum = float(akku >> 16); - float beta2_sum = float((akku >> 8) & 0xff); - float alphabeta_sum = float(akku & 0xff); - float2 betax_sum = 9.0f * color_sum - alphax_sum; - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - // Round a, b to the closest 8-8 color and expand... 
- a = roundAndExpand88(a, start); - b = roundAndExpand88(b, end); - - // compute the error - float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - - return (1.0f / 9.0f) * (e.x + e.y); -} - //////////////////////////////////////////////////////////////////////////////// // Evaluate all permutations @@ -1096,69 +480,7 @@ __device__ void evalAllPermutations(const float3 * colors, float3 colorSum, cons float bestError = FLT_MAX; __shared__ uint s_permutations[160]; - - for(int i = 0; i < 16; i++) - { - int pidx = idx + NUM_THREADS * i; - if (pidx >= 992) break; - - ushort start, end; - uint permutation = permutations[pidx]; - if (pidx < 160) s_permutations[pidx] = permutation; - - float error = evalPermutation4(colors, colorSum, permutation, &start, &end); - - if (error < bestError) - { - bestError = error; - bestPermutation = permutation; - bestStart = start; - bestEnd = end; - } - } - - if (bestStart < bestEnd) - { - swap(bestEnd, bestStart); - bestPermutation ^= 0x55555555; // Flip indices. - } - - for(int i = 0; i < 3; i++) - { - int pidx = idx + NUM_THREADS * i; - if (pidx >= 160) break; - - ushort start, end; - uint permutation = s_permutations[pidx]; - float error = evalPermutation3(colors, colorSum, permutation, &start, &end); - - if (error < bestError) - { - bestError = error; - bestPermutation = permutation; - bestStart = start; - bestEnd = end; - - if (bestStart > bestEnd) - { - swap(bestEnd, bestStart); - bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices. 
- } - } - } - - errors[idx] = bestError; -} - -/* -__device__ void evalAllPermutations(const float3 * colors, const float * weights, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) -{ - const int idx = threadIdx.x; - - float bestError = FLT_MAX; - - __shared__ uint s_permutations[160]; - + for(int i = 0; i < 16; i++) { int pidx = idx + NUM_THREADS * i; @@ -1167,8 +489,8 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights ushort start, end; uint permutation = permutations[pidx]; if (pidx < 160) s_permutations[pidx] = permutation; - - float error = evalPermutation4(colors, weights, permutation, &start, &end); + + float error = evalPermutation4(colors, colorSum, permutation, &start, &end); if (error < bestError) { @@ -1192,7 +514,7 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights ushort start, end; uint permutation = s_permutations[pidx]; - float error = evalPermutation3(colors, weights, permutation, &start, &end); + float error = evalPermutation3(colors, colorSum, permutation, &start, &end); if (error < bestError) { @@ -1211,16 +533,16 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights errors[idx] = bestError; } -*/ -__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) +/* +__device__ void evalAllPermutations(const float3 * colors, const float * weights, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) { const int idx = threadIdx.x; float bestError = FLT_MAX; __shared__ uint s_permutations[160]; - + for(int i = 0; i < 16; i++) { int pidx = idx + NUM_THREADS * i; @@ -1229,8 +551,8 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons ushort start, end; uint permutation = permutations[pidx]; if (pidx < 
160) s_permutations[pidx] = permutation; - - float error = evalPermutation4(colors, colorSum, permutation, &start, &end); + + float error = evalPermutation4(colors, weights, permutation, &start, &end); if (error < bestError) { @@ -1254,7 +576,7 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons ushort start, end; uint permutation = s_permutations[pidx]; - float error = evalPermutation3(colors, colorSum, permutation, &start, &end); + float error = evalPermutation3(colors, weights, permutation, &start, &end); if (error < bestError) { @@ -1273,6 +595,7 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons errors[idx] = bestError; } +*/ __device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) { @@ -1342,40 +665,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig errors[idx] = bestError; } -__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) -{ - const int idx = threadIdx.x; - - float bestError = FLT_MAX; - - for(int i = 0; i < 16; i++) - { - int pidx = idx + NUM_THREADS * i; - if (pidx >= 704) break; - - ushort start, end; - uint permutation = permutations[pidx]; - - float error = evalPermutationCTX(colors, colorSum, permutation, &start, &end); - - if (error < bestError) - { - bestError = error; - bestPermutation = permutation; - bestStart = start; - bestEnd = end; - } - } - - if (bestStart < bestEnd) - { - swap(bestEnd, bestStart); - bestPermutation ^= 0x55555555; // Flip indices. 
- } - - errors[idx] = bestError; -} - //////////////////////////////////////////////////////////////////////////////// // Find index with minimum error @@ -1403,6 +692,7 @@ __device__ int findMinError(float * errors) } } } + #else for(int d = NUM_THREADS/2; d > 32; d >>= 1) { @@ -1485,107 +775,6 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr result[bid].y = indices; } -__device__ void saveBlockDXT1_Parallel(uint endpoints, float3 colors[16], int xrefs[16], uint * result) -{ - const int tid = threadIdx.x; - const int bid = blockIdx.x; - - if (tid < 16) - { - int3 color = float3ToInt3(colors[xrefs[tid]]); - - ushort endpoint0 = endpoints & 0xFFFF; - ushort endpoint1 = endpoints >> 16; - - int3 palette[4]; - palette[0] = color16ToInt3(endpoint0); - palette[1] = color16ToInt3(endpoint1); - - int d0 = colorDistance(palette[0], color); - int d1 = colorDistance(palette[1], color); - - uint index; - if (endpoint0 > endpoint1) - { - palette[2].x = (2 * palette[0].x + palette[1].x) / 3; - palette[2].y = (2 * palette[0].y + palette[1].y) / 3; - palette[2].z = (2 * palette[0].z + palette[1].z) / 3; - - palette[3].x = (2 * palette[1].x + palette[0].x) / 3; - palette[3].y = (2 * palette[1].y + palette[0].y) / 3; - palette[3].z = (2 * palette[1].z + palette[0].z) / 3; - - int d2 = colorDistance(palette[2], color); - int d3 = colorDistance(palette[3], color); - - // Compute the index that best fit color. 
- uint b0 = d0 > d3; - uint b1 = d1 > d2; - uint b2 = d0 > d2; - uint b3 = d1 > d3; - uint b4 = d2 > d3; - - uint x0 = b1 & b2; - uint x1 = b0 & b3; - uint x2 = b0 & b4; - - index = (x2 | ((x0 | x1) << 1)); - } - else { - palette[2].x = (palette[0].x + palette[1].x) / 2; - palette[2].y = (palette[0].y + palette[1].y) / 2; - palette[2].z = (palette[0].z + palette[1].z) / 2; - - int d2 = colorDistance(palette[2], color); - - index = 0; - if (d1 < d0 && d1 < d2) index = 1; - else if (d2 < d0) index = 2; - } - - __shared__ uint indices[16]; - - indices[tid] = index << (2 * tid); - if (tid < 8) indices[tid] |= indices[tid+8]; - if (tid < 4) indices[tid] |= indices[tid+4]; - if (tid < 2) indices[tid] |= indices[tid+2]; - if (tid < 1) indices[tid] |= indices[tid+1]; - - if (tid < 2) { - result[2 * bid + tid] = tid == 0 ? endpoints : indices[0]; - } - } -} - -__device__ void saveBlockDXT1_Parallel(uint endpoints, uint permutation, int xrefs[16], uint * result) -{ - const int tid = threadIdx.x; - const int bid = blockIdx.x; - - if (tid < 16) - { - // Reorder permutation. - uint index = ((permutation >> (2 * xrefs[tid])) & 3) << (2 * tid); - __shared__ uint indices[16]; - - indices[tid] = index; - if (tid < 8) indices[tid] |= indices[tid+8]; - if (tid < 4) indices[tid] |= indices[tid+4]; - if (tid < 2) indices[tid] |= indices[tid+2]; - if (tid < 1) indices[tid] |= indices[tid+1]; - - if (tid < 2) { - result[2 * bid + tid] = tid == 0 ? 
endpoints : indices[0]; - } - } -} - - -__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result) -{ - saveBlockDXT1(start, end, permutation, xrefs, result); -} - __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result) { const int bid = blockIdx.x; @@ -1609,54 +798,18 @@ __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result) } } -__device__ void saveSingleColorBlockDXT1(float2 color, uint2 * result) -{ - const int bid = blockIdx.x; - - int r = color.x * 255; - int g = color.y * 255; - - ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5); - ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5); - - if (color0 < color1) - { - result[bid].x = (color0 << 16) | color1; - result[bid].y = 0xffffffff; - } - else - { - result[bid].x = (color1 << 16) | color0; - result[bid].y = 0xaaaaaaaa; - } -} - -__device__ void saveSingleColorBlockCTX1(float2 color, uint2 * result) -{ - const int bid = blockIdx.x; - - int r = color.x * 255; - int g = color.y * 255; - - ushort color0 = (r << 8) | (g); - - result[bid].x = (color0 << 16) | color0; - result[bid].y = 0x00000000; -} - //////////////////////////////////////////////////////////////////////////////// // Compress color block //////////////////////////////////////////////////////////////////////////////// - -__global__ void compressDXT1(uint firstBlock, uint w, const uint * permutations, uint2 * result) +__global__ void compressDXT1(const uint * permutations, const uint * image, uint2 * result) { __shared__ float3 colors[16]; __shared__ float3 sums[16]; __shared__ int xrefs[16]; __shared__ int sameColor; - loadColorBlockTex(firstBlock, w, colors, sums, xrefs, &sameColor); + loadColorBlock(image, colors, sums, xrefs, &sameColor); __syncthreads(); @@ -1669,37 +822,28 @@ __global__ void compressDXT1(uint firstBlock, uint w, const uint * permutations, ushort bestStart, bestEnd; uint bestPermutation; - __shared__ float errors[NUM_THREADS]; - 
evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); - - // Use a parallel reduction to find minimum error. - const int minIdx = findMinError(errors); - - __shared__ uint s_bestEndPoints; - __shared__ uint s_bestPermutation; + __shared__ float errors[NUM_THREADS]; + evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); + + // Use a parallel reduction to find minimum error. + const int minIdx = findMinError(errors); + // Only write the result of the winner thread. if (threadIdx.x == minIdx) { - s_bestEndPoints = (bestEnd << 16) | bestStart; - s_bestPermutation = (bestStart != bestEnd) ? bestPermutation : 0; + saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result); } - - __syncthreads(); - - saveBlockDXT1_Parallel(s_bestEndPoints, colors, xrefs, (uint *)result); - //saveBlockDXT1_Parallel(s_bestEndPoints, s_bestPermutation, xrefs, (uint *)result); } - -__global__ void compressLevel4DXT1(uint firstBlock, uint w, const uint * permutations, uint2 * result) +__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result) { __shared__ float3 colors[16]; __shared__ float3 sums[16]; __shared__ int xrefs[16]; __shared__ int sameColor; - loadColorBlockTex(firstBlock, w, colors, sums, xrefs, &sameColor); + loadColorBlock(image, colors, sums, xrefs, &sameColor); __syncthreads(); @@ -1726,7 +870,7 @@ __global__ void compressLevel4DXT1(uint firstBlock, uint w, const uint * permuta } } -__global__ void compressWeightedDXT1(uint firstBlock, uint w, const uint * permutations, uint2 * result) +__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result) { __shared__ float3 colors[16]; __shared__ float3 sums[16]; @@ -1734,7 +878,7 @@ __global__ void compressWeightedDXT1(uint firstBlock, uint w, const uint * permu __shared__ int xrefs[16]; __shared__ int sameColor; - loadColorBlockTex(firstBlock, w, colors, sums, 
weights, xrefs, &sameColor); + loadColorBlock(image, colors, sums, weights, xrefs, &sameColor); __syncthreads(); @@ -1762,75 +906,6 @@ __global__ void compressWeightedDXT1(uint firstBlock, uint w, const uint * permu } -__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result) -{ - __shared__ float2 colors[16]; - __shared__ float2 sums[16]; - __shared__ int xrefs[16]; - __shared__ int sameColor; - - loadColorBlock(image, colors, sums, xrefs, &sameColor); - - __syncthreads(); - - if (sameColor) - { - if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result); - return; - } - - ushort bestStart, bestEnd; - uint bestPermutation; - - __shared__ float errors[NUM_THREADS]; - - evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); - - // Use a parallel reduction to find minimum error. - const int minIdx = findMinError(errors); - - // Only write the result of the winner thread. - if (threadIdx.x == minIdx) - { - saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result); - } -} - -__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result) -{ - __shared__ float2 colors[16]; - __shared__ float2 sums[16]; - __shared__ int xrefs[16]; - __shared__ int sameColor; - - loadColorBlock(image, colors, sums, xrefs, &sameColor); - - __syncthreads(); - - if (sameColor) - { - if (threadIdx.x == 0) saveSingleColorBlockCTX1(colors[0], result); - return; - } - - ushort bestStart, bestEnd; - uint bestPermutation; - - __shared__ float errors[NUM_THREADS]; - - evalAllPermutationsCTX(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); - - // Use a parallel reduction to find minimum error. - const int minIdx = findMinError(errors); - - // Only write the result of the winner thread. 
- if (threadIdx.x == minIdx) - { - saveBlockCTX1(bestStart, bestEnd, bestPermutation, xrefs, result); - } -} - - /* __device__ float computeError(const float weights[16], uchar a0, uchar a1) { @@ -2009,125 +1084,6 @@ __global__ void compressDXT5(const uint * permutations, const uint * image, uint } */ -/*__device__ void evaluatePalette(uint alpha0, uint alpha1, uint alphas[8]) -{ - alpha[0] = alpha0; - alpha[1] = alpha1; - alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7; // bit code 010 - alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7; // bit code 011 - alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7; // bit code 100 - alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7; // bit code 101 - alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7; // bit code 110 - alpha[7] = (1 * alpha[0] + 6 * alpha[1]) / 7; // bit code 111 -} - -__device__ uint computeAlphaError(const uint block[16], uint alpha0, uint alpha1, int bestError = INT_MAX) -{ - uint8 alphas[8]; - evaluatePalette(alpha0, alpha1, alphas); - - int totalError = 0; - - for (uint i = 0; i < 16; i++) - { - uint8 alpha = block[i]; - - // @@ It should be possible to do this much faster. - - int minDist = INT_MAX; - for (uint p = 0; p < 8; p++) - { - int dist = alphaDistance(alpha, alphas[p]); - minDist = min(dist, minDist); - } - - - - totalError += minDist; - - if (totalError > bestError) - { - // early out - return totalError; - } - } - - return totalError; -} - - -void compressDXT5A(uint alpha[16]) -{ - // Get min/max alpha. - for (uint i = 0; i < 16; i++) - { - mina = min(mina, alpha[i]); - maxa = max(maxa, alpha[i]); - } - - dxtBlock->alpha0 = maxa; - dxtBlock->alpha1 = mina; - - if (maxa - mina > 8) - { - int besterror = computeAlphaError(rgba, dxtBlock); - int besta0 = maxa; - int besta1 = mina; - - // Expand search space a bit. - const int alphaExpand = 8; - mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand; - maxa = (maxa <= 255-alphaExpand) ? 
255 : maxa + alphaExpand; - - for (int a0 = mina+9; a0 < maxa; a0++) - { - for (int a1 = mina; a1 < a0-8; a1++) - { - nvDebugCheck(a0 - a1 > 8); - - dxtBlock->alpha0 = a0; - dxtBlock->alpha1 = a1; - int error = computeAlphaError(rgba, dxtBlock, besterror); - - if (error < besterror) - { - besterror = error; - besta0 = a0; - besta1 = a1; - } - } - } - - dxtBlock->alpha0 = besta0; - dxtBlock->alpha1 = besta1; - } -} - -__global__ void compressDXT5n(uint blockNum, uint2 * d_result) -{ - uint idx = blockIdx.x * 128 + threadIdx.x; - - if (idx >= blockNum) - { - return; - } - - // @@ Ideally we would load the data to shared mem to achieve coalesced global mem access. - // @@ Blocks would require too much shared memory (8k) and limit occupancy. - - // @@ Ideally we should use SIMD processing, multiple threads (4-8) processing the same block. - // That simplifies coalescing, and reduces divergence. - - // @@ Experiment with texture. That's probably the most simple approach. - - uint x[16]; - uint y[16]; - - -} -*/ - - //////////////////////////////////////////////////////////////////////////////// // Setup kernel //////////////////////////////////////////////////////////////////////////////// @@ -2145,83 +1101,22 @@ extern "C" void setupCompressKernel(const float weights[3]) cudaMemcpyToSymbol(kColorMetricSqr, weightsSqr, sizeof(float) * 3, 0); } -extern "C" void bindTextureToArray(cudaArray * d_data) -{ - // Setup texture - tex.normalized = false; - tex.filterMode = cudaFilterModePoint; - tex.addressMode[0] = cudaAddressModeClamp; - tex.addressMode[1] = cudaAddressModeClamp; - - cudaBindTextureToArray(tex, d_data); -} - - //////////////////////////////////////////////////////////////////////////////// // Launch kernel //////////////////////////////////////////////////////////////////////////////// -// DXT1 compressors: -extern "C" void compressKernelDXT1(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps) -{ - compressDXT1<<>>(firstBlock, w, 
d_bitmaps, (uint2 *)d_result); -} - -extern "C" void compressKernelDXT1_Level4(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps) -{ - compressLevel4DXT1<<>>(firstBlock, w, d_bitmaps, (uint2 *)d_result); -} - -extern "C" void compressWeightedKernelDXT1(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps) -{ - compressWeightedDXT1<<>>(firstBlock, w, d_bitmaps, (uint2 *)d_result); -} - -// @@ DXT1a compressors. - - -// @@ DXT3 compressors: -extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps) -{ - //compressDXT3<<>>(firstBlock, w, d_bitmaps, (uint2 *)d_result); -} - -extern "C" void compressWeightedKernelDXT3(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps) +extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) { - //compressWeightedDXT3<<>>(firstBlock, w, d_bitmaps, (uint2 *)d_result); + compressDXT1<<>>(d_bitmaps, d_data, (uint2 *)d_result); } - -// @@ DXT5 compressors. 
-extern "C" void compressKernelDXT5(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps) -{ - //compressDXT5<<>>(firstBlock, w, d_bitmaps, (uint2 *)d_result); -} - -extern "C" void compressWeightedKernelDXT5(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps) -{ - //compressWeightedDXT5<<>>(firstBlock, w, d_bitmaps, (uint2 *)d_result); -} - - - - - -/* -extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) +extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) { - compressNormalDXT1<<>>(d_bitmaps, d_data, (uint2 *)d_result); + compressLevel4DXT1<<>>(d_bitmaps, d_data, (uint2 *)d_result); } -extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) -{ - compressCTX1<<>>(d_bitmaps, d_data, (uint2 *)d_result); -} -*/ -/* -extern "C" void compressKernelDXT5n(uint blockNum, cudaArray * d_data, uint * d_result) +extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) { -// compressDXT5n<<>>(blockNum, (uint2 *)d_result); + compressWeightedDXT1<<>>(d_bitmaps, d_data, (uint2 *)d_result); } -*/ \ No newline at end of file diff --git a/src/nvtt/cuda/CudaCompressDXT.cpp b/src/nvtt/cuda/CudaCompressDXT.cpp index 8fe0f48..c59bedd 100644 --- a/src/nvtt/cuda/CudaCompressDXT.cpp +++ b/src/nvtt/cuda/CudaCompressDXT.cpp @@ -1,647 +1,380 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// 
conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "CudaCompressDXT.h" -#include "CudaUtils.h" - - -#if defined HAVE_CUDA -#include -#endif - -#include -#include - -using namespace nv; -using namespace nvtt; - -#if defined HAVE_CUDA - -#define MAX_BLOCKS 8192U // 32768, 65535 - - -extern "C" void setupCompressKernel(const float weights[3]); -extern "C" void bindTextureToArray(cudaArray * d_data); - -extern "C" void compressKernelDXT1(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps); -extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); -extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); -extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps); -//extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); -//extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); - - -#pragma message(NV_FILE_LINE "TODO: Rename Bitmaps.h to BitmapTable.h") -#include "Bitmaps.h" - -/* -// Convert linear image to block linear. 
-static void convertToBlockLinear(const Image * image, uint * blockLinearImage) -{ - const uint w = (image->width() + 3) / 4; - const uint h = (image->height() + 3) / 4; - - for(uint by = 0; by < h; by++) { - for(uint bx = 0; bx < w; bx++) { - const uint bw = min(image->width() - bx * 4, 4U); - const uint bh = min(image->height() - by * 4, 4U); - - for (uint i = 0; i < 16; i++) { - const int x = (i % 4) % bw; - const int y = (i / 4) % bh; - blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u; - } - } - } -} -*/ - -#endif - - -CudaContext::CudaContext() : - bitmapTable(NULL), - bitmapTableCTX(NULL), - data(NULL), - result(NULL) +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "CudaCompressDXT.h" +#include "CudaUtils.h" + + +#if defined HAVE_CUDA +#include +#endif + +#include +#include + +using namespace nv; +using namespace nvtt; + +#if defined HAVE_CUDA + +#define MAX_BLOCKS 8192U // 32768, 65535 + + +extern "C" void setupCompressKernel(const float weights[3]); +extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); +extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); +extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); + +#include "Bitmaps.h" // @@ Rename to BitmapTable.h + +// Convert linear image to block linear. +static void convertToBlockLinear(const Image * image, uint * blockLinearImage) +{ + const uint w = (image->width() + 3) / 4; + const uint h = (image->height() + 3) / 4; + + for(uint by = 0; by < h; by++) { + for(uint bx = 0; bx < w; bx++) { + const uint bw = min(image->width() - bx * 4, 4U); + const uint bh = min(image->height() - by * 4, 4U); + + for (uint i = 0; i < 16; i++) { + const int x = (i % 4) % bw; + const int y = (i / 4) % bh; + blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u; + } + } + } +} + +#endif + + +CudaCompressor::CudaCompressor() : m_bitmapTable(NULL), m_data(NULL), m_result(NULL) +{ +#if defined HAVE_CUDA + // Allocate and upload bitmaps. + cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint)); + if (m_bitmapTable != NULL) + { + cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice); + } + + // Allocate scratch buffers. + cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U); + cudaMalloc((void**) &m_result, MAX_BLOCKS * 8U); +#endif +} + +CudaCompressor::~CudaCompressor() { -#if defined HAVE_CUDA - // Allocate and upload bitmaps. 
- cudaMalloc((void**) &bitmapTable, 992 * sizeof(uint)); - if (bitmapTable != NULL) - { - cudaMemcpy(bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice); - } - - cudaMalloc((void**) &bitmapTableCTX, 704 * sizeof(uint)); - if (bitmapTableCTX != NULL) - { - cudaMemcpy(bitmapTableCTX, s_bitmapTableCTX, 704 * sizeof(uint), cudaMemcpyHostToDevice); - } - - // Allocate scratch buffers. - cudaMalloc((void**) &data, MAX_BLOCKS * 64U); - cudaMalloc((void**) &result, MAX_BLOCKS * 8U); -#endif +#if defined HAVE_CUDA + // Free device mem allocations. + cudaFree(m_data); + cudaFree(m_result); + cudaFree(m_bitmapTable); +#endif } -CudaContext::~CudaContext() +bool CudaCompressor::isValid() const { -#if defined HAVE_CUDA - // Free device mem allocations. - cudaFree(bitmapTableCTX); - cudaFree(bitmapTable); - cudaFree(data); - cudaFree(result); -#endif +#if defined HAVE_CUDA + if (cudaGetLastError() != cudaSuccess) + { + return false; + } +#endif + return m_data != NULL && m_result != NULL && m_bitmapTable != NULL; +} + +// @@ This code is very repetitive and needs to be cleaned up. + +void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode) +{ + m_image = image; + m_alphaMode = alphaMode; +} + +/// Compress image using CUDA. +void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(cuda::isHardwarePresent()); +#if defined HAVE_CUDA + + // Image size in blocks. + const uint w = (m_image->width() + 3) / 4; + const uint h = (m_image->height() + 3) / 4; + + uint imageSize = w * h * 16 * sizeof(Color32); + uint * blockLinearImage = (uint *) malloc(imageSize); + convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! 
+ + const uint blockNum = w * h; + const uint compressedSize = blockNum * 8; + + clock_t start = clock(); + + setupCompressKernel(compressionOptions.colorWeight.ptr()); + + // TODO: Add support for multiple GPUs. + uint bn = 0; + while(bn != blockNum) + { + uint count = min(blockNum - bn, MAX_BLOCKS); + + cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); + + // Launch kernel. + compressKernelDXT1(count, m_data, m_result, m_bitmapTable); + + // Check for errors. + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); + + if (outputOptions.errorHandler != NULL) + { + outputOptions.errorHandler->error(Error_CudaError); + } + } + + // Copy result to host, overwrite swizzled image. + cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost); + + // Output result. + if (outputOptions.outputHandler != NULL) + { + outputOptions.outputHandler->writeData(blockLinearImage, count * 8); + } + + bn += count; + } + + clock_t end = clock(); + //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); + + free(blockLinearImage); + +#else + if (outputOptions.errorHandler != NULL) + { + outputOptions.errorHandler->error(Error_CudaError); + } +#endif } - -bool CudaContext::isValid() const -{ -#if defined HAVE_CUDA - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(err)); - return false; - } -#endif - return bitmapTable != NULL && bitmapTableCTX != NULL && data != NULL && result != NULL; -} - - - -CudaCompressor::CudaCompressor(CudaContext & ctx) : m_ctx(ctx) -{ - -} - -void CudaCompressor::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - nvDebugCheck(cuda::isHardwarePresent()); - -#if defined HAVE_CUDA - - // 
Allocate image as a cuda array. - cudaArray * d_image; - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned); - cudaMallocArray(&d_image, &channelDesc, w, h); - - const int imageSize = w * h * sizeof(uint); - cudaMemcpyToArray(d_image, 0, 0, data, imageSize, cudaMemcpyHostToDevice); - } - else - { -#pragma message(NV_FILE_LINE "FIXME: Floating point textures not really supported by CUDA compressors.") - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); - cudaMallocArray(&d_image, &channelDesc, w, h); - - const int imageSize = w * h * sizeof(uint); - cudaMemcpyToArray(d_image, 0, 0, data, imageSize, cudaMemcpyHostToDevice); - } - - // Image size in blocks. - const uint bw = (w + 3) / 4; - const uint bh = (h + 3) / 4; - const uint bs = blockSize(); - const uint blockNum = bw * bh; - const uint compressedSize = blockNum * bs; - - void * h_result = malloc(min(blockNum, MAX_BLOCKS) * bs); - - setup(d_image, compressionOptions); - - // Timer timer; - // timer.start(); - - uint bn = 0; - while(bn != blockNum) - { - uint count = min(blockNum - bn, MAX_BLOCKS); - - compressBlocks(bn, count, w, h, alphaMode, compressionOptions, h_result); - - // Check for errors. - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - //nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } - } - - // Output result. 
- if (outputOptions.outputHandler != NULL) - { - outputOptions.outputHandler->writeData(h_result, count * bs); - } - - bn += count; - } - - //timer.stop(); - //printf("\rCUDA time taken: %.3f seconds\n", timer.elapsed() / CLOCKS_PER_SEC); - - free(h_result); - cudaFreeArray(d_image); - -#else - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } -#endif - -} - - -void CudaCompressorDXT1::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) -{ - setupCompressKernel(compressionOptions.colorWeight.ptr()); - bindTextureToArray(image); -} - -void CudaCompressorDXT1::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - // Launch kernel. - compressKernelDXT1(first, count, w, m_ctx.result, m_ctx.bitmapTable); - - // Copy result to host. - cudaMemcpy(output, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); -} - - -void CudaCompressorDXT3::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) -{ - setupCompressKernel(compressionOptions.colorWeight.ptr()); - bindTextureToArray(image); -} - -void CudaCompressorDXT3::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - // Launch kernel. - compressKernelDXT3(first, count, w, m_ctx.result, m_ctx.bitmapTable); - - // Copy result to host. 
- cudaMemcpy(output, m_ctx.result, count * 16, cudaMemcpyDeviceToHost); -} - - -void CudaCompressorDXT5::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) -{ - setupCompressKernel(compressionOptions.colorWeight.ptr()); - bindTextureToArray(image); -} - -void CudaCompressorDXT5::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - /*// Launch kernel. - compressKernelDXT5(first, count, w, m_ctx.result, m_ctx.bitmapTable); - - // Copy result to host. - cudaMemcpy(output, m_ctx.result, count * 16, cudaMemcpyDeviceToHost);*/ - - // Launch kernel. - if (alphaMode == AlphaMode_Transparency) - { - // compressWeightedKernelDXT1(first, count, w, m_ctx.result, m_ctx.bitmapTable); - } - else - { - // compressKernelDXT1_Level4(first, count, w, m_ctx.result, m_ctx.bitmapTable); - } - - // Compress alpha in parallel with the GPU. - for (uint i = 0; i < count; i++) - { - //ColorBlock rgba(blockLinearImage + (first + i) * 16); - //OptimalCompress::compressDXT3A(rgba, alphaBlocks + i); - } - - // Copy result to host. - cudaMemcpy(output, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); - - // @@ Interleave color and alpha blocks. - -} - - - - - - -// @@ This code is very repetitive and needs to be cleaned up. - -#if 0 - -/// Compress image using CUDA. -void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - nvDebugCheck(cuda::isHardwarePresent()); -#if defined HAVE_CUDA - - // Image size in blocks. 
- const uint w = (m_image->width() + 3) / 4; - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); - uint * blockLinearImage = (uint *) malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); - - const uint blockNum = w * h; - const uint compressedSize = blockNum * 8; - - AlphaBlockDXT3 * alphaBlocks = NULL; - alphaBlocks = (AlphaBlockDXT3 *)malloc(min(compressedSize, MAX_BLOCKS * 8U)); - - setupCompressKernel(compressionOptions.colorWeight.ptr()); - - clock_t start = clock(); - - uint bn = 0; - while(bn != blockNum) - { - uint count = min(blockNum - bn, MAX_BLOCKS); - - cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); - - // Launch kernel. - if (m_alphaMode == AlphaMode_Transparency) - { - compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); - } - else - { - compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); - } - - // Compress alpha in parallel with the GPU. - for (uint i = 0; i < count; i++) - { - ColorBlock rgba(blockLinearImage + (bn + i) * 16); - OptimalCompress::compressDXT3A(rgba, alphaBlocks + i); - } - - // Check for errors. - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); - - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } - } - - // Copy result to host, overwrite swizzled image. - cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); - - // Output result. 
- if (outputOptions.outputHandler != NULL) - { - for (uint i = 0; i < count; i++) - { - outputOptions.outputHandler->writeData(alphaBlocks + i, 8); - outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8); - } - } - - bn += count; - } - - clock_t end = clock(); - //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); - - free(alphaBlocks); - free(blockLinearImage); - -#else - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } -#endif -} - - -/// Compress image using CUDA. -void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - nvDebugCheck(cuda::isHardwarePresent()); -#if defined HAVE_CUDA - - // Image size in blocks. - const uint w = (m_image->width() + 3) / 4; - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); - uint * blockLinearImage = (uint *) malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); - - const uint blockNum = w * h; - const uint compressedSize = blockNum * 8; - - AlphaBlockDXT5 * alphaBlocks = NULL; - alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U)); - - setupCompressKernel(compressionOptions.colorWeight.ptr()); - - clock_t start = clock(); - - uint bn = 0; - while(bn != blockNum) - { - uint count = min(blockNum - bn, MAX_BLOCKS); - - cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); - - // Launch kernel. - if (m_alphaMode == AlphaMode_Transparency) - { - compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); - } - else - { - compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); - } - - // Compress alpha in parallel with the GPU. 
- for (uint i = 0; i < count; i++) - { - ColorBlock rgba(blockLinearImage + (bn + i) * 16); - QuickCompress::compressDXT5A(rgba, alphaBlocks + i); - } - - // Check for errors. - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); - - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } - } - - // Copy result to host, overwrite swizzled image. - cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); - - // Output result. - if (outputOptions.outputHandler != NULL) - { - for (uint i = 0; i < count; i++) - { - outputOptions.outputHandler->writeData(alphaBlocks + i, 8); - outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8); - } - } - - bn += count; - } - - clock_t end = clock(); - //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); - - free(alphaBlocks); - free(blockLinearImage); - -#else - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } -#endif -} - - -void CudaCompressor::compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - nvDebugCheck(cuda::isHardwarePresent()); -#if defined HAVE_CUDA - - // Image size in blocks. - const uint w = (m_image->width() + 3) / 4; - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); - uint * blockLinearImage = (uint *) malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! - - const uint blockNum = w * h; - const uint compressedSize = blockNum * 8; - - clock_t start = clock(); - - setupCompressKernel(compressionOptions.colorWeight.ptr()); - - // TODO: Add support for multiple GPUs. 
- uint bn = 0; - while(bn != blockNum) - { - uint count = min(blockNum - bn, MAX_BLOCKS); - - cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); - - // Launch kernel. - compressNormalKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); - - // Check for errors. - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); - - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } - } - - // Copy result to host, overwrite swizzled image. - cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); - - // Output result. - if (outputOptions.outputHandler != NULL) - { - outputOptions.outputHandler->writeData(blockLinearImage, count * 8); - } - - bn += count; - } - - clock_t end = clock(); - //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); - - free(blockLinearImage); - -#else - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } -#endif -} - - -void CudaCompressor::compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - nvDebugCheck(cuda::isHardwarePresent()); -#if defined HAVE_CUDA - - // Image size in blocks. - const uint w = (m_image->width() + 3) / 4; - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); - uint * blockLinearImage = (uint *) malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! - - const uint blockNum = w * h; - const uint compressedSize = blockNum * 8; - - clock_t start = clock(); - - setupCompressKernel(compressionOptions.colorWeight.ptr()); - - // TODO: Add support for multiple GPUs. 
- uint bn = 0; - while(bn != blockNum) - { - uint count = min(blockNum - bn, MAX_BLOCKS); - - cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); - - // Launch kernel. - compressKernelCTX1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTableCTX); - - // Check for errors. - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); - - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } - } - - // Copy result to host, overwrite swizzled image. - cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); - - // Output result. - if (outputOptions.outputHandler != NULL) - { - outputOptions.outputHandler->writeData(blockLinearImage, count * 8); - } - - bn += count; - } - - clock_t end = clock(); - //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); - - free(blockLinearImage); - -#else - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } -#endif -} - - -void CudaCompressor::compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - nvDebugCheck(cuda::isHardwarePresent()); -#if defined HAVE_CUDA - - // @@ TODO - -#else - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } -#endif -} - -#endif // 0 + + +/// Compress image using CUDA. +void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(cuda::isHardwarePresent()); +#if defined HAVE_CUDA + + // Image size in blocks. 
+ const uint w = (m_image->width() + 3) / 4; + const uint h = (m_image->height() + 3) / 4; + + uint imageSize = w * h * 16 * sizeof(Color32); + uint * blockLinearImage = (uint *) malloc(imageSize); + convertToBlockLinear(m_image, blockLinearImage); + + const uint blockNum = w * h; + const uint compressedSize = blockNum * 8; + + AlphaBlockDXT3 * alphaBlocks = NULL; + alphaBlocks = (AlphaBlockDXT3 *)malloc(min(compressedSize, MAX_BLOCKS * 8U)); + + setupCompressKernel(compressionOptions.colorWeight.ptr()); + + clock_t start = clock(); + + uint bn = 0; + while(bn != blockNum) + { + uint count = min(blockNum - bn, MAX_BLOCKS); + + cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); + + // Launch kernel. + if (m_alphaMode == AlphaMode_Transparency) + { + compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable); + } + else + { + compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable); + } + + // Compress alpha in parallel with the GPU. + for (uint i = 0; i < count; i++) + { + ColorBlock rgba(blockLinearImage + (bn + i) * 16); + OptimalCompress::compressDXT3A(rgba, alphaBlocks + i); + } + + // Check for errors. + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); + + if (outputOptions.errorHandler != NULL) + { + outputOptions.errorHandler->error(Error_CudaError); + } + } + + // Copy result to host, overwrite swizzled image. + cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost); + + // Output result. 
+ if (outputOptions.outputHandler != NULL) + { + for (uint i = 0; i < count; i++) + { + outputOptions.outputHandler->writeData(alphaBlocks + i, 8); + outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8); + } + } + + bn += count; + } + + clock_t end = clock(); + //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); + + free(alphaBlocks); + free(blockLinearImage); + +#else + if (outputOptions.errorHandler != NULL) + { + outputOptions.errorHandler->error(Error_CudaError); + } +#endif +} + + +/// Compress image using CUDA. +void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(cuda::isHardwarePresent()); +#if defined HAVE_CUDA + + // Image size in blocks. + const uint w = (m_image->width() + 3) / 4; + const uint h = (m_image->height() + 3) / 4; + + uint imageSize = w * h * 16 * sizeof(Color32); + uint * blockLinearImage = (uint *) malloc(imageSize); + convertToBlockLinear(m_image, blockLinearImage); + + const uint blockNum = w * h; + const uint compressedSize = blockNum * 8; + + AlphaBlockDXT5 * alphaBlocks = NULL; + alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U)); + + setupCompressKernel(compressionOptions.colorWeight.ptr()); + + clock_t start = clock(); + + uint bn = 0; + while(bn != blockNum) + { + uint count = min(blockNum - bn, MAX_BLOCKS); + + cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); + + // Launch kernel. + if (m_alphaMode == AlphaMode_Transparency) + { + compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable); + } + else + { + compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable); + } + + // Compress alpha in parallel with the GPU. + for (uint i = 0; i < count; i++) + { + ColorBlock rgba(blockLinearImage + (bn + i) * 16); + QuickCompress::compressDXT5A(rgba, alphaBlocks + i); + } + + // Check for errors. 
+ cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); + + if (outputOptions.errorHandler != NULL) + { + outputOptions.errorHandler->error(Error_CudaError); + } + } + + // Copy result to host, overwrite swizzled image. + cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost); + + // Output result. + if (outputOptions.outputHandler != NULL) + { + for (uint i = 0; i < count; i++) + { + outputOptions.outputHandler->writeData(alphaBlocks + i, 8); + outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8); + } + } + + bn += count; + } + + clock_t end = clock(); + //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); + + free(alphaBlocks); + free(blockLinearImage); + +#else + if (outputOptions.errorHandler != NULL) + { + outputOptions.errorHandler->error(Error_CudaError); + } +#endif +} + + diff --git a/src/nvtt/cuda/CudaCompressDXT.h b/src/nvtt/cuda/CudaCompressDXT.h index b60bdf3..f72bacc 100644 --- a/src/nvtt/cuda/CudaCompressDXT.h +++ b/src/nvtt/cuda/CudaCompressDXT.h @@ -27,86 +27,34 @@ #include #include -#include "nvtt/CompressDXT.h" - -struct cudaArray; - namespace nv { class Image; - class CudaContext + class CudaCompressor { public: - CudaContext(); - ~CudaContext(); + CudaCompressor(); + ~CudaCompressor(); bool isValid() const; - public: - // Device pointers. 
- uint * bitmapTable; - uint * bitmapTableCTX; - uint * data; - uint * result; - }; + void setImage(const Image * image, nvtt::AlphaMode alphaMode); + void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - struct CudaCompressor : public CompressorInterface - { - CudaCompressor(CudaContext & ctx); - - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - - virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) = 0; - virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; - virtual uint blockSize() const = 0; - - protected: - CudaContext & m_ctx; - }; + private: - struct CudaCompressorDXT1 : public CudaCompressor - { - CudaCompressorDXT1(CudaContext & ctx) : CudaCompressor(ctx) {} - - virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions); - virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; }; + uint * m_bitmapTable; + uint * m_data; + uint * m_result; + + const Image * m_image; + nvtt::AlphaMode m_alphaMode; }; - /*struct CudaCompressorDXT1n : public CudaCompressor - { - virtual void setup(const CompressionOptions::Private & compressionOptions); - virtual void compressBlocks(uint 
blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; - virtual uint blockSize() const { return 8; }; - };*/ - - struct CudaCompressorDXT3 : public CudaCompressor - { - CudaCompressorDXT3(CudaContext & ctx) : CudaCompressor(ctx) {} - - virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions); - virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; }; - }; - - struct CudaCompressorDXT5 : public CudaCompressor - { - CudaCompressorDXT5(CudaContext & ctx) : CudaCompressor(ctx) {} - - virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions); - virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; }; - }; - - /*struct CudaCompressorCXT1 : public CudaCompressor - { - virtual void setup(const CompressionOptions::Private & compressionOptions); - virtual void compressBlocks(uint blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; - virtual uint blockSize() const { return 8; }; - };*/ - } // nv namespace diff --git a/src/nvtt/cuda/CudaMath.h b/src/nvtt/cuda/CudaMath.h index c8d1a56..50af320 100644 --- a/src/nvtt/cuda/CudaMath.h +++ b/src/nvtt/cuda/CudaMath.h @@ -26,6 +26,7 @@ #ifndef CUDAMATH_H #define CUDAMATH_H +#include inline __device__ __host__ float3 operator *(float3 a, float3 b) @@ -86,69 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b) return a.x == b.x && a.y == b.y && a.z == b.z; } - -// float2 operators -inline __device__ __host__ float2 operator *(float2 a, 
float2 b) -{ - return make_float2(a.x*b.x, a.y*b.y); -} - -inline __device__ __host__ float2 operator *(float f, float2 v) -{ - return make_float2(v.x*f, v.y*f); -} - -inline __device__ __host__ float2 operator *(float2 v, float f) -{ - return make_float2(v.x*f, v.y*f); -} - -inline __device__ __host__ float2 operator +(float2 a, float2 b) -{ - return make_float2(a.x+b.x, a.y+b.y); -} - -inline __device__ __host__ void operator +=(float2 & b, float2 a) -{ - b.x += a.x; - b.y += a.y; -} - -inline __device__ __host__ float2 operator -(float2 a, float2 b) -{ - return make_float2(a.x-b.x, a.y-b.y); -} - -inline __device__ __host__ void operator -=(float2 & b, float2 a) -{ - b.x -= a.x; - b.y -= a.y; -} - -inline __device__ __host__ float2 operator /(float2 v, float f) -{ - float inv = 1.0f / f; - return v * inv; -} - -inline __device__ __host__ void operator /=(float2 & b, float f) -{ - float inv = 1.0f / f; - b.x *= inv; - b.y *= inv; -} - -inline __device__ __host__ bool operator ==(float2 a, float2 b) -{ - return a.x == b.x && a.y == b.y; -} - - -inline __device__ __host__ float dot(float2 a, float2 b) -{ - return a.x * b.x + a.y * b.y; -} - inline __device__ __host__ float dot(float3 a, float3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; @@ -181,16 +119,6 @@ inline __device__ __host__ float3 normalize(float3 v) return make_float3(v.x * len, v.y * len, v.z * len); } -inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) -{ - const float s = 1.0f - t; - return make_float3(s * a.x + t * b.x, s * a.y + t * b.y, s * a.z + t * b.z); -} - -inline __device__ __host__ float lengthSquared(float3 a) -{ - return dot(a, a); -} @@ -315,89 +243,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl return firstEigenVector(covariance); } -// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right? 
-inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] ) -{ - // @@ 8 iterations is probably more than enough. - - float2 v = make_float2(1.0f, 1.0f); - for(int i = 0; i < 8; i++) { - float x = v.x * matrix[0] + v.y * matrix[1]; - float y = v.x * matrix[1] + v.y * matrix[2]; - float m = max(x, y); - float iv = 1.0f / m; - if (m == 0.0f) iv = 0.0f; - v = make_float2(x*iv, y*iv); - } - - return v; -} - -inline __device__ void colorSums(const float2 * colors, float2 * sums) -{ -#if __DEVICE_EMULATION__ - float2 color_sum = make_float2(0.0f, 0.0f); - for (int i = 0; i < 16; i++) - { - color_sum += colors[i]; - } - - for (int i = 0; i < 16; i++) - { - sums[i] = color_sum; - } -#else - - const int idx = threadIdx.x; - - sums[idx] = colors[idx]; - sums[idx] += sums[idx^8]; - sums[idx] += sums[idx^4]; - sums[idx] += sums[idx^2]; - sums[idx] += sums[idx^1]; - -#endif -} - -inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum) -{ - // Compute covariance matrix of the given colors. -#if __DEVICE_EMULATION__ - float covariance[3] = {0, 0, 0}; - for (int i = 0; i < 16; i++) - { - float2 a = (colors[i] - color_sum * (1.0f / 16.0f)); - covariance[0] += a.x * a.x; - covariance[1] += a.x * a.y; - covariance[2] += a.y * a.y; - } -#else - - const int idx = threadIdx.x; - - float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f)); - - __shared__ float covariance[16*3]; - - covariance[3 * idx + 0] = diff.x * diff.x; - covariance[3 * idx + 1] = diff.x * diff.y; - covariance[3 * idx + 2] = diff.y * diff.y; - - for(int d = 8; d > 0; d >>= 1) - { - if (idx < d) - { - covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0]; - covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1]; - covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2]; - } - } - -#endif - - // Compute first eigen vector. 
- return firstEigenVector2D(covariance); -} - #endif // CUDAMATH_H diff --git a/src/nvtt/cuda/CudaUtils.cpp b/src/nvtt/cuda/CudaUtils.cpp index 7bb2b09..3cca04d 100644 --- a/src/nvtt/cuda/CudaUtils.cpp +++ b/src/nvtt/cuda/CudaUtils.cpp @@ -1,239 +1,300 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include -#include "CudaUtils.h" - -#if defined HAVE_CUDA -#include -#include -#endif - -using namespace nv; -using namespace cuda; - -/* @@ Move this to win32 utils or somewhere else. 
-#if NV_OS_WIN32 - -#define WINDOWS_LEAN_AND_MEAN -#include - -static bool isWindowsVista() -{ - OSVERSIONINFO osvi; - osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - - ::GetVersionEx(&osvi); - return osvi.dwMajorVersion >= 6; -} - - -typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); - -static bool isWow32() -{ - LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); - - BOOL bIsWow64 = FALSE; - - if (NULL != fnIsWow64Process) - { - if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) - { - // Assume 32 bits. - return true; - } - } - - return !bIsWow64; -} - -#endif -*/ - - -static bool isCudaDriverAvailable(int version) -{ -#if defined HAVE_CUDA -#if NV_OS_WIN32 - Library nvcuda("nvcuda.dll"); -#else - Library nvcuda(NV_LIBRARY_NAME(cuda)); -#endif - - if (!nvcuda.isValid()) - { - nvDebug("*** CUDA driver not found.\n"); - return false; - } - - if (version >= 2000) - { - void * address = nvcuda.bindSymbol("cuStreamCreate"); - if (address == NULL) { - nvDebug("*** CUDA driver version < 2.0.\n"); - return false; - } - } - - if (version >= 2010) - { - void * address = nvcuda.bindSymbol("cuModuleLoadDataEx"); - if (address == NULL) { - nvDebug("*** CUDA driver version < 2.1.\n"); - return false; - } - } - - if (version >= 2020) - { - typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); - - PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); - if (driverGetVersion == NULL) { - nvDebug("*** CUDA driver version < 2.2.\n"); - return false; - } - - int driverVersion; - CUresult err = driverGetVersion(&driverVersion); - if (err != CUDA_SUCCESS) { - nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err)); - return false; - } - - return driverVersion >= version; - } -#endif // HAVE_CUDA - - return true; -} - - -/// Determine if CUDA is available. 
-bool nv::cuda::isHardwarePresent() -{ -#if defined HAVE_CUDA - // Make sure that CUDA driver matches CUDA runtime. - if (!isCudaDriverAvailable(CUDART_VERSION)) - { - nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION); - return false; - } - - int count = deviceCount(); - if (count == 1) - { - // Make sure it's not an emulation device. - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - - // deviceProp.name != Device Emulation (CPU) - if (deviceProp.major == -1 || deviceProp.minor == -1) - { - return false; - } - } - - // @@ Make sure that warp size == 32 - - return count > 0; -#else - return false; -#endif -} - -/// Get number of CUDA enabled devices. -int nv::cuda::deviceCount() -{ -#if defined HAVE_CUDA - int gpuCount = 0; - - cudaError_t result = cudaGetDeviceCount(&gpuCount); - - if (result == cudaSuccess) - { - return gpuCount; - } -#endif - return 0; -} - -int nv::cuda::getFastestDevice() -{ - int max_gflops_device = 0; -#if defined HAVE_CUDA - int max_gflops = 0; - - const int device_count = deviceCount(); - int current_device = 0; - while (current_device < device_count) - { - cudaDeviceProp device_properties; - cudaGetDeviceProperties(&device_properties, current_device); - int gflops = device_properties.multiProcessorCount * device_properties.clockRate; - - if (device_properties.major != -1 && device_properties.minor != -1) - { - if( gflops > max_gflops ) - { - max_gflops = gflops; - max_gflops_device = current_device; - } - } - - current_device++; - } -#endif - return max_gflops_device; -} - - -/// Activate the given devices. 
-bool nv::cuda::setDevice(int i) -{ - nvCheck(i < deviceCount()); -#if defined HAVE_CUDA - cudaError_t result = cudaSetDevice(i); - - if (result != cudaSuccess) { - nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); - } - - return result == cudaSuccess; -#else - return false; -#endif -} - -void nv::cuda::exit() -{ -#if defined HAVE_CUDA - cudaError_t result = cudaThreadExit(); - - if (result != cudaSuccess) { - nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); - } -#endif -} +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include +#include +#include "CudaUtils.h" + +#if defined HAVE_CUDA +#include +#include +#endif + +using namespace nv; +using namespace cuda; + +/* @@ Move this to win32 utils or somewhere else. 
+#if NV_OS_WIN32 + +#define WINDOWS_LEAN_AND_MEAN +#include + +static bool isWindowsVista() +{ +OSVERSIONINFO osvi; +osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + +::GetVersionEx(&osvi); +return osvi.dwMajorVersion >= 6; +} + + +typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); + +static bool isWow32() +{ +LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); + +BOOL bIsWow64 = FALSE; + +if (NULL != fnIsWow64Process) +{ +if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) +{ +// Assume 32 bits. +return true; +} +} + +return !bIsWow64; +} + +#endif +*/ + + +static bool isCudaDriverAvailable(int version) +{ +#if defined HAVE_CUDA +#if NV_OS_WIN32 + Library nvcuda("nvcuda.dll"); +#else + Library nvcuda(NV_LIBRARY_NAME(cuda)); +#endif + + if (!nvcuda.isValid()) + { + nvDebug("*** CUDA driver not found.\n"); + return false; + } + + if (version >= 2000) + { + void * address = nvcuda.bindSymbol("cuStreamCreate"); + if (address == NULL) { + nvDebug("*** CUDA driver version < 2.0.\n"); + return false; + } + } + + if (version >= 2010) + { + void * address = nvcuda.bindSymbol("cuModuleLoadDataEx"); + if (address == NULL) { + nvDebug("*** CUDA driver version < 2.1.\n"); + return false; + } + } + + if (version >= 2020) + { + typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); + + PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); + if (driverGetVersion == NULL) { + nvDebug("*** CUDA driver version < 2.2.\n"); + return false; + } + + int driverVersion; + CUresult err = driverGetVersion(&driverVersion); + if (err != CUDA_SUCCESS) { + nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err)); + return false; + } + + return driverVersion >= version; + } +#endif // HAVE_CUDA + + return true; +} + + +/// Determine if CUDA is available. 
+bool nv::cuda::isHardwarePresent() +{ +#if defined HAVE_CUDA + // Make sure that CUDA driver matches CUDA runtime. + if (!isCudaDriverAvailable(CUDART_VERSION)) + { + nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION); + return false; + } + + int count = deviceCount(); + if (count == 1) + { + // Make sure it's not an emulation device. + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + + // deviceProp.name != Device Emulation (CPU) + if (deviceProp.major == -1 || deviceProp.minor == -1) + { + return false; + } + } + + // @@ Make sure that warp size == 32 + + // @@ Make sure available GPU is faster than the CPU. + + return count > 0; +#else + return false; +#endif +} + +/// Get number of CUDA enabled devices. +int nv::cuda::deviceCount() +{ +#if defined HAVE_CUDA + int gpuCount = 0; + + cudaError_t result = cudaGetDeviceCount(&gpuCount); + + if (result == cudaSuccess) + { + return gpuCount; + } +#endif + return 0; +} + + +// Make sure device meets requirements: +// - Not an emulation device. +// - Not an integrated device? +// - Faster than CPU. +bool nv::cuda::isValidDevice(int i) +{ +#if defined HAVE_CUDA + + cudaDeviceProp device_properties; + cudaGetDeviceProperties(&device_properties, i); + int gflops = device_properties.multiProcessorCount * device_properties.clockRate; + + if (device_properties.major == -1 || device_properties.minor == -1) { + // Emulation device. + return false; + } + +#if CUDART_VERSION >= 2030 // 2.3 + /*if (device_properties.integrated) + { + // Integrated devices. 
+ return false; + }*/ +#endif + + return true; +#else + return false; +#endif +} + +int nv::cuda::getFastestDevice() +{ + int max_gflops_device = -1; +#if defined HAVE_CUDA + int max_gflops = 0; + + const int device_count = deviceCount(); + for (int i = 0; i < device_count; i++) + { + if (isValidDevice(i)) + { + cudaDeviceProp device_properties; + cudaGetDeviceProperties(&device_properties, i); + int gflops = device_properties.multiProcessorCount * device_properties.clockRate; + + if (gflops > max_gflops) + { + max_gflops = gflops; + max_gflops_device = i; + } + } + } +#endif + return max_gflops_device; +} + + +/// Activate the given devices. +bool nv::cuda::initDevice(int * device_ptr) +{ + nvDebugCheck(device_ptr != NULL); +#if defined HAVE_CUDA + +#if CUDART_VERSION >= 2030 // 2.3 + + // Set device flags to yield in order to play nice with other threads and to find out if CUDA was already active. + cudaError_t resul = cudaSetDeviceFlags(cudaDeviceScheduleYield); + +#endif + + int device = getFastestDevice(); + + if (device == -1) + { + // No device is fast enough. + *device_ptr = -1; + return false; + } + + // Select CUDA device. + cudaError_t result = cudaSetDevice(device); + + if (result == cudaErrorSetOnActiveProcess) + { + int device; + result = cudaGetDevice(&device); + + *device_ptr = -1; // No device to cleanup. + return isValidDevice(device); // Return true if device is valid. 
+ } + else if (result != cudaSuccess) + { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); + *device_ptr = -1; + return false; + } + + *device_ptr = device; + return true; +#else + return false; +#endif +} + +void nv::cuda::exitDevice() +{ +#if defined HAVE_CUDA + cudaError_t result = cudaThreadExit(); + + if (result != cudaSuccess) { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); + } +#endif +} diff --git a/src/nvtt/cuda/CudaUtils.h b/src/nvtt/cuda/CudaUtils.h index 376bbe1..7128b4d 100644 --- a/src/nvtt/cuda/CudaUtils.h +++ b/src/nvtt/cuda/CudaUtils.h @@ -32,8 +32,10 @@ namespace nv bool isHardwarePresent(); int deviceCount(); int getFastestDevice(); - bool setDevice(int i); - void exit(); + bool isValidDevice(int i); + + bool initDevice(int * device_ptr); + void exitDevice(); }; } // nv namespace diff --git a/src/nvtt/experimental/nvtt_experimental.cpp b/src/nvtt/experimental/nvtt_experimental.cpp deleted file mode 100644 index 7d612aa..0000000 --- a/src/nvtt/experimental/nvtt_experimental.cpp +++ /dev/null @@ -1,60 +0,0 @@ - -#include "nvtt_experimental.h" - -struct NvttTexture -{ - NvttTexture() : - m_constant(false), - m_image(NULL), - m_floatImage(NULL) - { - } - - ~NvttTexture() - { - if (m_constant && m_image) m_image->unwrap(); - delete m_image; - delete m_floatImage; - } - - bool m_constant; - Image * m_image; - FloatImage * m_floatImage; -}; - -NvttTexture * nvttCreateTexture() -{ - return new NvttTexture(); -} - -void nvttDestroyTexture(NvttTexture * tex) -{ - delete tex; -} - -void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data) -{ - nvCheck(img != NULL); - - if (format == NVTT_InputFormat_BGRA_8UB) - { - img->m_constant = false; - img->m_image->allocate(w, h); - memcpy(img->m_image->pixels(), data, w * h * 4); - } - else - { - nvCheck(false); - } -} - -void nvttCompressImage(NvttImage * img, NvttFormat format) -{ - nvCheck(img != NULL); - - // @@ Invoke appropriate compressor. 
-} - - - -#endif // NVTT_EXPERIMENTAL_H diff --git a/src/nvtt/experimental/nvtt_experimental.h b/src/nvtt/experimental/nvtt_experimental.h deleted file mode 100644 index 22991f7..0000000 --- a/src/nvtt/experimental/nvtt_experimental.h +++ /dev/null @@ -1,103 +0,0 @@ - -#ifndef NVTT_EXPERIMENTAL_H -#define NVTT_EXPERIMENTAL_H - -#include - -typedef struct NvttTexture NvttTexture; -typedef struct NvttOutputOptions NvttOutputOptions; - - -// Global functions -void nvttInitialize(...); -unsigned int nvttGetVersion(); -const char * nvttGetErrorString(unsigned int error); - - -// Texture functions -NvttTexture * nvttCreateTexture(); -void nvttDestroyTexture(NvttTexture * tex); - -void nvttSetTexture2D(NvttTexture * tex, NvttInputFormat format, uint w, uint h, uint idx, void * data); - -void nvttResize(NvttTexture * img, uint w, uint h); -unsigned int nvttDownsample(NvttTexture * img); - -void nvttOutputCompressed(NvttTexture * img, NvttOutputFormat format); -void nvttOutputPixelFormat(NvttTexture * img, NvttOutputFormat format); - - - - -// How to control the compression parameters? - -// Using many arguments: -// void nvttCompressImage(img, format, quality, r, g, b, a, ...); - -// Using existing compression option class: -// compressionOptions = nvttCreateCompressionOptions(); -// nvttSetCompressionOptionsFormat(compressionOptions, format); -// nvttSetCompressionOptionsQuality(compressionOptions, quality); -// nvttSetCompressionOptionsQuality(compressionOptions, quality); -// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a); -// ... -// nvttCompressImage(img, compressionOptions); - -// Using thread local context state: -// void nvttSetCompressionFormat(format); -// void nvttSetCompressionQuality(quality); -// void nvttSetCompressionColorWeights(r, g, b, a); -// ... 
-// nvttCompressImage(img); - -// Using thread local context state, but with GL style function arguments: -// nvttCompressorParameteri(NVTT_FORMAT, format); -// nvttCompressorParameteri(NVTT_QUALITY, quality); -// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r); -// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g); -// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b); -// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a); -// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a); -// ... -// nvttCompressImage(img); - -// How do we get the compressed output? -// - Using callbacks. (via new entrypoints, or through outputOptions) -// - Return it explicitely from nvttCompressImage. -// - Store it along the image, retrieve later explicitely with 'nvttGetCompressedData(img, ...)' - -/* - -// Global functions -void nvttInitialize(...); -unsigned int nvttGetVersion(); -const char * nvttGetErrorString(unsigned int error); - -// Context object -void nvttCreateContext(); -void nvttDestroyContext(); - -void nvttSetParameter1i(unsigned int name, int value); - -void nvttSetParameter1f(unsigned int name, float value); -void nvttSetParameter2f(unsigned int name, float v0, float v1); -void nvttSetParameter3f(unsigned int name, float v0, float v1, float v2); -void nvttSetParameter4f(unsigned int name, float v0, float v1, float v2, float v3); - -// Image object -NvttImage * nvttCreateImage(); -void nvttDestroyImage(NvttImage * img); - -void nvttSetImageData(NvttImage * image, NvttInputFormat format, unsigned int w, unsigned int h, void * data); - -void nvttSetImageParameter1i(NvttImage * image, unsigned int name, int value); -void nvttSetImageParameter1f(NvttImage * image, unsigned int name, float value); - -void nvttResizeImage(NvttImage * image, unsigned int w, unsigned int h); -void nvttQuantizeImage(NvttImage * image, bool dither, unsigned int rbits, unsigned int gbits, unsigned int bbits, unsigned int abits); -void nvttCompressImage(NvttImage * image, void * 
buffer, int size); - -*/ - - -#endif // NVTT_EXPERIMENTAL_H diff --git a/src/nvtt/experimental/test.cpp b/src/nvtt/experimental/test.cpp deleted file mode 100644 index a60ee61..0000000 --- a/src/nvtt/experimental/test.cpp +++ /dev/null @@ -1,61 +0,0 @@ - -#include "nvtt_experimental.h" - -/* -Errors in the original API: -- Too many memory copies. -- Implementation too complicated. -- Error output should not be in output options. -- Data driven interface. Follows the dialog model. Provide all the data upfront. -*/ - - -// Output texture with mipmaps -void example0() -{ - CompressionOptions compressionOptions; - OutputOptions outputOptions; - - Texture img; - img.setTexture2D(format, w, h, 0, data); - - Compressor context; - context.outputHeader(outputOptions); - context.outputCompressed(img, compressionOptions, outputOptions); - - img.toLinear(2.2); - while (img.downsample(NVTT_FILTER_BOX)) - { - img.toGamma(2.2); - outputCompressed(img, compressionOptions, outputOptions); - } -} - - -// Output texture with colored mipmaps -void example1() -{ - CompressionOptions compressionOptions; - OutputOptions outputOptions; - - Texture img; - img.setTexture2D(format, w, h, 0, data); - - Compressor context; - context.outputHeader(outputOptions); - context.outputCompressed(img, compressionOptions, outputOptions); - - img.toLinear(2.2); - while (img.downsample(NVTT_FILTER_BOX)) - { - img.toGamma(2.2); - - Texture mipmap = img; - mipmap.blend(color[i].r, color[i].g, color[i].b, 0.5f); - - context.outputCompressed(mipmap, compressionOptions, outputOptions); - } -} - - - diff --git a/src/nvtt/nvtt.cpp b/src/nvtt/nvtt.cpp index 8dc5cb5..5c9e884 100644 --- a/src/nvtt/nvtt.cpp +++ b/src/nvtt/nvtt.cpp @@ -42,8 +42,6 @@ const char * nvtt::errorString(Error e) return "Error opening file"; case Error_FileWrite: return "Error writing through output handler"; - case Error_UnsupportedOutputFormat: - return "The container file does not support the selected output format"; } return "Invalid 
error"; diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index 8a4d8da..b5d2e72 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -47,15 +47,12 @@ # define NVTT_API #endif -#define NVTT_VERSION 201 +#define NVTT_VERSION 200 -#define NVTT_FORBID_COPY(Class) \ +#define NVTT_DECLARE_PIMPL(Class) \ private: \ Class(const Class &); \ void operator=(const Class &); \ - public: - -#define NVTT_DECLARE_PIMPL(Class) \ public: \ struct Private; \ Private & m @@ -64,9 +61,6 @@ // Public interface. namespace nvtt { - // Forward declarations. - struct TexImage; - /// Supported compression formats. enum Format { @@ -89,23 +83,6 @@ namespace nvtt Format_BC3n = Format_DXT5n, Format_BC4, // ATI1 Format_BC5, // 3DC, ATI2 - - Format_DXT1n, - Format_CTX1, - Format_YCoCg_DXT5, - - Format_BC6, - Format_BC7, - }; - - /// Pixel types. - enum PixelType - { - PixelType_UnsignedNorm, - PixelType_SignedNorm, - PixelType_UnsignedInt, - PixelType_SignedInt, - PixelType_Float, }; /// Quality modes. @@ -120,7 +97,6 @@ namespace nvtt /// Compression options. This class describes the desired compression format and other compression settings. struct CompressionOptions { - NVTT_FORBID_COPY(CompressionOptions); NVTT_DECLARE_PIMPL(CompressionOptions); NVTT_API CompressionOptions(); @@ -136,23 +112,10 @@ namespace nvtt // Set color mask to describe the RGB/RGBA format. 
NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask); - NVTT_API void setPixelFormat(unsigned char rsize, unsigned char gsize, unsigned char bsize, unsigned char asize); - - NVTT_API void setPixelType(PixelType pixelType); NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127); }; - /* - // DXGI_FORMAT_R16G16_FLOAT - compressionOptions.setPixelType(PixelType_Float); - compressionOptions.setPixelFormat2(16, 16, 0, 0); - - // DXGI_FORMAT_R32G32B32A32_FLOAT - compressionOptions.setPixelType(PixelType_Float); - compressionOptions.setPixelFormat2(32, 32, 32, 32); - */ - /// Wrap modes. enum WrapMode @@ -174,7 +137,8 @@ namespace nvtt enum InputFormat { InputFormat_BGRA_8UB, - InputFormat_RGBA_32F, + // InputFormat_RGBE_8UB, + // InputFormat_BGRA_32F, }; /// Mipmap downsampling filters. @@ -185,23 +149,11 @@ namespace nvtt MipmapFilter_Kaiser, ///< Kaiser-windowed Sinc filter is the best downsampling filter. }; - /// Texture resize filters. - enum ResizeFilter - { - ResizeFilter_Box, - ResizeFilter_Triangle, - ResizeFilter_Kaiser, - ResizeFilter_Mitchell, - }; - /// Color transformation. enum ColorTransform { ColorTransform_None, - ColorTransform_Linear, ///< Not implemented. - ColorTransform_Swizzle, ///< Not implemented. - ColorTransform_YCoCg, ///< Transform into r=Co, g=Cg, b=0, a=Y - ColorTransform_ScaledYCoCg, ///< Not implemented. + ColorTransform_Linear, }; /// Extents rounding mode. @@ -224,7 +176,6 @@ namespace nvtt /// Input options. Specify format and layout of the input texture. struct InputOptions { - NVTT_FORBID_COPY(InputOptions); NVTT_DECLARE_PIMPL(InputOptions); NVTT_API InputOptions(); @@ -239,18 +190,17 @@ namespace nvtt // Set mipmap data. Copies the data. 
NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0); - NVTT_API bool setMipmapChannelData(const void * data, int channel, int w, int h, int d = 1, int face = 0, int mipmap = 0); // Describe the format of the input. NVTT_API void setFormat(InputFormat format); - // Set the way the input alpha channel is interpreted. @@ Not implemented! + // Set the way the input alpha channel is interpreted. NVTT_API void setAlphaMode(AlphaMode alphaMode); // Set gamma settings. NVTT_API void setGamma(float inputGamma, float outputGamma); - // Set texture wrapping mode. + // Set texture wrappign mode. NVTT_API void setWrapMode(WrapMode mode); // Set mipmapping options. @@ -265,18 +215,13 @@ namespace nvtt NVTT_API void setNormalFilter(float sm, float medium, float big, float large); NVTT_API void setNormalizeMipmaps(bool b); - // Set color transforms. + // Set color transforms. @@ Not implemented! NVTT_API void setColorTransform(ColorTransform t); NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3); - NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset); - NVTT_API void setSwizzleTransform(int x, int y, int z, int w); // Set resizing options. NVTT_API void setMaxExtents(int d); NVTT_API void setRoundMode(RoundMode mode); - - // Set whether or not to premultiply color by alpha - NVTT_API void setPremultiplyAlpha(bool b); }; @@ -301,7 +246,6 @@ namespace nvtt Error_CudaError, Error_FileOpen, Error_FileWrite, - Error_UnsupportedOutputFormat, }; /// Error handler. @@ -313,19 +257,11 @@ namespace nvtt virtual void error(Error e) = 0; }; - /// Container. - enum Container - { - Container_DDS, - Container_DDS10, - }; - /// Output Options. This class holds pointers to the interfaces that are used to report the output of /// the compressor to the user. 
struct OutputOptions { - NVTT_FORBID_COPY(OutputOptions); NVTT_DECLARE_PIMPL(OutputOptions); NVTT_API OutputOptions(); @@ -339,129 +275,34 @@ namespace nvtt NVTT_API void setOutputHandler(OutputHandler * outputHandler); NVTT_API void setErrorHandler(ErrorHandler * errorHandler); NVTT_API void setOutputHeader(bool outputHeader); - NVTT_API void setContainer(Container container); }; - /// Context. + /// Texture compressor. struct Compressor { - NVTT_FORBID_COPY(Compressor); NVTT_DECLARE_PIMPL(Compressor); NVTT_API Compressor(); NVTT_API ~Compressor(); - // Context settings. NVTT_API void enableCudaAcceleration(bool enable); NVTT_API bool isCudaAccelerationEnabled() const; - // InputOptions api. + // Main entrypoint of the compression library. NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + + // Estimate the size of compressing the input with the given options. NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const; - - // RAW api. - NVTT_API bool compress2D(InputFormat format, int w, int h, void * data, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; - //ßNVTT_API bool compress3D(InputFormat format, int w, int h, int d, void * data, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; - NVTT_API int estimateSize(int w, int h, int d, const CompressionOptions & compressionOptions) const; - - // TexImage api. 
- NVTT_API TexImage createTexImage() const; - NVTT_API bool outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; - NVTT_API bool compress(const TexImage & tex, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; - NVTT_API int estimateSize(const TexImage & tex, const CompressionOptions & compressionOptions) const; }; - - // "Compressor" is deprecated. This should have been called "Context" - typedef Compressor Context; - - /// DXT decoder. - enum Decoder - { - Decoder_Reference, - Decoder_NV5x, - }; - - /// A texture mipmap. - struct TexImage - { - NVTT_API TexImage(); - NVTT_API TexImage(const TexImage & tex); - NVTT_API ~TexImage(); - - NVTT_API void operator=(const TexImage & tex); - - // Texture parameters. - NVTT_API void setTextureType(TextureType type); - NVTT_API void setWrapMode(WrapMode mode); - NVTT_API void setAlphaMode(AlphaMode alphaMode); - NVTT_API void setNormalMap(bool isNormalMap); - - // Accessors. - NVTT_API int width() const; - NVTT_API int height() const; - NVTT_API int depth() const; - NVTT_API int faceCount() const; - NVTT_API TextureType textureType() const; - NVTT_API WrapMode wrapMode() const; - NVTT_API AlphaMode alphaMode() const; - NVTT_API bool isNormalMap() const; - NVTT_API int countMipmaps() const; - - // Texture data. - NVTT_API bool load(const char * fileName); - NVTT_API bool save(const char * fileName) const; - NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * data); - NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * r, const void * g, const void * b, const void * a); - NVTT_API bool setImage2D(Format format, Decoder decoder, int w, int h, int idx, const void * data); - - // Resizing methods. 
- NVTT_API void resize(int w, int h, ResizeFilter filter); - NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter); - NVTT_API bool buildNextMipmap(MipmapFilter filter); - - // Color transforms. - NVTT_API void toLinear(float gamma); - NVTT_API void toGamma(float gamma); - NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]); - NVTT_API void swizzle(int r, int g, int b, int a); - NVTT_API void scaleBias(int channel, float scale, float bias); - NVTT_API void packNormal(); - NVTT_API void expandNormal(); - NVTT_API void blend(float r, float g, float b, float a, float t); - NVTT_API void premultiplyAlpha(); - NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale); - NVTT_API void setBorder(float r, float g, float b, float a); - NVTT_API void fill(float r, float g, float b, float a); - - // Set normal map options. - NVTT_API void toNormalMap(float sm, float medium, float big, float large); - NVTT_API void toHeightMap(); - NVTT_API void normalizeNormalMap(); - - // Error compare. - NVTT_API float rootMeanSquaredError_rgb(const TexImage & reference) const; - NVTT_API float rootMeanSquaredError_alpha(const TexImage & reference) const; - - private: - void detach(); - - struct Private; - Private * m; - }; - - + // Return string for the given error code. NVTT_API const char * errorString(Error e); // Return NVTT version. NVTT_API unsigned int version(); - // Set callbacks. - //NVTT_API void setErrorCallback(ErrorCallback callback); - //NVTT_API void setMemoryCallbacks(...); - } // nvtt namespace #endif // NV_TT_H diff --git a/src/nvtt/nvtt_wrapper.cpp b/src/nvtt/nvtt_wrapper.cpp index c20f70c..66c16aa 100644 --- a/src/nvtt/nvtt_wrapper.cpp +++ b/src/nvtt/nvtt_wrapper.cpp @@ -185,16 +185,6 @@ void nvttSetOutputOptionsOutputHandler(NvttOutputOptions * outputOptions, nvttOu // Compressor class. 
-NvttCompressor * nvttCreateCompressor() -{ - return new nvtt::Compressor(); -} - -void nvttDestroyCompressor(NvttCompressor * compressor) -{ - delete compressor; -} - NvttBoolean nvttCompress(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions, const NvttOutputOptions * outputOptions) { return (NvttBoolean)compressor->process(*inputOptions, *compressionOptions, *outputOptions); diff --git a/src/nvtt/nvtt_wrapper.h b/src/nvtt/nvtt_wrapper.h index d7c94f7..100a4a3 100644 --- a/src/nvtt/nvtt_wrapper.h +++ b/src/nvtt/nvtt_wrapper.h @@ -47,7 +47,7 @@ # define NVTT_API #endif -#define NVTT_VERSION 201 +#define NVTT_VERSION 200 #ifdef __cplusplus typedef struct nvtt::InputOptions NvttInputOptions; @@ -156,7 +156,6 @@ typedef enum NVTT_Error_Unknown, NVTT_Error_FileOpen, NVTT_Error_FileWrite, - NVTT_Error_UnsupportedOutputFormat, } NvttError; typedef enum diff --git a/src/nvtt/squish/Makefile b/src/nvtt/squish/Makefile new file mode 100644 index 0000000..75a72fe --- /dev/null +++ b/src/nvtt/squish/Makefile @@ -0,0 +1,31 @@ + +include config + +SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp + +OBJ = $(SRC:%.cpp=%.o) + +LIB = libsquish.a + +all : $(LIB) + +install : $(LIB) + install squish.h $(INSTALL_DIR)/include + install libsquish.a $(INSTALL_DIR)/lib + +uninstall: + $(RM) $(INSTALL_DIR)/include/squish.h + $(RM) $(INSTALL_DIR)/lib/libsquish.a + +$(LIB) : $(OBJ) + $(AR) cr $@ $? + ranlib $@ + +%.o : %.cpp + $(CXX) $(CPPFLAGS) -I. 
$(CXXFLAGS) -o$@ -c $< + +clean : + $(RM) $(OBJ) $(LIB) + + + diff --git a/src/nvtt/squish/clusterfit.cpp b/src/nvtt/squish/clusterfit.cpp index 8411478..186020c 100644 --- a/src/nvtt/squish/clusterfit.cpp +++ b/src/nvtt/squish/clusterfit.cpp @@ -28,7 +28,7 @@ #include "colourblock.h" #include -namespace nvsquish { +namespace squish { ClusterFit::ClusterFit() { @@ -280,6 +280,15 @@ void ClusterFit::Compress4( void* block ) m_beta[k] = m_weights[k]; } + /*unsigned int permutation = 0; + for(int p = 0; p < 16; p++) { + permutation |= indices[p] << (p * 2); + } + if (debug) printf("%X:\t", permutation); + + if (debug && permutation == 0x55FFFFAA) __debugbreak(); + */ + // solve a least squares problem to place the endpoints #if SQUISH_USE_SIMD Vec4 start, end; diff --git a/src/nvtt/squish/clusterfit.h b/src/nvtt/squish/clusterfit.h index e115cd3..a870dc4 100644 --- a/src/nvtt/squish/clusterfit.h +++ b/src/nvtt/squish/clusterfit.h @@ -23,15 +23,15 @@ -------------------------------------------------------------------------- */ -#ifndef NV_SQUISH_CLUSTERFIT_H -#define NV_SQUISH_CLUSTERFIT_H +#ifndef SQUISH_CLUSTERFIT_H +#define SQUISH_CLUSTERFIT_H #include "squish.h" #include "maths.h" #include "simd.h" #include "colourfit.h" -namespace nvsquish { +namespace squish { class ClusterFit : public ColourFit { diff --git a/src/nvtt/squish/colourblock.cpp b/src/nvtt/squish/colourblock.cpp index 37e4b02..c0a0225 100644 --- a/src/nvtt/squish/colourblock.cpp +++ b/src/nvtt/squish/colourblock.cpp @@ -25,7 +25,7 @@ #include "colourblock.h" -namespace nvsquish { +namespace squish { static int FloatToInt( float a, int limit ) { diff --git a/src/nvtt/squish/colourblock.h b/src/nvtt/squish/colourblock.h index bb0c3b0..f195323 100644 --- a/src/nvtt/squish/colourblock.h +++ b/src/nvtt/squish/colourblock.h @@ -23,13 +23,13 @@ -------------------------------------------------------------------------- */ -#ifndef NV_SQUISH_COLOURBLOCK_H -#define NV_SQUISH_COLOURBLOCK_H +#ifndef 
SQUISH_COLOURBLOCK_H +#define SQUISH_COLOURBLOCK_H #include "squish.h" #include "maths.h" -namespace nvsquish { +namespace squish { void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block ); void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block ); diff --git a/src/nvtt/squish/colourfit.cpp b/src/nvtt/squish/colourfit.cpp index 7df7047..15d8a74 100644 --- a/src/nvtt/squish/colourfit.cpp +++ b/src/nvtt/squish/colourfit.cpp @@ -26,7 +26,7 @@ #include "colourfit.h" #include "colourset.h" -namespace nvsquish { +namespace squish { ColourFit::ColourFit() { diff --git a/src/nvtt/squish/colourfit.h b/src/nvtt/squish/colourfit.h index 4c4dc6b..23314d8 100644 --- a/src/nvtt/squish/colourfit.h +++ b/src/nvtt/squish/colourfit.h @@ -23,13 +23,13 @@ -------------------------------------------------------------------------- */ -#ifndef NV_SQUISH_COLOURFIT_H -#define NV_SQUISH_COLOURFIT_H +#ifndef SQUISH_COLOURFIT_H +#define SQUISH_COLOURFIT_H #include "squish.h" #include "maths.h" -namespace nvsquish { +namespace squish { class ColourSet; diff --git a/src/nvtt/squish/colourset.cpp b/src/nvtt/squish/colourset.cpp index d269da5..82a7571 100644 --- a/src/nvtt/squish/colourset.cpp +++ b/src/nvtt/squish/colourset.cpp @@ -25,7 +25,7 @@ #include "colourset.h" -namespace nvsquish { +namespace squish { // @@ Add flags: // - MatchTransparent diff --git a/src/nvtt/squish/colourset.h b/src/nvtt/squish/colourset.h index a95ab64..f96aa21 100644 --- a/src/nvtt/squish/colourset.h +++ b/src/nvtt/squish/colourset.h @@ -23,21 +23,21 @@ -------------------------------------------------------------------------- */ -#ifndef NV_SQUISH_COLOURSET_H -#define NV_SQUISH_COLOURSET_H +#ifndef SQUISH_COLOURSET_H +#define SQUISH_COLOURSET_H #include "squish.h" #include "maths.h" #include "simd.h" -namespace nvsquish { +namespace squish { /*! 
@brief Represents a set of block colours */ class ColourSet { public: - ColourSet( u8 const* rgba, int flags, bool createMinimalSet = true ); + ColourSet( u8 const* rgba, int flags, bool createMinimalSet = false ); int GetCount() const { return m_count; } Vec3 const* GetPoints() const { return m_points; } diff --git a/src/nvtt/squish/extra/squishgen2.cpp b/src/nvtt/squish/extra/squishgen2.cpp deleted file mode 100644 index f613d0c..0000000 --- a/src/nvtt/squish/extra/squishgen2.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* ----------------------------------------------------------------------------- - - Copyright (c) 2006 Simon Brown si@sjbrown.co.uk - Copyright (c) 2008 Ignacio Castano castano@gmail.com - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - -------------------------------------------------------------------------- */ - -#include -#include -#include - -struct Precomp { - float alpha2_sum; - float beta2_sum; - float alphabeta_sum; - float factor; -}; - - -int main() -{ - int i = 0; - - printf("struct Precomp {\n"); - printf("\tfloat alpha2_sum;\n"); - printf("\tfloat beta2_sum;\n"); - printf("\tfloat alphabeta_sum;\n"); - printf("\tfloat factor;\n"); - printf("};\n\n"); - - printf("static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = {\n"); - - // Three element clusters: - for( int c0 = 0; c0 <= 16; c0++) // At least two clusters. - { - for( int c1 = 0; c1 <= 16-c0; c1++) - { - int c2 = 16 - c0 - c1; - - Precomp p; - p.alpha2_sum = c0 + c1 * 0.25f; - p.beta2_sum = c2 + c1 * 0.25f; - p.alphabeta_sum = c1 * 0.25f; - p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum); - - if (isfinite(p.factor)) - { - printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2); - } - else - { - printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2); - } - - i++; - } - } - printf("}; // %d three cluster elements\n\n", i); - - printf("static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = {\n"); - - // Four element clusters: - i = 0; - for( int c0 = 0; c0 <= 16; c0++) - { - for( int c1 = 0; c1 <= 16-c0; c1++) - { - for( int c2 = 0; c2 <= 16-c0-c1; c2++) - { - int c3 = 16 - c0 - c1 - c2; - - Precomp p; - p.alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f); - p.beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f); - p.alphabeta_sum = (c1 + c2) * (2.0f/9.0f); - p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum); - - if (isfinite(p.factor)) - { - printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2, c3); - } - else - { - printf("\t{ %ff, %ff, %ff, 
FLT_MAX }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2, c3); - } - - i++; - } - } - } - printf("}; // %d four cluster elements\n\n", i); - - return 0; -} diff --git a/src/nvtt/squish/fastclusterfit.cpp b/src/nvtt/squish/fastclusterfit.cpp index 0b19150..8ae8ab5 100644 --- a/src/nvtt/squish/fastclusterfit.cpp +++ b/src/nvtt/squish/fastclusterfit.cpp @@ -31,7 +31,7 @@ #include "fastclusterlookup.inl" -namespace nvsquish { +namespace squish { FastClusterFit::FastClusterFit() { @@ -129,8 +129,6 @@ void FastClusterFit::Compress3( void* block ) Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half = VEC4_CONST(0.5f); Vec4 const two = VEC4_CONST(2.0); - Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); @@ -162,22 +160,25 @@ void FastClusterFit::Compress3( void* block ) Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; - // clamp to the grid + // clamp the output to [0, 1] a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); + + // clamp to the grid + Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); + Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - // compute the error (we skip the constant xxsum) - Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); - Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); - Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); - Vec4 e4 = MultiplyAdd( two, e3, e1 ); - + // compute the error + Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); + Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); + // apply 
the metric to the error term - Vec4 e5 = e4 * m_metricSqr; - Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); - + Vec4 e4 = e3 * m_metricSqr; + Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); + // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) { @@ -273,7 +274,7 @@ void FastClusterFit::Compress4( void* block ) Vec4 const factor = constants.SplatW(); i++; - Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); + Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); Vec4 const betax_sum = m_xsum - alphax_sum; Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; @@ -285,19 +286,18 @@ void FastClusterFit::Compress4( void* block ) // clamp to the grid Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); + Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - // compute the error (we skip the constant xxsum) - Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); - Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); - Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); - Vec4 e4 = MultiplyAdd( two, e3, e1 ); - + // compute the error + Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); + Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); + // apply the metric to the error term - Vec4 e5 = e4 * m_metricSqr; - Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); + Vec4 e4 = e3 * m_metricSqr; + Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) @@ -370,12 +370,6 @@ void FastClusterFit::Compress4( void* block ) void FastClusterFit::Compress3( void* block ) { - Vec3 const one( 1.0f 
); - Vec3 const zero( 0.0f ); - Vec3 const half( 0.5f ); - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); - // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); @@ -405,9 +399,16 @@ void FastClusterFit::Compress3( void* block ) Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; - // clamp to the grid + // clamp the output to [0, 1] + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); + + // clamp to the grid + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); + Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; @@ -476,12 +477,6 @@ void FastClusterFit::Compress3( void* block ) void FastClusterFit::Compress4( void* block ) { - Vec3 const one( 1.0f ); - Vec3 const zero( 0.0f ); - Vec3 const half( 0.5f ); - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); - // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); @@ -516,9 +511,16 @@ void FastClusterFit::Compress4( void* block ) Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; - // clamp to the grid + // clamp the output to [0, 1] + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); + + // clamp to the grid + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); + Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; diff --git a/src/nvtt/squish/fastclusterfit.h b/src/nvtt/squish/fastclusterfit.h index cf36eb9..d0ed971 100644 --- a/src/nvtt/squish/fastclusterfit.h +++ 
b/src/nvtt/squish/fastclusterfit.h @@ -24,15 +24,15 @@ -------------------------------------------------------------------------- */ -#ifndef NV_SQUISH_FASTCLUSTERFIT_H -#define NV_SQUISH_FASTCLUSTERFIT_H +#ifndef SQUISH_FASTCLUSTERFIT_H +#define SQUISH_FASTCLUSTERFIT_H #include "squish.h" #include "maths.h" #include "simd.h" #include "colourfit.h" -namespace nvsquish { +namespace squish { class FastClusterFit : public ColourFit { diff --git a/src/nvtt/squish/maths.cpp b/src/nvtt/squish/maths.cpp index 35934a3..9eda793 100644 --- a/src/nvtt/squish/maths.cpp +++ b/src/nvtt/squish/maths.cpp @@ -27,7 +27,7 @@ #include "simd.h" #include -namespace nvsquish { +namespace squish { Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights, Vec3::Arg metric ) { @@ -112,4 +112,4 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) #endif -} // namespace nvsquish +} // namespace squish diff --git a/src/nvtt/squish/maths.h b/src/nvtt/squish/maths.h index 19f1d9d..087a889 100644 --- a/src/nvtt/squish/maths.h +++ b/src/nvtt/squish/maths.h @@ -23,14 +23,14 @@ -------------------------------------------------------------------------- */ -#ifndef NV_SQUISH_MATHS_H -#define NV_SQUISH_MATHS_H +#ifndef SQUISH_MATHS_H +#define SQUISH_MATHS_H #include #include #include "config.h" -namespace nvsquish { +namespace squish { class Vec3 { diff --git a/src/nvtt/squish/simd.h b/src/nvtt/squish/simd.h index 7066854..cb98e7f 100644 --- a/src/nvtt/squish/simd.h +++ b/src/nvtt/squish/simd.h @@ -23,8 +23,8 @@ -------------------------------------------------------------------------- */ -#ifndef NV_SQUISH_SIMD_H -#define NV_SQUISH_SIMD_H +#ifndef SQUISH_SIMD_H +#define SQUISH_SIMD_H #include "maths.h" diff --git a/src/nvtt/squish/simd_sse.h b/src/nvtt/squish/simd_sse.h index aa0e7b7..853ee7a 100644 --- a/src/nvtt/squish/simd_sse.h +++ b/src/nvtt/squish/simd_sse.h @@ -23,8 +23,8 @@ -------------------------------------------------------------------------- */ -#ifndef 
NV_SQUISH_SIMD_SSE_H -#define NV_SQUISH_SIMD_SSE_H +#ifndef SQUISH_SIMD_SSE_H +#define SQUISH_SIMD_SSE_H #include #if ( SQUISH_USE_SSE > 1 ) @@ -35,7 +35,7 @@ #define SQUISH_SSE_SPLAT( a ) \ ( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) ) -namespace nvsquish { +namespace squish { #define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) ) diff --git a/src/nvtt/squish/simd_ve.h b/src/nvtt/squish/simd_ve.h index 2be08fa..56ed95e 100644 --- a/src/nvtt/squish/simd_ve.h +++ b/src/nvtt/squish/simd_ve.h @@ -26,12 +26,10 @@ #ifndef SQUISH_SIMD_VE_H #define SQUISH_SIMD_VE_H -#ifndef __APPLE_ALTIVEC__ #include #undef bool -#endif -namespace nvsquish { +namespace squish { #define VEC4_CONST( X ) Vec4( ( vector float )( X ) ) diff --git a/src/nvtt/squish/squish.h b/src/nvtt/squish/squish.h index 3dfd79a..fdd8fde 100644 --- a/src/nvtt/squish/squish.h +++ b/src/nvtt/squish/squish.h @@ -23,11 +23,11 @@ -------------------------------------------------------------------------- */ -#ifndef NV_SQUISH_H -#define NV_SQUISH_H +#ifndef SQUISH_H +#define SQUISH_H //! All squish API functions live in this namespace. -namespace nvsquish { +namespace squish { // ----------------------------------------------------------------------------- diff --git a/src/nvtt/squish/squish.xcodeproj/project.pbxproj b/src/nvtt/squish/squish.xcodeproj/project.pbxproj new file mode 100644 index 0000000..7812da8 --- /dev/null +++ b/src/nvtt/squish/squish.xcodeproj/project.pbxproj @@ -0,0 +1,531 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 42; + objects = { + +/* Begin PBXBuildFile section */ + 133FA0DC096A7B8E0050752E /* alpha.h in Headers */ = {isa = PBXBuildFile; fileRef = 133FA0DA096A7B8E0050752E /* alpha.h */; }; + 133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 133FA0DB096A7B8E0050752E /* alpha.cpp */; }; + 1342B4160999DF1900152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; }; + 1342B41A0999DF7000152915 /* squishpng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B4190999DF7000152915 /* squishpng.cpp */; }; + 1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B43E0999E0CC00152915 /* squishtest.cpp */; }; + 1342B4420999E0EC00152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; }; + 1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70B092AA857005EE038 /* clusterfit.cpp */; }; + 1350D71B092AA858005EE038 /* clusterfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D70C092AA858005EE038 /* clusterfit.h */; }; + 1350D71E092AA858005EE038 /* colourblock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70F092AA858005EE038 /* colourblock.cpp */; }; + 1350D71F092AA858005EE038 /* colourblock.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D710092AA858005EE038 /* colourblock.h */; }; + 1350D720092AA858005EE038 /* config.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D711092AA858005EE038 /* config.h */; }; + 1350D721092AA858005EE038 /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D712092AA858005EE038 /* maths.cpp */; }; + 1350D722092AA858005EE038 /* maths.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D713092AA858005EE038 /* maths.h */; }; + 1350D725092AA858005EE038 /* rangefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
1350D716092AA858005EE038 /* rangefit.cpp */; }; + 1350D726092AA858005EE038 /* rangefit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D717092AA858005EE038 /* rangefit.h */; }; + 1350D727092AA858005EE038 /* squish.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D718092AA858005EE038 /* squish.cpp */; }; + 1350D728092AA858005EE038 /* squish.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D719092AA858005EE038 /* squish.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C21CE09ADAB0800A2500D /* squishgen.cpp */; }; + 139C234F09B0602700A2500D /* singlecolourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 139C234D09B0602700A2500D /* singlecolourfit.h */; }; + 139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C234E09B0602700A2500D /* singlecolourfit.cpp */; }; + 13A7CCA40952BE63001C963A /* colourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 13A7CCA20952BE63001C963A /* colourfit.h */; }; + 13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13A7CCA30952BE63001C963A /* colourfit.cpp */; }; + 13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13C4C7AB0941C18000AC5B89 /* colourset.cpp */; }; + 13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */ = {isa = PBXBuildFile; fileRef = 13C4C7AC0941C18000AC5B89 /* colourset.h */; }; + 13CD64C2092BCF8A00488C97 /* simd.h in Headers */ = {isa = PBXBuildFile; fileRef = 13CD64C0092BCF8A00488C97 /* simd.h */; }; + 13D0DC910931F93A00909807 /* simd_ve.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC900931F93A00909807 /* simd_ve.h */; }; + 13D0DC970931F9D600909807 /* simd_sse.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC960931F9D600909807 /* simd_sse.h */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 1342B52B099BF72F00152915 /* PBXContainerItemProxy 
*/ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D; + remoteInfo = squish; + }; + 1342B58E099BF93D00152915 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D; + remoteInfo = squish; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXFileReference section */ + 133FA0DA096A7B8E0050752E /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = ""; }; + 133FA0DB096A7B8E0050752E /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = ""; }; + 1342B4110999DE7F00152915 /* squishpng */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishpng; sourceTree = BUILT_PRODUCTS_DIR; }; + 1342B4190999DF7000152915 /* squishpng.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = squishpng.cpp; path = extra/squishpng.cpp; sourceTree = ""; }; + 1342B4370999E07C00152915 /* squishtest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishtest; sourceTree = BUILT_PRODUCTS_DIR; }; + 1342B43E0999E0CC00152915 /* squishtest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishtest.cpp; path = extra/squishtest.cpp; sourceTree = ""; }; + 1350D70B092AA857005EE038 /* clusterfit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfit.cpp; sourceTree = ""; }; + 1350D70C092AA858005EE038 /* clusterfit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = clusterfit.h; sourceTree = ""; 
}; + 1350D70F092AA858005EE038 /* colourblock.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourblock.cpp; sourceTree = ""; }; + 1350D710092AA858005EE038 /* colourblock.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourblock.h; sourceTree = ""; }; + 1350D711092AA858005EE038 /* config.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; + 1350D712092AA858005EE038 /* maths.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = maths.cpp; sourceTree = ""; }; + 1350D713092AA858005EE038 /* maths.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = maths.h; sourceTree = ""; }; + 1350D716092AA858005EE038 /* rangefit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = rangefit.cpp; sourceTree = ""; }; + 1350D717092AA858005EE038 /* rangefit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = rangefit.h; sourceTree = ""; }; + 1350D718092AA858005EE038 /* squish.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = squish.cpp; sourceTree = ""; }; + 1350D719092AA858005EE038 /* squish.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = squish.h; sourceTree = ""; }; + 13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = texture_compression_s3tc.txt; sourceTree = ""; }; + 139C21C409ADAA7000A2500D /* squishgen */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishgen; sourceTree = BUILT_PRODUCTS_DIR; }; + 139C21CE09ADAB0800A2500D /* squishgen.cpp */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.cpp.cpp; name = squishgen.cpp; path = extra/squishgen.cpp; sourceTree = "<group>"; }; + 139C234D09B0602700A2500D /* singlecolourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = singlecolourfit.h; sourceTree = "<group>"; }; + 139C234E09B0602700A2500D /* singlecolourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = singlecolourfit.cpp; sourceTree = "<group>"; }; + 139C236D09B060A900A2500D /* singlecolourlookup.inl */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = singlecolourlookup.inl; sourceTree = "<group>"; }; + 13A7CCA20952BE63001C963A /* colourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colourfit.h; sourceTree = "<group>"; }; + 13A7CCA30952BE63001C963A /* colourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colourfit.cpp; sourceTree = "<group>"; }; + 13C4C7AB0941C18000AC5B89 /* colourset.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourset.cpp; sourceTree = "<group>"; }; + 13C4C7AC0941C18000AC5B89 /* colourset.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourset.h; sourceTree = "<group>"; }; + 13CD64C0092BCF8A00488C97 /* simd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd.h; sourceTree = "<group>"; }; + 13D0DC900931F93A00909807 /* simd_ve.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_ve.h; sourceTree = "<group>"; }; + 13D0DC960931F9D600909807 /* simd_sse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_sse.h; sourceTree = "<group>"; }; + D2AAC046055464E500DB518D /* libsquish.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsquish.a; sourceTree = BUILT_PRODUCTS_DIR; }; +/*
End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 1342B40F0999DE7F00152915 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 1342B4160999DF1900152915 /* libsquish.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 1342B4350999E07C00152915 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 1342B4420999E0EC00152915 /* libsquish.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 139C21C209ADAA7000A2500D /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + D289987405E68DCB004EDB86 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* squish */ = { + isa = PBXGroup; + children = ( + 08FB7795FE84155DC02AAC07 /* Source */, + C6A0FF2B0290797F04C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = squish; + sourceTree = "<group>"; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + 133FA0DB096A7B8E0050752E /* alpha.cpp */, + 133FA0DA096A7B8E0050752E /* alpha.h */, + 1350D70B092AA857005EE038 /* clusterfit.cpp */, + 1350D70C092AA858005EE038 /* clusterfit.h */, + 13A7CCA30952BE63001C963A /* colourfit.cpp */, + 13A7CCA20952BE63001C963A /* colourfit.h */, + 13C4C7AB0941C18000AC5B89 /* colourset.cpp */, + 13C4C7AC0941C18000AC5B89 /* colourset.h */, + 1350D70F092AA858005EE038 /* colourblock.cpp */, + 1350D710092AA858005EE038 /* colourblock.h */, + 13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */, + 1350D711092AA858005EE038 /* config.h */, + 1350D712092AA858005EE038 /* maths.cpp */, + 1350D713092AA858005EE038 /* maths.h */, +
1350D716092AA858005EE038 /* rangefit.cpp */, + 1350D717092AA858005EE038 /* rangefit.h */, + 13CD64C0092BCF8A00488C97 /* simd.h */, + 13D0DC960931F9D600909807 /* simd_sse.h */, + 13D0DC900931F93A00909807 /* simd_ve.h */, + 139C234E09B0602700A2500D /* singlecolourfit.cpp */, + 139C234D09B0602700A2500D /* singlecolourfit.h */, + 139C236D09B060A900A2500D /* singlecolourlookup.inl */, + 1350D718092AA858005EE038 /* squish.cpp */, + 1350D719092AA858005EE038 /* squish.h */, + 139C21CE09ADAB0800A2500D /* squishgen.cpp */, + 1342B4190999DF7000152915 /* squishpng.cpp */, + 1342B43E0999E0CC00152915 /* squishtest.cpp */, + ); + name = Source; + sourceTree = "<group>"; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + D2AAC046055464E500DB518D /* libsquish.a */, + 1342B4110999DE7F00152915 /* squishpng */, + 1342B4370999E07C00152915 /* squishtest */, + 139C21C409ADAA7000A2500D /* squishgen */, + ); + name = Products; + sourceTree = "<group>"; + }; + C6A0FF2B0290797F04C91782 /* Documentation */ = { + isa = PBXGroup; + children = ( + ); + name = Documentation; + sourceTree = "<group>"; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + D2AAC043055464E500DB518D /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 1350D71B092AA858005EE038 /* clusterfit.h in Headers */, + 1350D71F092AA858005EE038 /* colourblock.h in Headers */, + 1350D720092AA858005EE038 /* config.h in Headers */, + 1350D722092AA858005EE038 /* maths.h in Headers */, + 1350D726092AA858005EE038 /* rangefit.h in Headers */, + 1350D728092AA858005EE038 /* squish.h in Headers */, + 13CD64C2092BCF8A00488C97 /* simd.h in Headers */, + 13D0DC910931F93A00909807 /* simd_ve.h in Headers */, + 13D0DC970931F9D600909807 /* simd_sse.h in Headers */, + 13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */, + 13A7CCA40952BE63001C963A /* colourfit.h in Headers */, + 133FA0DC096A7B8E0050752E /* alpha.h in Headers */, + 139C234F09B0602700A2500D /*
singlecolourfit.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 1342B4100999DE7F00152915 /* squishpng */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */; + buildPhases = ( + 1342B40E0999DE7F00152915 /* Sources */, + 1342B40F0999DE7F00152915 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + 1342B58F099BF93D00152915 /* PBXTargetDependency */, + ); + name = squishpng; + productName = squishpng; + productReference = 1342B4110999DE7F00152915 /* squishpng */; + productType = "com.apple.product-type.tool"; + }; + 1342B4360999E07C00152915 /* squishtest */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */; + buildPhases = ( + 1342B4340999E07C00152915 /* Sources */, + 1342B4350999E07C00152915 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + 1342B52C099BF72F00152915 /* PBXTargetDependency */, + ); + name = squishtest; + productName = squishtest; + productReference = 1342B4370999E07C00152915 /* squishtest */; + productType = "com.apple.product-type.tool"; + }; + 139C21C309ADAA7000A2500D /* squishgen */ = { + isa = PBXNativeTarget; + buildConfigurationList = 139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */; + buildPhases = ( + 139C21C109ADAA7000A2500D /* Sources */, + 139C21C209ADAA7000A2500D /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = squishgen; + productName = squishgen; + productReference = 139C21C409ADAA7000A2500D /* squishgen */; + productType = "com.apple.product-type.tool"; + }; + D2AAC045055464E500DB518D /* squish */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */; + buildPhases = ( + 
D2AAC043055464E500DB518D /* Headers */, + D2AAC044055464E500DB518D /* Sources */, + D289987405E68DCB004EDB86 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = squish; + productName = squish; + productReference = D2AAC046055464E500DB518D /* libsquish.a */; + productType = "com.apple.product-type.library.static"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* squish */; + projectDirPath = ""; + targets = ( + D2AAC045055464E500DB518D /* squish */, + 1342B4100999DE7F00152915 /* squishpng */, + 1342B4360999E07C00152915 /* squishtest */, + 139C21C309ADAA7000A2500D /* squishgen */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 1342B40E0999DE7F00152915 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 1342B41A0999DF7000152915 /* squishpng.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 1342B4340999E07C00152915 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 139C21C109ADAA7000A2500D /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + D2AAC044055464E500DB518D /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */, + 1350D71E092AA858005EE038 /* colourblock.cpp in Sources */, + 1350D721092AA858005EE038 /* maths.cpp in Sources */, + 1350D725092AA858005EE038 /* 
rangefit.cpp in Sources */, + 1350D727092AA858005EE038 /* squish.cpp in Sources */, + 13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */, + 13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */, + 133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */, + 139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 1342B52C099BF72F00152915 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = D2AAC045055464E500DB518D /* squish */; + targetProxy = 1342B52B099BF72F00152915 /* PBXContainerItemProxy */; + }; + 1342B58F099BF93D00152915 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = D2AAC045055464E500DB518D /* squish */; + targetProxy = 1342B58E099BF93D00152915 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 1342B4140999DE9F00152915 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + HEADER_SEARCH_PATHS = ( + .., + /sw/include, + ); + INSTALL_PATH = "$(HOME)/bin"; + LIBRARY_SEARCH_PATHS = /sw/lib; + OTHER_LDFLAGS = "-lpng"; + PRODUCT_NAME = squishpng; + }; + name = Debug; + }; + 1342B4150999DE9F00152915 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + HEADER_SEARCH_PATHS = ( + .., + /sw/include, + ); + INSTALL_PATH = "$(HOME)/bin"; + LIBRARY_SEARCH_PATHS = /sw/lib; + OTHER_LDFLAGS = "-lpng"; + PRODUCT_NAME = squishpng; + }; + name = Release; + }; + 1342B43C0999E0C000152915 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + HEADER_SEARCH_PATHS = ..; + INSTALL_PATH = "$(HOME)/bin"; + PRODUCT_NAME = squishtest; + }; + name = Debug; + }; + 1342B43D0999E0C000152915 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + HEADER_SEARCH_PATHS = ..; + INSTALL_PATH = "$(HOME)/bin"; + PRODUCT_NAME = squishtest; + }; + name = Release; + }; + 139C21CC09ADAB0300A2500D /* 
Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + HEADER_SEARCH_PATHS = ..; + INSTALL_PATH = "$(HOME)/bin"; + PRODUCT_NAME = squishgen; + }; + name = Debug; + }; + 139C21CD09ADAB0300A2500D /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + HEADER_SEARCH_PATHS = ..; + INSTALL_PATH = "$(HOME)/bin"; + PRODUCT_NAME = squishgen; + }; + name = Release; + }; + 1DEB91EC08733DB70010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + COPY_PHASE_STRIP = NO; + GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1"; + INSTALL_PATH = /usr/local/lib; + OTHER_CFLAGS = "-maltivec"; + PRODUCT_NAME = squish; + STRIP_INSTALLED_PRODUCT = NO; + }; + name = Debug; + }; + 1DEB91ED08733DB70010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1"; + INSTALL_PATH = /usr/local/lib; + OTHER_CFLAGS = "-maltivec"; + PRODUCT_NAME = squish; + STRIP_INSTALLED_PRODUCT = YES; + }; + name = Release; + }; + 1DEB91F008733DB70010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_DYNAMIC_NO_PIC = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_TREAT_WARNINGS_AS_ERRORS = YES; + GCC_WARN_ABOUT_MISSING_NEWLINE = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_PEDANTIC = YES; + GCC_WARN_SHADOW = YES; + GCC_WARN_SIGN_COMPARE = YES; + GCC_WARN_UNUSED_PARAMETER = YES; + GCC_WARN_UNUSED_VALUE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk; + }; + name = Debug; + }; + 1DEB91F108733DB70010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_DYNAMIC_NO_PIC = YES; + GCC_OPTIMIZATION_LEVEL = 3; + GCC_TREAT_WARNINGS_AS_ERRORS = YES; + GCC_UNROLL_LOOPS = YES; + GCC_WARN_ABOUT_MISSING_NEWLINE = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_PEDANTIC = YES; + GCC_WARN_SHADOW = YES; + GCC_WARN_SIGN_COMPARE = YES; + GCC_WARN_UNUSED_PARAMETER = YES; + GCC_WARN_UNUSED_VALUE = YES; + 
GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1342B4140999DE9F00152915 /* Debug */, + 1342B4150999DE9F00152915 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1342B43C0999E0C000152915 /* Debug */, + 1342B43D0999E0C000152915 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 139C21CC09ADAB0300A2500D /* Debug */, + 139C21CD09ADAB0300A2500D /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB91EC08733DB70010E9CD /* Debug */, + 1DEB91ED08733DB70010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB91F008733DB70010E9CD /* Debug */, + 1DEB91F108733DB70010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} diff --git a/src/nvtt/squish/texture_compression_s3tc.txt b/src/nvtt/squish/texture_compression_s3tc.txt new file 
mode 100644 index 0000000..f229cf3 --- /dev/null +++ b/src/nvtt/squish/texture_compression_s3tc.txt @@ -0,0 +1,508 @@ +Name + + EXT_texture_compression_s3tc + +Name Strings + + GL_EXT_texture_compression_s3tc + +Contact + + Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com) + +Status + + FINAL + +Version + + 1.1, 16 November 2001 (containing only clarifications relative to + version 1.0, dated 7 July 2000) + +Number + + 198 + +Dependencies + + OpenGL 1.1 is required. + + GL_ARB_texture_compression is required. + + This extension is written against the OpenGL 1.2.1 Specification. + +Overview + + This extension provides additional texture compression functionality + specific to S3's S3TC format (called DXTC in Microsoft's DirectX API), + subject to all the requirements and limitations described by the extension + GL_ARB_texture_compression. + + This extension supports DXT1, DXT3, and DXT5 texture compression formats. + For the DXT1 image format, this specification supports an RGB-only mode + and a special RGBA mode with single-bit "transparent" alpha. + +IP Status + + Contact S3 Incorporated (http://www.s3.com) regarding any intellectual + property issues associated with implementing this extension. + + WARNING: Vendors able to support S3TC texture compression in Direct3D + drivers do not necessarily have the right to use the same functionality in + OpenGL. + +Issues + + (1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported? + + RESOLVED: No -- insufficient interest. Supporting DXT2 and DXT4 + would require some rework to the TexEnv definition (maybe add a new + base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats. + Note that the EXT_texture_env_combine extension (which extends normal + TexEnv modes) can be used to support textures with premultiplied alpha. + + (2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported + or should we use only the DXT enums? + + RESOLVED: No.
A generic RGBA_S3TC_EXT is problematic because DXT3 + and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is + also) yet one format must be chosen up front. + + (3) Should TexSubImage support all block-aligned edits or just the minimal + functionality required by the ARB_texture_compression extension? + + RESOLVED: Allow all valid block-aligned edits. + + (4) A pre-compressed image with a DXT1 format can be used as either an + RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image. If the image has + transparent texels, how are they treated in each format? + + RESOLVED: The renderer has to make sure that an RGB_S3TC_DXT1 format + is decoded as RGB (where alpha is effectively one for all texels), + while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all + texels with "transparent" encodings). Otherwise, the formats are + identical. + + (5) Is the encoding of the RGB components for DXT1 formats correct in this + spec? MSDN documentation does not specify an RGB color for the + "transparent" encoding. Is it really black? + + RESOLVED: Yes. The specification for the DXT1 format initially + required black, but later changed that requirement to a + recommendation. All vendors involved in the definition of this + specification support black. In addition, specifying black has a + useful behavior. + + When blending multiple texels (GL_LINEAR filtering), mixing opaque and + transparent samples is problematic. Defining a black color on + transparent texels achieves a sensible result that works like a + texture with premultiplied alpha. For example, if three opaque white + and one transparent sample is being averaged, the result would be a + 75% intensity gray (with an alpha of 75%). This is the same result on + the color channels as would be obtained using a white color, 75% + alpha, and a SRC_ALPHA blend factor. + + (6) Is the encoding of the RGB components for DXT3 and DXT5 formats + correct in this spec? 
MSDN documentation suggests that the RGB blocks + for DXT3 and DXT5 are decoded as described by the DXT1 format. + + RESOLVED: Yes -- this appears to be a bug in the MSDN documentation. + The specification for the DXT2-DXT5 formats require decoding using the + opaque block encoding, regardless of the relative values of "color0" + and "color1". + +New Procedures and Functions + + None. + +New Tokens + + Accepted by the <internalformat> parameter of TexImage2D, CopyTexImage2D, + and CompressedTexImage2DARB and the <format> parameter of + CompressedTexSubImage2DARB: + + COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 + COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 + COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 + COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 + +Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation) + + None. + +Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization) + + Add to Table 3.16.1: Specific Compressed Internal Formats + + Compressed Internal Format Base Internal Format + ========================== ==================== + COMPRESSED_RGB_S3TC_DXT1_EXT RGB + COMPRESSED_RGBA_S3TC_DXT1_EXT RGBA + COMPRESSED_RGBA_S3TC_DXT3_EXT RGBA + COMPRESSED_RGBA_S3TC_DXT5_EXT RGBA + + + Modify Section 3.8.2, Alternate Image Specification + + (add to end of TexSubImage discussion, p.123 -- after edit from the + ARB_texture_compression spec) + + If the internal format of the texture image being modified is + COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, + COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the + texture is stored using one of the several S3TC compressed texture image + formats. Such images are easily edited along 4x4 texel boundaries, so the + limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed. + TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION + error only if one of the following conditions occurs: + + * <width> is not a multiple of four or equal to TEXTURE_WIDTH, + unless <xoffset> and <yoffset> are both zero.
+ * <height> is not a multiple of four or equal to TEXTURE_HEIGHT, + unless <xoffset> and <yoffset> are both zero. + * <xoffset> or <yoffset> is not a multiple of four. + + The contents of any 4x4 block of texels of an S3TC compressed texture + image that does not intersect the area being modified are preserved during + valid TexSubImage2D and CopyTexSubImage2D calls. + + + Add to Section 3.8.2, Alternate Image Specification (adding to the end of + the CompressedTexImage section introduced by the ARB_texture_compression + spec) + + If <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT, + COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or + COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one + of several S3TC compressed texture image formats. The S3TC texture + compression algorithm supports only 2D images without borders. + CompressedTexImage1DARB and CompressedTexImage3DARB produce an + INVALID_ENUM error if <internalformat> is an S3TC format. + CompressedTexImage2DARB will produce an INVALID_OPERATION error if + <border> is non-zero. + + + Add to Section 3.8.2, Alternate Image Specification (adding to the end of + the CompressedTexSubImage section introduced by the + ARB_texture_compression spec) + + If the internal format of the texture image being modified is + COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, + COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the + texture is stored using one of the several S3TC compressed texture image + formats. Since the S3TC texture compression algorithm supports only 2D + images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce + an INVALID_ENUM error if <format> is an S3TC format. Since S3TC images + are easily edited along 4x4 texel boundaries, the limitations on + CompressedTexSubImage2D are relaxed. CompressedTexSubImage2D will result + in an INVALID_OPERATION error only if one of the following conditions + occurs: + + * <width> is not a multiple of four or equal to TEXTURE_WIDTH. + * <height> is not a multiple of four or equal to TEXTURE_HEIGHT.
+ * <xoffset> or <yoffset> is not a multiple of four. + + The contents of any 4x4 block of texels of an S3TC compressed texture + image that does not intersect the area being modified are preserved during + valid TexSubImage2D and CopyTexSubImage2D calls. + +Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment +Operations and the Frame Buffer) + + None. + +Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions) + + None. + +Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and +State Requests) + + None. + +Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance) + + None. + +Additions to the AGL/GLX/WGL Specifications + + None. + +GLX Protocol + + None. + +Errors + + INVALID_ENUM is generated by CompressedTexImage1DARB or + CompressedTexImage3DARB if <internalformat> is + COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, + COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT. + + INVALID_OPERATION is generated by CompressedTexImage2DARB if + <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT, + COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or + COMPRESSED_RGBA_S3TC_DXT5_EXT and <border> is not equal to zero. + + INVALID_ENUM is generated by CompressedTexSubImage1DARB or + CompressedTexSubImage3DARB if <format> is COMPRESSED_RGB_S3TC_DXT1_EXT, + COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or + COMPRESSED_RGBA_S3TC_DXT5_EXT. + + INVALID_OPERATION is generated by TexSubImage2D, CopyTexSubImage2D, or + CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is + COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, + COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of + the following apply: <width> is not a multiple of four or equal to + TEXTURE_WIDTH; <height> is not a multiple of four or equal to + TEXTURE_HEIGHT; <xoffset> or <yoffset> is not a multiple of four.
+ + + The following restrictions from the ARB_texture_compression specification + do not apply to S3TC texture formats, since subimage modification is + straightforward as long as the subimage is properly aligned. + + DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D, + DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or + DELETE: CopyTexSubImage3D if the internal format of the texture image is + DELETE: compressed and <xoffset>, <yoffset>, or <zoffset> does not equal + DELETE: -b, where b is the value of TEXTURE_BORDER. + + DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB, + DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the + DELETE: entire texture image is not being edited: if <xoffset>, + DELETE: <yoffset>, or <zoffset> is greater than -b, <xoffset> + <width> is + DELETE: less than w+b, <yoffset> + <height> is less than h+b, or <zoffset> + DELETE: + <depth> is less than d+b, where b is the value of + DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of + DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH. + + See also errors in the GL_ARB_texture_compression specification. + +New State + + In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT + subscript for Z by 4 in the "Type" row. + +New Implementation Dependent State + + None + +Appendix + + S3TC Compressed Texture Image Formats + + Compressed texture images stored using the S3TC compressed image formats + are represented as a collection of 4x4 texel blocks, where each block + contains 64 or 128 bits of texel data. The image is encoded as a normal + 2D raster image in which each 4x4 block is treated as a single pixel. If + an S3TC image has a width or height less than four, the data corresponding + to texels outside the image are irrelevant and undefined. + + When an S3TC image with a width of <w>, height of <h>, and block size of + <blocksize> (8 or 16 bytes) is decoded, the corresponding image size (in + bytes) is: + + ceil(<w>/4) * ceil(<h>/4) * blocksize.
+ + When decoding an S3TC image, the block containing the texel at offset + (<x>, <y>) begins at an offset (in bytes) relative to the base of the + image of: + + blocksize * (ceil(<w>/4) * floor(<y>/4) + floor(<x>/4)). + + The data corresponding to a specific texel (<x>, <y>) are extracted from a + 4x4 texel block using a relative (x,y) value of + + (<x> modulo 4, <y> modulo 4). + + There are four distinct S3TC image formats: + + COMPRESSED_RGB_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64 + bits of RGB image data. + + Each RGB image data block is encoded as a sequence of 8 bytes, called (in + order of increasing address): + + c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3 + + The 8 bytes of the block are decoded into three quantities: + + color0 = c0_lo + c0_hi * 256 + color1 = c1_lo + c1_hi * 256 + bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3)) + + color0 and color1 are 16-bit unsigned integers that are unpacked to + RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with + a <format> of RGB and a type of UNSIGNED_SHORT_5_6_5. + + bits is a 32-bit unsigned integer, from which a two-bit control code + is extracted for a texel at location (x,y) in the block using: + + code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0] + + where bit 31 is the most significant and bit 0 is the least + significant bit. + + The RGB color for a texel at location (x,y) in the block is given by: + + RGB0, if color0 > color1 and code(x,y) == 0 + RGB1, if color0 > color1 and code(x,y) == 1 + (2*RGB0+RGB1)/3, if color0 > color1 and code(x,y) == 2 + (RGB0+2*RGB1)/3, if color0 > color1 and code(x,y) == 3 + + RGB0, if color0 <= color1 and code(x,y) == 0 + RGB1, if color0 <= color1 and code(x,y) == 1 + (RGB0+RGB1)/2, if color0 <= color1 and code(x,y) == 2 + BLACK, if color0 <= color1 and code(x,y) == 3 + + Arithmetic operations are done per component, and BLACK refers to an + RGB color where red, green, and blue are all zero.
+ + Since this image has an RGB format, there is no alpha component and the + image is considered fully opaque. + + + COMPRESSED_RGBA_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64 + bits of RGB image data and minimal alpha information. The RGB components + of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT. + + The alpha component for a texel at location (x,y) in the block is + given by: + + 0.0, if color0 <= color1 and code(x,y) == 3 + 1.0, otherwise + + IMPORTANT: When encoding an RGBA image into a format using 1-bit + alpha, any texels with an alpha component less than 0.5 end up with an + alpha of 0.0 and any texels with an alpha component greater than or + equal to 0.5 end up with an alpha of 1.0. When encoding an RGBA image + into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red, + green, and blue components of any texels with a final alpha of 0.0 + will automatically be zero (black). If this behavior is not desired + by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT. + This format will never be used when a generic compressed internal + format (Table 3.16.2) is specified, although the nearly identical + format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be. + + + COMPRESSED_RGBA_S3TC_DXT3_EXT: Each 4x4 block of texels consists of 64 + bits of uncompressed alpha image data followed by 64 bits of RGB image + data. + + Each RGB image data block is encoded according to the + COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code + bits always use the non-transparent encodings. In other words, they are + treated as though color0 > color1, regardless of the actual values of + color0 and color1. 
+ + Each alpha image data block is encoded as a sequence of 8 bytes, called + (in order of increasing address): + + a0, a1, a2, a3, a4, a5, a6, a7 + + The 8 bytes of the block are decoded into one 64-bit integer: + + alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 + + 256 * (a5 + 256 * (a6 + 256 * a7)))))) + + alpha is a 64-bit unsigned integer, from which a four-bit alpha value + is extracted for a texel at location (x,y) in the block using: + + alpha(x,y) = bits[4*(4*y+x)+3..4*(4*y+x)+0] + + where bit 63 is the most significant and bit 0 is the least + significant bit. + + The alpha component for a texel at location (x,y) in the block is + given by alpha(x,y) / 15. + + + COMPRESSED_RGBA_S3TC_DXT5_EXT: Each 4x4 block of texels consists of 64 + bits of compressed alpha image data followed by 64 bits of RGB image data. + + Each RGB image data block is encoded according to the + COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code + bits always use the non-transparent encodings. In other words, they are + treated as though color0 > color1, regardless of the actual values of + color0 and color1. + + Each alpha image data block is encoded as a sequence of 8 bytes, called + (in order of increasing address): + + alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5 + + The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha + components by multiplying by 1/255. + + The 6 "bits" bytes of the block are decoded into one 48-bit integer: + + bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 + + 256 * (bits_4 + 256 * bits_5)))) + + bits is a 48-bit unsigned integer, from which a three-bit control code + is extracted for a texel at location (x,y) in the block using: + + code(x,y) = bits[3*(4*y+x)+2..3*(4*y+x)+0] + + where bit 47 is the most significant and bit 0 is the least + significant bit.
+ + The alpha component for a texel at location (x,y) in the block is + given by: + + alpha0, code(x,y) == 0 + alpha1, code(x,y) == 1 + + (6*alpha0 + 1*alpha1)/7, alpha0 > alpha1 and code(x,y) == 2 + (5*alpha0 + 2*alpha1)/7, alpha0 > alpha1 and code(x,y) == 3 + (4*alpha0 + 3*alpha1)/7, alpha0 > alpha1 and code(x,y) == 4 + (3*alpha0 + 4*alpha1)/7, alpha0 > alpha1 and code(x,y) == 5 + (2*alpha0 + 5*alpha1)/7, alpha0 > alpha1 and code(x,y) == 6 + (1*alpha0 + 6*alpha1)/7, alpha0 > alpha1 and code(x,y) == 7 + + (4*alpha0 + 1*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 2 + (3*alpha0 + 2*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 3 + (2*alpha0 + 3*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 4 + (1*alpha0 + 4*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 5 + 0.0, alpha0 <= alpha1 and code(x,y) == 6 + 1.0, alpha0 <= alpha1 and code(x,y) == 7 + + +Revision History + + 1.1, 11/16/01 pbrown: Updated contact info, clarified where texels + fall within a single block. + + 1.0, 07/07/00 prbrown1: Published final version agreed to by working + group members. + + 0.9, 06/24/00 prbrown1: Documented that block-aligned TexSubImage calls + do not modify existing texels outside the + modified blocks. Added caveat to allow for a + (0,0)-anchored TexSubImage operation of + arbitrary size. + + 0.7, 04/11/00 prbrown1: Added issues on DXT1, DXT3, and DXT5 encodings + where the MSDN documentation doesn't match what + is really done. Added enum values from the + extension registry. + + 0.4, 03/28/00 prbrown1: Updated to reflect final version of the + ARB_texture_compression extension. Allowed + block-aligned TexSubImage calls. + + 0.3, 03/07/00 prbrown1: Resolved issues pertaining to the format of RGB + blocks in the DXT3 and DXT5 formats (they don't + ever use the "transparent" encoding). Fixed + decoding of DXT1 blocks. Pointed out issue of + "transparent" texels in DXT1 encodings having + different behaviors for RGB and RGBA internal + formats. 
+ + 0.2, 02/23/00 prbrown1: Minor revisions; added several issues. + + 0.11, 02/17/00 prbrown1: Slight modification to error semantics + (INVALID_ENUM instead of INVALID_OPERATION). + + 0.1, 02/15/00 prbrown1: Initial revision. diff --git a/src/nvtt/squish/vs7/squish.sln b/src/nvtt/squish/vs7/squish.sln new file mode 100644 index 0000000..ae23f85 --- /dev/null +++ b/src/nvtt/squish/vs7/squish.sln @@ -0,0 +1,39 @@ +Microsoft Visual Studio Solution File, Format Version 8.00 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcproj", "{6A8518C3-D81A-4428-BD7F-C37933088AC1}" + ProjectSection(ProjectDependencies) = postProject + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishpng", "squishpng\squishpng.vcproj", "{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}" + ProjectSection(ProjectDependencies) = postProject + {6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishtest", "squishtest\squishtest.vcproj", "{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}" + ProjectSection(ProjectDependencies) = postProject + {6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1} + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfiguration) = preSolution + Debug = Debug + Release = Release + EndGlobalSection + GlobalSection(ProjectConfiguration) = postSolution + {6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.ActiveCfg = Debug|Win32 + {6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.Build.0 = Debug|Win32 + {6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.ActiveCfg = Release|Win32 + {6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.Build.0 = Release|Win32 + {3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.ActiveCfg = Debug|Win32 + {3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.Build.0 = Debug|Win32 + {3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.ActiveCfg = Release|Win32 + 
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.Build.0 = Release|Win32 + {77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.ActiveCfg = Debug|Win32 + {77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.Build.0 = Debug|Win32 + {77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.ActiveCfg = Release|Win32 + {77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + EndGlobalSection + GlobalSection(ExtensibilityAddIns) = postSolution + EndGlobalSection +EndGlobal diff --git a/src/nvtt/squish/vs7/squish/squish.vcproj b/src/nvtt/squish/vs7/squish/squish.vcproj new file mode 100644 index 0000000..2b992a5 --- /dev/null +++ b/src/nvtt/squish/vs7/squish/squish.vcproj @@ -0,0 +1,198 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/nvtt/squish/vs7/squishpng/squishpng.vcproj b/src/nvtt/squish/vs7/squishpng/squishpng.vcproj new file mode 100644 index 0000000..b894b34 --- /dev/null +++ b/src/nvtt/squish/vs7/squishpng/squishpng.vcproj @@ -0,0 +1,140 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/nvtt/squish/vs7/squishtest/squishtest.vcproj b/src/nvtt/squish/vs7/squishtest/squishtest.vcproj new file mode 100644 index 0000000..cbe45eb --- /dev/null +++ b/src/nvtt/squish/vs7/squishtest/squishtest.vcproj @@ -0,0 +1,138 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/nvtt/squish/weightedclusterfit.cpp b/src/nvtt/squish/weightedclusterfit.cpp index f3781b1..6f2318a 100644 --- a/src/nvtt/squish/weightedclusterfit.cpp +++ b/src/nvtt/squish/weightedclusterfit.cpp @@ -30,7 +30,7 @@ #include -namespace nvsquish { +namespace squish { WeightedClusterFit::WeightedClusterFit() { @@ -131,13 +131,11 @@ float 
WeightedClusterFit::GetBestError() const void WeightedClusterFit::Compress3( void* block ) { - int const count = m_colours->GetCount(); + int const count = m_colours->GetCount(); Vec4 const one = VEC4_CONST(1.0f); Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f); Vec4 const two = VEC4_CONST(2.0); - Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); @@ -176,21 +174,24 @@ void WeightedClusterFit::Compress3( void* block ) Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; - // clamp to the grid + // clamp the output to [0, 1] a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); + + // clamp to the grid + Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); + Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - // compute the error (we skip the constant xxsum) - Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); - Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); - Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); - Vec4 e4 = MultiplyAdd( two, e3, e1 ); - + // compute the error + Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); + Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); + // apply the metric to the error term - Vec4 e5 = e4 * m_metricSqr; - Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); + Vec4 e4 = e3 * m_metricSqr; + Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) @@ -221,7 +222,7 @@ void WeightedClusterFit::Compress3( void* block ) for(; i < 
b0+b1; i++) { bestindices[i] = 2; } - for(; i < count; i++) { + for(; i < 16; i++) { bestindices[i] = 1; } } @@ -231,7 +232,7 @@ void WeightedClusterFit::Compress3( void* block ) for( int i = 0; i < count; ++i ) ordered[m_order[i]] = bestindices[i]; - m_colours->RemapIndices( ordered, bestindices ); + m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices. // save the block @@ -244,16 +245,13 @@ void WeightedClusterFit::Compress3( void* block ) void WeightedClusterFit::Compress4( void* block ) { - int const count = m_colours->GetCount(); + int const count = m_colours->GetCount(); Vec4 const one = VEC4_CONST(1.0f); Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half = VEC4_CONST(0.5f); Vec4 const two = VEC4_CONST(2.0); Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); - Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f ); - Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); @@ -278,16 +276,16 @@ void WeightedClusterFit::Compress4( void* block ) //Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); //float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); - Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum + Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); // alphax_sum, alpha2_sum Vec4 const alpha2_sum = alphax_sum.SplatW(); //Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f); //float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); - Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum + Vec4 const betax_sum = x3 + MultiplyAdd(x2, twothirds, x1 * onethird); // betax_sum, beta2_sum Vec4 const beta2_sum = betax_sum.SplatW(); - //float const alphabeta_sum = (w1 + w2) 
* (2.0f/9.0f); - Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum + //float const alphabeta_sum = w1 * (2.0f/9.0f) + w2 * (2.0f/9.0f); + Vec4 const alphabeta_sum = two * (x1 * onethird + x2 * onethird).SplatW(); // alphabeta_sum // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); @@ -295,21 +293,24 @@ void WeightedClusterFit::Compress4( void* block ) Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; - // clamp to the grid + // clamp the output to [0, 1] a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); + + // clamp to the grid + Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); + Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - // compute the error (we skip the constant xxsum) - Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); - Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); - Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); - Vec4 e4 = MultiplyAdd( two, e3, e1 ); - + // compute the error + Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); + Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); + // apply the metric to the error term - Vec4 e5 = e4 * m_metricSqr; - Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); + Vec4 e4 = e3 * m_metricSqr; + Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) @@ -347,7 +348,7 @@ void WeightedClusterFit::Compress4( void* block ) for(; i < b0+b1+b2; i++) { bestindices[i] = 3; } - 
for(; i < count; i++) { + for(; i < 16; i++) { bestindices[i] = 1; } } @@ -357,10 +358,8 @@ void WeightedClusterFit::Compress4( void* block ) for( int i = 0; i < count; ++i ) ordered[m_order[i]] = bestindices[i]; - m_colours->RemapIndices( ordered, bestindices ); - // save the block - WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); + WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block ); // save the error m_besterror = besterror; @@ -371,13 +370,6 @@ void WeightedClusterFit::Compress4( void* block ) void WeightedClusterFit::Compress3( void* block ) { - int const count = m_colours->GetCount(); - Vec3 const one( 1.0f ); - Vec3 const zero( 0.0f ); - Vec3 const half( 0.5f ); - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); - // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); @@ -389,12 +381,12 @@ void WeightedClusterFit::Compress3( void* block ) int b0 = 0, b1 = 0; // check all possible clusters for this total order - for( int c0 = 0; c0 <= count; c0++) + for( int c0 = 0; c0 <= 16; c0++) { Vec3 x1(0.0f); float w1 = 0.0f; - for( int c1 = 0; c1 <= count-c0; c1++) + for( int c1 = 0; c1 <= 16-c0; c1++) { float w2 = m_wsum - w0 - w1; @@ -410,9 +402,16 @@ void WeightedClusterFit::Compress3( void* block ) Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; - // clamp to the grid + // clamp the output to [0, 1] + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); + + // clamp to the grid + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); + Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; @@ -453,20 +452,18 @@ void WeightedClusterFit::Compress3( void* block ) for(; i < b0+b1; i++) { 
bestindices[i] = 2; } - for(; i < count; i++) { + for(; i < 16; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; - for( int i = 0; i < count; ++i ) + for( int i = 0; i < 16; ++i ) ordered[m_order[i]] = bestindices[i]; - m_colours->RemapIndices( ordered, bestindices ); - // save the block - WriteColourBlock3( beststart, bestend, bestindices, block ); + WriteColourBlock3( beststart, bestend, ordered, block ); // save the error m_besterror = besterror; @@ -475,13 +472,6 @@ void WeightedClusterFit::Compress3( void* block ) void WeightedClusterFit::Compress4( void* block ) { - int const count = m_colours->GetCount(); - Vec3 const one( 1.0f ); - Vec3 const zero( 0.0f ); - Vec3 const half( 0.5f ); - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); - // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); @@ -492,17 +482,17 @@ void WeightedClusterFit::Compress4( void* block ) int b0 = 0, b1 = 0, b2 = 0; // check all possible clusters for this total order - for( int c0 = 0; c0 <= count; c0++) + for( int c0 = 0; c0 <= 16; c0++) { Vec3 x1(0.0f); float w1 = 0.0f; - for( int c1 = 0; c1 <= count-c0; c1++) + for( int c1 = 0; c1 <= 16-c0; c1++) { Vec3 x2(0.0f); float w2 = 0.0f; - for( int c2 = 0; c2 <= count-c0-c1; c2++) + for( int c2 = 0; c2 <= 16-c0-c1; c2++) { float w3 = m_wsum - w0 - w1 - w2; @@ -517,9 +507,16 @@ void WeightedClusterFit::Compress4( void* block ) Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; - // clamp to the grid + // clamp the output to [0, 1] + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); + + // clamp to the grid + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); + Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half 
)*gridrcp; @@ -568,20 +565,18 @@ void WeightedClusterFit::Compress4( void* block ) for(; i < b0+b1+b2; i++) { bestindices[i] = 3; } - for(; i < count; i++) { + for(; i < 16; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; - for( int i = 0; i < count; ++i ) + for( int i = 0; i < 16; ++i ) ordered[m_order[i]] = bestindices[i]; - - m_colours->RemapIndices( ordered, bestindices ); // save the block - WriteColourBlock4( beststart, bestend, bestindices, block ); + WriteColourBlock4( beststart, bestend, ordered, block ); // save the error m_besterror = besterror; diff --git a/src/nvtt/squish/weightedclusterfit.h b/src/nvtt/squish/weightedclusterfit.h index a0a45fb..a8f6eea 100644 --- a/src/nvtt/squish/weightedclusterfit.h +++ b/src/nvtt/squish/weightedclusterfit.h @@ -24,15 +24,15 @@ -------------------------------------------------------------------------- */ -#ifndef NV_SQUISH_WEIGHTEDCLUSTERFIT_H -#define NV_SQUISH_WEIGHTEDCLUSTERFIT_H +#ifndef SQUISH_WEIGHTEDCLUSTERFIT_H +#define SQUISH_WEIGHTEDCLUSTERFIT_H #include "squish.h" #include "maths.h" #include "simd.h" #include "colourfit.h" -namespace nvsquish { +namespace squish { class WeightedClusterFit : public ColourFit { diff --git a/src/nvtt/tests/CMakeLists.txt b/src/nvtt/tests/CMakeLists.txt deleted file mode 100644 index b30c353..0000000 --- a/src/nvtt/tests/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ - -ADD_EXECUTABLE(filtertest filtertest.cpp ../tools/cmdline.h) -TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage) - -ADD_EXECUTABLE(nvtestsuite testsuite.cpp) -TARGET_LINK_LIBRARIES(nvtestsuite nvcore nvmath nvimage nvtt) -ADD_TEST(NVTT.TestSuite.Kodak.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -out output-cuda-kodak) -ADD_TEST(NVTT.TestSuite.Waterloo.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -out output-cuda-waterloo) -ADD_TEST(NVTT.TestSuite.Epic.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -out output-cuda-epic) 
-ADD_TEST(NVTT.TestSuite.Kodak.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -nocuda -out output-nocuda-kodak) -ADD_TEST(NVTT.TestSuite.Waterloo.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -nocuda -out output-nocuda-waterloo) -ADD_TEST(NVTT.TestSuite.Epic.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -nocuda -out output-nocuda-epic) - -IF (CUDA_FOUND) - ADD_EXECUTABLE(driverapitest driverapi.cpp) - TARGET_LINK_LIBRARIES(driverapitest nvcore nvmath nvimage) -ENDIF (CUDA_FOUND) - -ADD_EXECUTABLE(imperativeapi imperativeapi.cpp) -TARGET_LINK_LIBRARIES(imperativeapi nvcore nvmath nvimage nvtt) - -INSTALL(TARGETS nvtestsuite DESTINATION bin) - -#include_directories("/usr/include/ffmpeg/") -#ADD_EXECUTABLE(nvmpegenc tools/mpegenc.cpp tools/cmdline.h) -#TARGET_LINK_LIBRARIES(nvmpegenc nvcore nvmath nvimage avcodec z) -#INSTALL(TARGETS nvmpegenc DESTINATION bin) - diff --git a/src/nvtt/tests/driverapi.cpp b/src/nvtt/tests/driverapi.cpp deleted file mode 100644 index 4e6a203..0000000 --- a/src/nvtt/tests/driverapi.cpp +++ /dev/null @@ -1,144 +0,0 @@ - -#include - -#include -#include - -// Typedefs -typedef CUresult (CUDAAPI * cuInitPtr)( unsigned int Flags ); - -typedef CUresult (CUDAAPI * cuDeviceGetPtr)(CUdevice *device, int ordinal); -typedef CUresult (CUDAAPI * cuDeviceGetCountPtr)(int *count); -typedef CUresult (CUDAAPI * cuDeviceGetNamePtr)(char *name, int len, CUdevice dev); -typedef CUresult (CUDAAPI * cuDeviceComputeCapabilityPtr)(int *major, int *minor, CUdevice dev); -typedef CUresult (CUDAAPI * cuDeviceTotalMemPtr)(unsigned int *bytes, CUdevice dev); -typedef CUresult (CUDAAPI * cuDeviceGetPropertiesPtr)(CUdevprop *prop, CUdevice dev); -typedef CUresult (CUDAAPI * cuDeviceGetAttributePtr)(int *pi, CUdevice_attribute attrib, CUdevice dev); - -typedef CUresult (CUDAAPI * cuCtxCreatePtr)(CUcontext *pctx, unsigned int flags, CUdevice dev ); -typedef CUresult (CUDAAPI * cuCtxDestroyPtr)( CUcontext ctx ); 
-typedef CUresult (CUDAAPI * cuCtxAttachPtr)(CUcontext *pctx, unsigned int flags); -typedef CUresult (CUDAAPI * cuCtxDetachPtr)(CUcontext ctx); -typedef CUresult (CUDAAPI * cuCtxPushCurrentPtr)( CUcontext ctx ); -typedef CUresult (CUDAAPI * cuCtxPopCurrentPtr)( CUcontext *pctx ); -typedef CUresult (CUDAAPI * cuCtxGetDevicePtr)(CUdevice *device); -typedef CUresult (CUDAAPI * cuCtxSynchronizePtr)(void); - - -// A compressor inits CUDA and creates a context for each device. -// - -struct CudaDevice -{ - CUdevice device; - CUcontext context; -}; - -struct CudaContext -{ - CudaContext() - { - printf("CudaContext()\n"); - - #if NV_OS_WIN32 - Library nvcuda("nvcuda.dll"); - #else - Library nvcuda(NV_LIBRARY_NAME(cuda)); - #endif - - cuInit = (cuInitPtr)nvcuda.bindSymbol("cuInit"); - - cuDeviceGet = (cuDeviceGetPtr)nvcuda.bindSymbol("cuDeviceGet"); - cuDeviceGetCount = (cuDeviceGetCountPtr)nvcuda.bindSymbol("cuDeviceGetCount"); - cuDeviceGetName = (cuDeviceGetNamePtr)nvcuda.bindSymbol("cuDeviceGetName"); - cuDeviceComputeCapability = (cuDeviceComputeCapabilityPtr)nvcuda.bindSymbol("cuDeviceComputeCapability"); - cuDeviceTotalMem = (cuDeviceTotalMemPtr)nvcuda.bindSymbol("cuDeviceTotalMem"); - cuDeviceGetProperties = (cuDeviceGetPropertiesPtr)nvcuda.bindSymbol("cuDeviceGetProperties"); - cuDeviceGetAttribute = (cuDeviceGetAttributePtr)nvcuda.bindSymbol("cuDeviceGetAttribute"); - - cuCtxCreate = (cuCtxCreatePtr)nvcuda.bindSymbol("cuCtxCreate"); - cuCtxDestroy = (cuCtxDestroyPtr)nvcuda.bindSymbol("cuCtxDestroy"); - cuCtxAttach = (cuCtxAttachPtr)nvcuda.bindSymbol("cuCtxAttach"); - cuCtxDetach = (cuCtxDetachPtr)nvcuda.bindSymbol("cuCtxDetach"); - cuCtxPushCurrent = (cuCtxPushCurrentPtr)nvcuda.bindSymbol("cuCtxPushCurrent"); - cuCtxPopCurrent = (cuCtxPopCurrentPtr)nvcuda.bindSymbol("cuCtxPopCurrent"); - cuCtxGetDevice = (cuCtxGetDevicePtr)nvcuda.bindSymbol("cuCtxGetDevice"); - cuCtxSynchronize = (cuCtxSynchronizePtr)nvcuda.bindSymbol("cuCtxSynchronize"); - - CUresult status = 
cuInit(0); - - if (status == CUDA_SUCCESS) - { - printf("cuInit succeeded.\n"); - } - - m_deviceCount = 0; - cuDeviceGetCount(&m_deviceCount); - - printf("%d devices found.\n", m_deviceCount); - - if (m_deviceCount > 0) - { - m_devices = new CudaDevice[m_deviceCount]; - - uint flags = CU_CTX_SCHED_AUTO; - if (m_deviceCount > 1) flags = CU_CTX_SCHED_YIELD; - - for (int i = 0; i < m_deviceCount; i++) - { - cuDeviceGet(&m_devices[i].device, i); - cuCtxCreate(&m_devices[i].context, flags, m_devices[i].device); - - cuCtxDestroy(m_devices[i].context); - } - - } - } - - ~CudaContext() - { - printf("~CudaContext()\n"); - - if (m_deviceCount > 0) - { - for (int i = 0; i < m_deviceCount; i++) - { - cuCtxDestroy(m_devices[i].context); - } - - delete [] m_devices; - } - } - - -public: - cuInitPtr cuInit; - - cuDeviceGetPtr cuDeviceGet; - cuDeviceGetCountPtr cuDeviceGetCount; - cuDeviceGetNamePtr cuDeviceGetName; - cuDeviceComputeCapabilityPtr cuDeviceComputeCapability; - cuDeviceTotalMemPtr cuDeviceTotalMem; - cuDeviceGetPropertiesPtr cuDeviceGetProperties; - cuDeviceGetAttributePtr cuDeviceGetAttribute; - - cuCtxCreatePtr cuCtxCreate; - cuCtxDestroyPtr cuCtxDestroy; - cuCtxAttachPtr cuCtxAttach; - cuCtxDetachPtr cuCtxDetach; - cuCtxPushCurrentPtr cuCtxPushCurrent; - cuCtxPopCurrentPtr cuCtxPopCurrent; - cuCtxGetDevicePtr cuCtxGetDevice; - cuCtxSynchronizePtr cuCtxSynchronize; - - int m_deviceCount; - CudaDevice * m_devices; -}; - -int main(void) -{ - CudaContext ctx; -// cuInit(0); - - return 0; -} diff --git a/src/nvtt/tests/mpegenc.cpp b/src/nvtt/tests/mpegenc.cpp deleted file mode 100644 index 3904b10..0000000 --- a/src/nvtt/tests/mpegenc.cpp +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the 
rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include - -#include - -#include -#include - -#include - -#include "cmdline.h" - -extern "C" { -#include -//#include -} - -// http://ffmpeg.mplayerhq.hu/general.html -// http://cekirdek.pardus.org.tr/~ismail/ffmpeg-docs/apiexample_8c-source.html - - -using namespace nv; - -static float s_quality = 0.5f; - -static AVFrame * createPicture(const Image & image) -{ - const uint w = image.width(); - const uint h = image.height(); - const uint size = w * h; - - AVFrame * picture = avcodec_alloc_frame(); - - uint8_t * buffer = (uint8_t *)malloc((size * 3) / 2); - - picture->data[0] = buffer; - picture->data[1] = buffer + size; - picture->data[2] = buffer + size + size / 4; - picture->linesize[0] = w; - picture->linesize[1] = w / 2; - picture->linesize[2] = w / 2; - - memset(buffer, 0, (size * 3) / 2); - - // Convert image to YCbCr 4:2:0 - - // Y - for (uint y=0;ydata[0][y * picture->linesize[0] + x] = (uint8)clamp(Y, 0.0f, 255.0f); - } - } - - // Cb and Cr - for (uint y=0;ydata[1][y * picture->linesize[1] + x] = (uint8)clamp(Cb, 0.0f, 255.0f);; - picture->data[2][y * picture->linesize[2] + x] = (uint8)clamp(Cr, 0.0f, 
255.0f);; - } - } - - return picture; -} - -static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char * filename) -{ - FILE * f = fopen(filename, "w"); - fprintf(f,"P5\n%d %d\n%d\n",xsize, ysize, 255); - - for (int i = 0; i < ysize; i++) - fwrite(buf + i * wrap,1,xsize,f); - - fclose(f); -} - -static void savePicture(const AVFrame * picture, int w, int h) -{ - // @@ Combine planes. - pgm_save(picture->data[0], picture->linesize[0], w, h, "test_y.pgm"); - pgm_save(picture->data[1], picture->linesize[1], w/2, h/2, "test_u.pgm"); - pgm_save(picture->data[2], picture->linesize[2], w/2, h/2, "test_v.pgm"); -} - -static double psnr(double d) { - return -10.0*log(d)/log(10.0); -} - - -static void encodeFrame(const Image & image, CodecID format, Array & frame) -{ - AVFrame * picture = createPicture(image); - - AVCodec * encoder = avcodec_find_encoder(format); - - if (encoder == NULL) - { - printf("MPEG encoder not found.\n"); - exit(1); - } - - AVCodecContext * encoder_context = avcodec_alloc_context(); - - //encoder_context->me_method = 0; - encoder_context->width = image.width(); - encoder_context->height = image.height(); - encoder_context->pix_fmt = PIX_FMT_YUV420P; - //encoder_context->pix_fmt = PIX_FMT_YUV422P; - //encoder_context->pix_fmt = PIX_FMT_YUVJ420P; - - encoder_context->time_base = (AVRational){1,25}; // required parameter. 25 fps? - encoder_context->bit_rate = 400000; // Quality? 
- //encoder_context->bit_rate = 200000; // Default - //encoder_context->bit_rate_tolerance = 20000; - //encoder_context->qmin = ?; - //encoder_context->qmax = ?; - //encoder_context->qcompress = ?; - //encoder_context->qblur = ?; - - encoder_context->flags |= CODEC_FLAG_PSNR; - encoder_context->qcompress = s_quality; - //encoder_context->qblur = 1.0f; - //encoder_context->global_quality = FF_QP2LAMBDA * 0; - //encoder_context->max_qdiff = 3; - - - - - // Intra frames only - encoder_context->gop_size = 0; - - if (avcodec_open(encoder_context, encoder) < 0) - { - printf("MPEG encoder initialization failed.\n"); - exit(1); - } - - frame.resize(1024 * 1024, 0); // resize and initialize to 0. - - int out_size = avcodec_encode_video(encoder_context, frame.mutableBuffer(), frame.size(), picture); - frame.resize(out_size); - - // Append sequence end code. - frame.append(0x00); - frame.append(0x00); - frame.append(0x01); - frame.append(0xb7); - - int in_size = image.width() * image.height() * 3; - printf("Image size %d -> %d (1:%d)\n", in_size, out_size, in_size/out_size); - printf("PSNR = %4.2f\n", psnr(encoder_context->coded_frame->error[0]/(encoder_context->width*encoder_context->height*255.0*255.0))); - - - avcodec_close(encoder_context); - av_free(encoder_context); - av_free(picture); -} - -static void decodeFrame(const Array & frame, CodecID format) -{ - AVCodec * decoder = avcodec_find_decoder(format); - if (decoder == NULL) { - printf("MPEG decoder not found.\n"); - exit(1); - } - - AVCodecContext * decoder_context = avcodec_alloc_context(); - AVFrame * picture = avcodec_alloc_frame(); - - if (decoder->capabilities & CODEC_CAP_TRUNCATED) - decoder_context->flags |= CODEC_FLAG_TRUNCATED; /* we do not send complete frames */ - - - if (avcodec_open(decoder_context, decoder) < 0) { - printf("MPEG decoder initialization failed.\n"); - exit(1); - } - - //memset(picture->data[0], 0, in_size / 2); - - int got_picture = 0; - int len = avcodec_decode_video(decoder_context, 
picture, &got_picture, frame.buffer(), frame.size()); - - printf("decoded %d bytes\n", len); - - if (len < 0) { - printf("Error while decoding frame.\n"); - exit(1); - } - - if (!got_picture) { - printf("Did not get any picture.\n"); - exit(1); - } - - //nvDebugCheck(outbuf_size == len); - //nvDebugCheck(got_picture == true); - - savePicture(picture, decoder_context->width, decoder_context->height); - - avcodec_close(decoder_context); - av_free(decoder_context); - av_free(picture); -} - - - -int main(int argc, char *argv[]) -{ - MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - nv::Path input; - nv::Path output; - - // Parse arguments. - for (int i = 1; i < argc; i++) - { - if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quality") == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') - { - s_quality = atof(argv[i+1]); - i++; - } - } - - else if (argv[i][0] != '-') - { - input = argv[i]; - - if (i+1 < argc && argv[i+1][0] != '-') - { - output = argv[i+1]; - i++; - } - else - { - output.copy(input.str()); - output.stripExtension(); - output.append(".mpeg"); - } - - break; - } - } - - printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007-2008\n\n"); - - if (input.isNull()) - { - printf("usage: nvmpegcompress [options] infile [outfile]\n\n"); - - return 1; - } - - // Load image. - Image image; - if (!image.load(input)) - { - fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); - return 1; - } - - // Initialize codecs. - avcodec_init(); - avcodec_register_all(); - - //CodecID format = CODEC_ID_MPEG1VIDEO; - CodecID format = CODEC_ID_MPEG2VIDEO; - //CodecID format = CODEC_ID_MJPEG; - //CodecID format = CODEC_ID_THEORA; - //CodecID format = CODEC_ID_H264; - - // Encode frame. - Array frame; - encodeFrame(image, format, frame); - - // Save resulting I-frame. 
- StdOutputStream outputStream(output.str()); - if (outputStream.isError()) - { - printf("Error opening '%s' for writing.\n", output.str()); - return 1; - } - - outputStream.serialize(frame.mutableBuffer(), frame.size()); - - //decodeFrame(frame, format); - - // @@ Compare image against original, and compute RMS. - - return 0; -} - diff --git a/src/nvtt/tests/testsuite.cpp b/src/nvtt/tests/testsuite.cpp deleted file mode 100644 index 713b074..0000000 --- a/src/nvtt/tests/testsuite.cpp +++ /dev/null @@ -1,529 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include // free -#include // memcpy - - -using namespace nv; - -// Kodak image set -static const char * s_kodakImageSet[] = { - "kodim01.png", - "kodim02.png", - "kodim03.png", - "kodim04.png", - "kodim05.png", - "kodim06.png", - "kodim07.png", - "kodim08.png", - "kodim09.png", - "kodim10.png", - "kodim11.png", - "kodim12.png", - "kodim13.png", - "kodim14.png", - "kodim15.png", - "kodim16.png", - "kodim17.png", - "kodim18.png", - "kodim19.png", - "kodim20.png", - "kodim21.png", - "kodim22.png", - "kodim23.png", - "kodim24.png", -}; - -// Waterloo image set -static const char * s_waterlooImageSet[] = { - "clegg.png", - "frymire.png", - "lena.png", - "monarch.png", - "peppers.png", - "sail.png", - "serrano.png", - "tulips.png", -}; - -// Epic image set -static const char * s_epicImageSet[] = { - "Bradley1.png", - "Gradient.png", - "MoreRocks.png", - "Wall.png", - "Rainbow.png", - "Text.png", -}; - -// Farbrausch -static const char * s_farbrauschImageSet[] = { - "t.2d.pn02.bmp", - "t.aircondition.01.bmp", - "t.bricks.02.bmp", - "t.bricks.05.bmp", - "t.concrete.cracked.01.bmp", - "t.envi.colored02.bmp", - "t.envi.colored03.bmp", - "t.font.01.bmp", - "t.sewers.01.bmp", - "t.train.03.bmp", - "t.yello.01.bmp", -}; - -// Lugaru -static const char * s_lugaruImageSet[] = { - "lugaru-blood.png", - "lugaru-bush.png", - "lugaru-cursor.png", - "lugaru-hawk.png", -}; - -// Quake3 -static const char * s_quake3ImageSet[] = { - "q3-blocks15cgeomtrn.tga", - "q3-blocks17bloody.tga", - "q3-dark_tin2.tga", - "q3-fan_grate.tga", - "q3-fan.tga", - "q3-metal2_2.tga", - "q3-panel_glo.tga", - "q3-proto_fence.tga", - "q3-wires02.tga", -}; - - -struct ImageSet -{ - const char ** fileNames; - int fileCount; - nvtt::Format format; -}; - -static ImageSet s_imageSets[] = { - {s_kodakImageSet, sizeof(s_kodakImageSet)/sizeof(s_kodakImageSet[0]), nvtt::Format_DXT1}, - 
{s_waterlooImageSet, sizeof(s_waterlooImageSet)/sizeof(s_waterlooImageSet[0]), nvtt::Format_DXT1}, - {s_epicImageSet, sizeof(s_epicImageSet)/sizeof(s_epicImageSet[0]), nvtt::Format_DXT1}, - {s_farbrauschImageSet, sizeof(s_farbrauschImageSet)/sizeof(s_farbrauschImageSet[0]), nvtt::Format_DXT1}, - {s_lugaruImageSet, sizeof(s_lugaruImageSet)/sizeof(s_lugaruImageSet[0]), nvtt::Format_DXT5}, - {s_quake3ImageSet, sizeof(s_quake3ImageSet)/sizeof(s_quake3ImageSet[0]), nvtt::Format_DXT5}, -}; -const int s_imageSetCount = sizeof(s_imageSets)/sizeof(s_imageSets[0]); - -enum Decoder -{ - Decoder_Reference, - Decoder_NVIDIA, -}; - -struct MyOutputHandler : public nvtt::OutputHandler -{ - MyOutputHandler() : m_data(NULL), m_ptr(NULL) {} - ~MyOutputHandler() - { - free(m_data); - } - - virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) - { - m_size = size; - m_width = width; - m_height = height; - free(m_data); - m_data = (unsigned char *)malloc(size); - m_ptr = m_data; - } - - virtual bool writeData(const void * data, int size) - { - memcpy(m_ptr, data, size); - m_ptr += size; - return true; - } - - Image * decompress(nvtt::Format format, Decoder decoder) - { - int bw = (m_width + 3) / 4; - int bh = (m_height + 3) / 4; - - AutoPtr img( new Image() ); - img->allocate(m_width, m_height); - - if (format == nvtt::Format_BC1) - { - BlockDXT1 * block = (BlockDXT1 *)m_data; - - for (int y = 0; y < bh; y++) - { - for (int x = 0; x < bw; x++) - { - ColorBlock colors; - if (decoder == Decoder_Reference) { - block->decodeBlock(&colors); - } - else if (decoder == Decoder_NVIDIA) { - block->decodeBlockNV5x(&colors); - } - - for (int yy = 0; yy < 4; yy++) - { - for (int xx = 0; xx < 4; xx++) - { - Color32 c = colors.color(xx, yy); - - if (x * 4 + xx < m_width && y * 4 + yy < m_height) - { - img->pixel(x * 4 + xx, y * 4 + yy) = c; - } - } - } - - block++; - } - } - } - else if (format == nvtt::Format_BC3) - { - BlockDXT5 * block = (BlockDXT5 *)m_data; - 
- for (int y = 0; y < bh; y++) - { - for (int x = 0; x < bw; x++) - { - ColorBlock colors; - if (decoder == Decoder_Reference) { - block->decodeBlock(&colors); - } - else if (decoder == Decoder_NVIDIA) { - block->decodeBlockNV5x(&colors); - } - - for (int yy = 0; yy < 4; yy++) - { - for (int xx = 0; xx < 4; xx++) - { - Color32 c = colors.color(xx, yy); - - if (x * 4 + xx < m_width && y * 4 + yy < m_height) - { - img->pixel(x * 4 + xx, y * 4 + yy) = c; - } - } - } - - block++; - } - } - } - - - return img.release(); - } - - int m_size; - int m_width; - int m_height; - unsigned char * m_data; - unsigned char * m_ptr; -}; - - -float rmsError(const Image * a, const Image * b) -{ - nvCheck(a != NULL); - nvCheck(b != NULL); - nvCheck(a->width() == b->width()); - nvCheck(a->height() == b->height()); - - double mse = 0; - - const uint count = a->width() * a->height(); - - for (uint i = 0; i < count; i++) - { - Color32 c0 = a->pixel(i); - Color32 c1 = b->pixel(i); - - int r = c0.r - c1.r; - int g = c0.g - c1.g; - int b = c0.b - c1.b; - int a = c0.a - c1.a; - - mse += double(r * r * c0.a) / 255; - mse += double(g * g * c0.a) / 255; - mse += double(b * b * c0.a) / 255; - } - - return float(sqrt(mse / count)); -} - - -int main(int argc, char *argv[]) -{ - const uint version = nvtt::version(); - const uint major = version / 100; - const uint minor = version % 100; - - printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007 - 2008\n\n", major, minor); - - int set = 0; - bool fast = false; - bool nocuda = false; - bool showHelp = false; - Decoder decoder = Decoder_Reference; - const char * basePath = ""; - const char * outPath = "output"; - const char * regressPath = NULL; - - // Parse arguments. 
- for (int i = 1; i < argc; i++) - { - if (strcmp("-set", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - set = atoi(argv[i+1]); - i++; - } - } - else if (strcmp("-dec", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - decoder = (Decoder)atoi(argv[i+1]); - i++; - } - } - else if (strcmp("-fast", argv[i]) == 0) - { - fast = true; - } - else if (strcmp("-nocuda", argv[i]) == 0) - { - nocuda = true; - } - else if (strcmp("-help", argv[i]) == 0) - { - showHelp = true; - } - else if (strcmp("-path", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - basePath = argv[i+1]; - i++; - } - } - else if (strcmp("-out", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - outPath = argv[i+1]; - i++; - } - } - else if (strcmp("-regress", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - regressPath = argv[i+1]; - i++; - } - } - } - - if (showHelp) - { - printf("usage: nvtestsuite [options]\n\n"); - - printf("Input options:\n"); - printf(" -path \tInput image path.\n"); - printf(" -regress \tRegression directory.\n"); - printf(" -set [0:2] \tImage set.\n"); - printf(" 0: \tKodak.\n"); - printf(" 1: \tWaterloo.\n"); - printf(" 2: \tEpic.\n"); - printf(" 3: \tFarbrausch.\n"); - printf(" -dec x \tDecompressor.\n"); - printf(" 0: \tReference.\n"); - printf(" 1: \tNVIDIA.\n"); - - printf("Compression options:\n"); - printf(" -fast \tFast compression.\n"); - printf(" -nocuda \tDo not use cuda compressor.\n"); - - printf("Output options:\n"); - printf(" -out \tOutput directory.\n"); - - return 1; - } - - nvtt::InputOptions inputOptions; - inputOptions.setMipmapGeneration(false); - inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency); - - nvtt::CompressionOptions compressionOptions; - compressionOptions.setFormat(nvtt::Format_BC1); - if (fast) - { - compressionOptions.setQuality(nvtt::Quality_Fastest); - } - else - { - compressionOptions.setQuality(nvtt::Quality_Production); - } - 
//compressionOptions.setExternalCompressor("ati"); - //compressionOptions.setExternalCompressor("squish"); - //compressionOptions.setExternalCompressor("d3dx"); - //compressionOptions.setExternalCompressor("stb"); - - compressionOptions.setFormat(s_imageSets[set].format); - - - nvtt::OutputOptions outputOptions; - outputOptions.setOutputHeader(false); - - MyOutputHandler outputHandler; - outputOptions.setOutputHandler(&outputHandler); - - nvtt::Context context; - context.enableCudaAcceleration(!nocuda); - - FileSystem::changeDirectory(basePath); - FileSystem::createDirectory(outPath); - - Path csvFileName; - csvFileName.format("%s/result.csv", outPath); - StdOutputStream csvStream(csvFileName); - TextWriter csvWriter(&csvStream); - - float totalTime = 0; - float totalRMSE = 0; - int failedTests = 0; - float totalDiff = 0; - - const char ** fileNames = s_imageSets[set].fileNames; - int fileCount = s_imageSets[set].fileCount; - - Timer timer; - - for (int i = 0; i < fileCount; i++) - { - AutoPtr img( new Image() ); - - if (!img->load(fileNames[i])) - { - printf("Input image '%s' not found.\n", fileNames[i]); - return EXIT_FAILURE; - } - - inputOptions.setTextureLayout(nvtt::TextureType_2D, img->width(), img->height()); - inputOptions.setMipmapData(img->pixels(), img->width(), img->height()); - - printf("Compressing: \t'%s'\n", fileNames[i]); - - timer.start(); - - context.process(inputOptions, compressionOptions, outputOptions); - - timer.stop(); - printf(" Time: \t%.3f sec\n", float(timer.elapsed()) / 1000); - totalTime += float(timer.elapsed()) / 1000; - - AutoPtr img_out( outputHandler.decompress(s_imageSets[set].format, decoder) ); - - Path outputFileName; - outputFileName.format("%s/%s", outPath, fileNames[i]); - outputFileName.stripExtension(); - outputFileName.append(".png"); - if (!ImageIO::save(outputFileName, img_out.ptr())) - { - printf("Error saving file '%s'.\n", outputFileName.str()); - } - - float rmse = rmsError(img.ptr(), img_out.ptr()); - totalRMSE 
+= rmse; - - printf(" RMSE: \t%.4f\n", rmse); - - // Output csv file - csvWriter << "\"" << fileNames[i] << "\"," << rmse << "\n"; - - if (regressPath != NULL) - { - Path regressFileName; - regressFileName.format("%s/%s", regressPath, fileNames[i]); - regressFileName.stripExtension(); - regressFileName.append(".png"); - - AutoPtr img_reg( new Image() ); - if (!img_reg->load(regressFileName.str())) - { - printf("Regression image '%s' not found.\n", regressFileName.str()); - return EXIT_FAILURE; - } - - float rmse_reg = rmsError(img.ptr(), img_reg.ptr()); - - float diff = rmse_reg - rmse; - totalDiff += diff; - - const char * text = "PASSED"; - if (equal(diff, 0)) text = "PASSED"; - else if (diff < 0) { - text = "FAILED"; - failedTests++; - } - - printf(" Diff: \t%.4f (%s)\n", diff, text); - } - - fflush(stdout); - } - - totalRMSE /= fileCount; - totalDiff /= fileCount; - - printf("Total Results:\n"); - printf(" Total Time: \t%.3f sec\n", totalTime); - printf(" Average RMSE:\t%.4f\n", totalRMSE); - - if (regressPath != NULL) - { - printf("Regression Results:\n"); - printf(" Diff: %.4f\n", totalDiff); - printf(" %d/%d tests failed.\n", failedTests, fileCount); - } - - return EXIT_SUCCESS; -} - diff --git a/src/nvtt/tools/CMakeLists.txt b/src/nvtt/tools/CMakeLists.txt deleted file mode 100644 index 3bb7b1b..0000000 --- a/src/nvtt/tools/CMakeLists.txt +++ /dev/null @@ -1,63 +0,0 @@ - - -ADD_EXECUTABLE(nvcompress compress.cpp cmdline.h) -TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt) - -ADD_EXECUTABLE(nvdecompress decompress.cpp cmdline.h) -TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage) - -ADD_EXECUTABLE(nvddsinfo ddsinfo.cpp cmdline.h) -TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage) - -ADD_EXECUTABLE(nvimgdiff imgdiff.cpp cmdline.h) -TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage) - -ADD_EXECUTABLE(nvassemble assemble.cpp cmdline.h) -TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage) - -ADD_EXECUTABLE(nvzoom resize.cpp 
cmdline.h) -TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage) - -ADD_EXECUTABLE(nv-gnome-thumbnailer thumbnailer.cpp cmdline.h) -TARGET_LINK_LIBRARIES(nv-gnome-thumbnailer nvcore nvmath nvimage) - -INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom nv-gnome-thumbnailer DESTINATION bin) - - # Use gconftool-2 to install gnome thumbnailer -FIND_PROGRAM(GCONFTOOL2 gconftool-2) - -IF(GCONFTOOL2) - CONFIGURE_FILE(nvtt-thumbnailer.schema.in ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema) - - INSTALL(CODE "MESSAGE(STATUS \"Installing thumbnailer schema\")") - #gconftool-2 --get-default-source - INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --get-default-source OUTPUT_VARIABLE GCONF_CONFIG_SOURCE OUTPUT_STRIP_TRAILING_WHITESPACE)") - INSTALL(CODE "set(ENV{GCONF_CONFIG_SOURCE} \"\${GCONF_CONFIG_SOURCE}\")") - INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --makefile-install-rule ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema)") -ENDIF(GCONFTOOL2) - -# UI tools -IF(QT4_FOUND) # AND NOT MSVC) - SET(QT_USE_QTOPENGL TRUE) - INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - - SET(SRCS - ui/main.cpp - ui/configdialog.h - ui/configdialog.cpp) - - SET(LIBS - nvtt - ${QT_QTCORE_LIBRARY} - ${QT_QTGUI_LIBRARY} - ${QT_QTOPENGL_LIBRARY}) - - QT4_WRAP_UI(UICS ui/configdialog.ui) - QT4_WRAP_CPP(MOCS ui/configdialog.h) - #QT4_ADD_RESOURCES(RCCS ui/configdialog.rc) - - ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS}) - TARGET_LINK_LIBRARIES(nvcompressui ${LIBS}) - -ENDIF(QT4_FOUND) # AND NOT MSVC) - diff --git a/src/nvtt/tools/compress.cpp b/src/nvtt/tools/compress.cpp index d57a285..ea87ac3 100644 --- a/src/nvtt/tools/compress.cpp +++ b/src/nvtt/tools/compress.cpp @@ -21,20 +21,20 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. 
-#include "cmdline.h" +#include +#include + +#include +#include #include -#include // @@ It might be a good idea to use FreeImage directly instead of ImageIO. -#include -#include -#include +#include "cmdline.h" -#include -#include -#include -#include -#include +#include // clock + +//#define WINDOWS_LEAN_AND_MEAN +//#include // TIMER struct MyOutputHandler : public nvtt::OutputHandler @@ -134,24 +134,18 @@ int main(int argc, char *argv[]) MyAssertHandler assertHandler; MyMessageHandler messageHandler; - bool alpha = false; bool normal = false; bool color2normal = false; bool wrapRepeat = false; bool noMipmaps = false; bool fast = false; bool nocuda = false; + bool silent = false; bool bc1n = false; nvtt::Format format = nvtt::Format_BC1; - bool premultiplyAlpha = false; - nvtt::MipmapFilter mipmapFilter = nvtt::MipmapFilter_Box; - bool loadAsFloat = false; const char * externalCompressor = NULL; - bool silent = false; - bool dds10 = false; - nv::Path input; nv::Path output; @@ -163,10 +157,6 @@ int main(int argc, char *argv[]) if (strcmp("-color", argv[i]) == 0) { } - else if (strcmp("-alpha", argv[i]) == 0) - { - alpha = true; - } else if (strcmp("-normal", argv[i]) == 0) { normal = true; @@ -186,23 +176,6 @@ int main(int argc, char *argv[]) { noMipmaps = true; } - else if (strcmp("-premula", argv[i]) == 0) - { - premultiplyAlpha = true; - } - else if (strcmp("-mipfilter", argv[i]) == 0) - { - if (i+1 == argc) break; - i++; - - if (strcmp("box", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Box; - else if (strcmp("triangle", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Triangle; - else if (strcmp("kaiser", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Kaiser; - } - else if (strcmp("-float", argv[i]) == 0) - { - loadAsFloat = true; - } // Compression options. 
else if (strcmp("-fast", argv[i]) == 0) @@ -260,15 +233,11 @@ int main(int argc, char *argv[]) } } - // Output options + // Misc options else if (strcmp("-silent", argv[i]) == 0) { silent = true; } - else if (strcmp("-dds10", argv[i]) == 0) - { - dds10 = true; - } else if (argv[i][0] != '-') { @@ -300,16 +269,12 @@ int main(int argc, char *argv[]) printf("usage: nvcompress [options] infile [outfile]\n\n"); printf("Input options:\n"); - printf(" -color \tThe input image is a color map (default).\n"); - printf(" -alpha \tThe input image has an alpha channel used for transparency.\n"); - printf(" -normal \tThe input image is a normal map.\n"); - printf(" -tonormal \tConvert input to normal map.\n"); - printf(" -clamp \tClamp wrapping mode (default).\n"); - printf(" -repeat \tRepeat wrapping mode.\n"); - printf(" -nomips \tDisable mipmap generation.\n"); - printf(" -premula \tPremultiply alpha into color channel.\n"); - printf(" -mipfilter \tMipmap filter. One of the following: box, triangle, kaiser.\n"); - printf(" -float \tLoad as floating point image.\n\n"); + printf(" -color \tThe input image is a color map (default).\n"); + printf(" -normal \tThe input image is a normal map.\n"); + printf(" -tonormal\tConvert input to normal map.\n"); + printf(" -clamp \tClamp wrapping mode (default).\n"); + printf(" -repeat \tRepeat wrapping mode.\n"); + printf(" -nomips \tDisable mipmap generation.\n\n"); printf("Compression options:\n"); printf(" -fast \tFast compression.\n"); @@ -324,19 +289,10 @@ int main(int argc, char *argv[]) printf(" -bc4 \tBC4 format (ATI1)\n"); printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n"); - printf("Output options:\n"); - printf(" -silent \tDo not output progress messages\n"); - printf(" -dds10 \tUse DirectX 10 DDS format\n\n"); - return EXIT_FAILURE; } - // Make sure input file exists. - if (!nv::FileSystem::exists(input.str())) - { - fprintf(stderr, "The file '%s' does not exist.\n", input.str()); - return 1; - } + // @@ Make sure input file exists. 
// Set input options. nvtt::InputOptions inputOptions; @@ -378,7 +334,7 @@ int main(int argc, char *argv[]) { for (uint m = 0; m < mipmapCount; m++) { - dds.mipmap(&mipmap, f, m); // @@ Load as float. + dds.mipmap(&mipmap, f, m); inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), 1, f, m); } @@ -386,42 +342,16 @@ int main(int argc, char *argv[]) } else { - if (nv::strCaseCmp(input.extension(), ".exr") == 0) + // Regular image. + nv::Image image; + if (!image.load(input)) { - loadAsFloat = true; - } - - if (loadAsFloat) - { - nv::AutoPtr image(nv::ImageIO::loadFloat(input)); - - if (image == NULL) - { - fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); - return EXIT_FAILURE; - } - - inputOptions.setFormat(nvtt::InputFormat_RGBA_32F); - inputOptions.setTextureLayout(nvtt::TextureType_2D, image->width(), image->height()); - - for (uint i = 0; i < image->componentNum(); i++) - { - inputOptions.setMipmapChannelData(image->channel(i), i, image->width(), image->height()); - } - } - else - { - // Regular image. 
- nv::Image image; - if (!image.load(input)) - { - fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); - return 1; - } - - inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height()); - inputOptions.setMipmapData(image.pixels(), image.width(), image.height()); + fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); + return EXIT_FAILURE; } + + inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height()); + inputOptions.setMipmapData(image.pixels(), image.width(), image.height()); } if (wrapRepeat) @@ -433,15 +363,6 @@ int main(int argc, char *argv[]) inputOptions.setWrapMode(nvtt::WrapMode_Clamp); } - if (alpha) - { - inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency); - } - else - { - inputOptions.setAlphaMode(nvtt::AlphaMode_None); - } - if (normal) { setNormalMap(inputOptions); @@ -460,31 +381,8 @@ int main(int argc, char *argv[]) inputOptions.setMipmapGeneration(false); } - if (premultiplyAlpha) - { - inputOptions.setPremultiplyAlpha(true); - inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied); - } - - inputOptions.setMipmapFilter(mipmapFilter); - nvtt::CompressionOptions compressionOptions; compressionOptions.setFormat(format); - - if (format == nvtt::Format_BC2) { - // Dither alpha when using BC2. 
- compressionOptions.setQuantization(false, true, false); - } - - if (format == nvtt::Format_RGBA) - { - // @@ Edit this to choose the desired pixel format: - // compressionOptions.setPixelType(nvtt::PixelType_Float); - // compressionOptions.setPixelFormat(16, 16, 16, 16); - // compressionOptions.setPixelType(nvtt::PixelType_UnsignedNorm); - // compressionOptions.setPixelFormat(16, 0, 0, 0); - } - if (fast) { compressionOptions.setQuality(nvtt::Quality_Fastest); @@ -515,11 +413,11 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; } - nvtt::Context context; - context.enableCudaAcceleration(!nocuda); + nvtt::Compressor compressor; + compressor.enableCudaAcceleration(!nocuda); printf("CUDA acceleration "); - if (context.isCudaAccelerationEnabled()) + if (compressor.isCudaAccelerationEnabled()) { printf("ENABLED\n\n"); } @@ -528,7 +426,7 @@ int main(int argc, char *argv[]) printf("DISABLED\n\n"); } - outputHandler.setTotal(context.estimateSize(inputOptions, compressionOptions)); + outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions)); outputHandler.setDisplayProgress(!silent); nvtt::OutputOptions outputOptions; @@ -536,25 +434,19 @@ int main(int argc, char *argv[]) outputOptions.setOutputHandler(&outputHandler); outputOptions.setErrorHandler(&errorHandler); - if (dds10) - { - outputOptions.setContainer(nvtt::Container_DDS10); - } - // printf("Press ENTER.\n"); // fflush(stdout); // getchar(); - Timer timer; - timer.start(); + clock_t start = clock(); - if (!context.process(inputOptions, compressionOptions, outputOptions)) + if (!compressor.process(inputOptions, compressionOptions, outputOptions)) { return EXIT_FAILURE; } - timer.stop(); - printf("\rtime taken: %.3f seconds\n", timer.elapsed()); + clock_t end = clock(); + printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); return EXIT_SUCCESS; } diff --git a/src/nvtt/tools/ui/configdialog.cpp b/src/nvtt/tools/configdialog.cpp similarity index 100% rename from 
src/nvtt/tools/ui/configdialog.cpp rename to src/nvtt/tools/configdialog.cpp diff --git a/src/nvtt/tools/ui/configdialog.h b/src/nvtt/tools/configdialog.h similarity index 100% rename from src/nvtt/tools/ui/configdialog.h rename to src/nvtt/tools/configdialog.h diff --git a/src/nvtt/tools/ui/configdialog.ui b/src/nvtt/tools/configdialog.ui similarity index 99% rename from src/nvtt/tools/ui/configdialog.ui rename to src/nvtt/tools/configdialog.ui index 42d2b49..a002798 100644 --- a/src/nvtt/tools/ui/configdialog.ui +++ b/src/nvtt/tools/configdialog.ui @@ -12,6 +12,9 @@ NVIDIA Texture Tools + + + true diff --git a/src/nvtt/tools/decompress.cpp b/src/nvtt/tools/decompress.cpp index b51df5c..ffb2ceb 100644 --- a/src/nvtt/tools/decompress.cpp +++ b/src/nvtt/tools/decompress.cpp @@ -31,161 +31,41 @@ #include "cmdline.h" -#include // clock - int main(int argc, char *argv[]) { MyAssertHandler assertHandler; MyMessageHandler messageHandler; - bool forcenormal = false; - bool mipmaps = false; - bool faces = false; - bool savePNG = false; - - nv::Path input; - nv::Path output; - - // Parse arguments. 
- for (int i = 1; i < argc; i++) - { - if (strcmp("-forcenormal", argv[i]) == 0) - { - forcenormal = true; - } - else if (strcmp("-mipmaps", argv[i]) == 0) - { - mipmaps = true; - } - else if (strcmp("-faces", argv[i]) == 0) - { - faces = true; - } - else if (strcmp("-format", argv[i]) == 0) - { - if (i+1 == argc) break; - i++; - -#ifdef HAVE_PNG - if (strcmp("png", argv[i]) == 0) savePNG = true; - else -#endif - if (strcmp("tga", argv[i]) == 0) savePNG = false; - else - { - fprintf(stderr, "Unsupported output format '%s', defaulting to 'tga'.\n", argv[i]); - savePNG = false; - } - } - else if (argv[i][0] != '-') - { - input = argv[i]; - - if (i+1 < argc && argv[i+1][0] != '-') - { - output = argv[i+1]; - } - else - { - output.copy(input.str()); - } - - break; - } - } - - printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); - - if (input.isNull()) + if (argc != 2) { - printf("usage: nvdecompress [options] infile [outfile]\n\n"); - - printf("Note: the .tga or .png extension is forced on outfile\n\n"); - - printf("Input options:\n"); - printf(" -forcenormal \tThe input image is a normal map.\n"); - printf(" -mipmaps \tDecompress all mipmaps.\n"); - printf(" -faces \tDecompress all faces.\n"); - printf(" -format \tOutput format ('tga' or 'png').\n"); - - return 1; - } - - // Load surface. - nv::DirectDrawSurface dds(input); - if (!dds.isValid()) - { - fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str()); + printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); + printf("usage: nvdecompress 'ddsfile'\n\n"); return 1; } - if (!dds.isSupported() || dds.isTexture3D()) + // Load surface. 
+ nv::DirectDrawSurface dds(argv[1]); + if (!dds.isValid()) { - fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str()); + printf("The file '%s' is not a valid DDS file.\n", argv[1]); return 1; } - uint faceCount; - if (dds.isTexture2D()) - { - faceCount = 1; - } - else - { - nvCheck(dds.isTextureCube()); - faceCount = 6; - } - - uint mipmapCount = dds.mipmapCount(); + nv::Path name(argv[1]); + name.stripExtension(); + name.append(".tga"); - clock_t start = clock(); - - // apply arguments - if (forcenormal) - { - dds.setNormalFlag(true); - } - if (!faces) - { - faceCount = 1; - } - if (!mipmaps) - { - mipmapCount = 1; + nv::StdOutputStream stream(name.str()); + if (stream.isError()) { + printf("Error opening '%s' for writting\n", name.str()); + return 1; } - - nv::Image mipmap; - nv::Path name; - - // strip extension, we force the tga extension - output.stripExtension(); - - // extract faces and mipmaps - for (uint f = 0; f < faceCount; f++) - { - for (uint m = 0; m < mipmapCount; m++) - { - dds.mipmap(&mipmap, f, m); - // set output filename, if we are doing faces and/or mipmaps - name.copy(output); - if (faces) name.appendFormat("_face%d", f); - if (mipmaps) name.appendFormat("_mipmap%d", m); - name.append(savePNG ? ".png" : ".tga"); - - nv::StdOutputStream stream(name.str()); - if (stream.isError()) { - fprintf(stderr, "Error opening '%s' for writting\n", name.str()); - return 1; - } - - nv::ImageIO::save(name, stream, &mipmap); - } - } + // @@ TODO: Add command line options to output mipmaps, cubemap faces, etc. 
+ nv::Image img; + dds.mipmap(&img, 0, 0); // get first image + nv::ImageIO::saveTGA(stream, &img); - clock_t end = clock(); - printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); - return 0; } diff --git a/src/nvtt/tools/ui/main.cpp b/src/nvtt/tools/main.cpp similarity index 100% rename from src/nvtt/tools/ui/main.cpp rename to src/nvtt/tools/main.cpp diff --git a/src/nvtt/tools/nvtt-thumbnailer.schema.in b/src/nvtt/tools/nvtt-thumbnailer.schema.in deleted file mode 100644 index fd3f394..0000000 --- a/src/nvtt/tools/nvtt-thumbnailer.schema.in +++ /dev/null @@ -1,26 +0,0 @@ - - - -/schemas/desktop/gnome/thumbnailers/image@x-dds/enable -/desktop/gnome/thumbnailers/image@x-dds/enable -nvtt-thumbnailer -bool -true - - - - - - -/schemas/desktop/gnome/thumbnailers/image@x-dds/command -/desktop/gnome/thumbnailers/image@x-dds/command -nvtt-thumbnailer -string -@CMAKE_INSTALL_PREFIX@/bin/nv-gnome-thumbnailer -s %s %i %o - - - - - - - \ No newline at end of file diff --git a/src/nvtt/tools/resize.cpp b/src/nvtt/tools/resize.cpp index 3dc8282..f23f80f 100644 --- a/src/nvtt/tools/resize.cpp +++ b/src/nvtt/tools/resize.cpp @@ -176,7 +176,7 @@ int main(int argc, char *argv[]) result->setFormat(nv::Image::Format_ARGB); nv::StdOutputStream stream(output); - nv::ImageIO::save(output, stream, result.ptr()); + nv::ImageIO::saveTGA(stream, result.ptr()); // @@ Add generic save function. Add support for png too. 
return 0; } diff --git a/src/nvtt/tools/thumbnailer.cpp b/src/nvtt/tools/thumbnailer.cpp deleted file mode 100644 index 8f5ea2c..0000000 --- a/src/nvtt/tools/thumbnailer.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include "cmdline.h" - -static bool loadImage(nv::Image & image, const char * fileName) -{ - if (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0) - { - nv::DirectDrawSurface dds(fileName); - if (!dds.isValid()) - { - fprintf(stderr, "The file '%s' is not a valid DDS file.\n", fileName); - return false; - } - - dds.mipmap(&image, 0, 0); // get first image - } - else - { - // Regular image. 
- if (!image.load(fileName)) - { - fprintf(stderr, "The file '%s' is not a supported image type.\n", fileName); - return false; - } - } - - return true; -} - - -int main(int argc, char *argv[]) -{ - //MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - float gamma = 2.2f; - nv::Path input; - nv::Path output; - int size = 128; - - // Parse arguments. - for (int i = 1; i < argc; i++) - { - // Input options. - if (strcmp("-s", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - size = (int)atoi(argv[i+1]); - i++; - } - } - else if (argv[i][0] != '-') - { - input = argv[i]; - - if (i+1 < argc && argv[i+1][0] != '-') { - output = argv[i+1]; - } - else - { - fprintf(stderr, "No output filename.\n"); - return 1; - } - - break; - } - } - - if (input.isNull() || output.isNull()) - { - printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); - - printf("usage: nv-gnome-thumbnailer [options] input output\n\n"); - - printf("Options:\n"); - printf(" -s size\tThumbnail size (default = 128)\n"); - - return 1; - } - - nv::Image image; - if (!loadImage(image, input)) return 1; - - nv::ImageIO::ImageMetaData metaData; - metaData.tagMap.add("Thumb::Image::Width", nv::StringBuilder().number (image.width())); - metaData.tagMap.add("Thumb::Image::Height", nv::StringBuilder().number (image.height())); - - if ((image.width() > size) || (image.height() > size)) - { - nv::FloatImage fimage(&image); - fimage.toLinear(0, 3, gamma); - - uint thumbW, thumbH; - if (image.width() > image.height()) - { - thumbW = size; - thumbH = uint ((float (image.height()) / float (image.width())) * size); - } - else - { - thumbW = uint ((float (image.width()) / float (image.height())) * size); - thumbH = size; - } - nv::AutoPtr fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp)); - - nv::AutoPtr result(fresult->createImageGammaCorrect(gamma)); - result->setFormat(nv::Image::Format_ARGB); - - nv::StdOutputStream stream(output); - 
nv::ImageIO::save(output, stream, result.ptr(), &metaData); - } - else - { - nv::StdOutputStream stream(output); - nv::ImageIO::save(output, stream, &image, &metaData); - } - - return 0; -} -