2 Commits
2.0.5 ... 2.0.1

Author SHA1 Message Date
fa4a7b9af7 Add proper credits. 2008-02-28 22:52:00 +00:00
f111d23637 Tag 2.0.1 2008-02-28 22:37:49 +00:00
68 changed files with 3503 additions and 3054 deletions

View File

@ -1,4 +1,4 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0) CMAKE_MINIMUM_REQUIRED(VERSION 2.4.0)
PROJECT(NV) PROJECT(NV)
ENABLE_TESTING() ENABLE_TESTING()
@ -16,13 +16,6 @@ MESSAGE(STATUS "Setting optimal options")
MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}") MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}")
MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}") MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}")
IF(NVTT_SHARED)
SET(NVCORE_SHARED TRUE)
SET(NVMATH_SHARED TRUE)
SET(NVIMAGE_SHARED TRUE)
ENDIF(NVTT_SHARED)
ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(src)
IF(WIN32) IF(WIN32)

View File

@ -1,32 +1,3 @@
NVIDIA Texture Tools version 2.0.5
* Fix error in single color compressor. Fixes issue 66.
* Detect mismatch between CUDA runtime and driver, and disable CUDA in that case.
* Fix cmake files when compiling NVTT as a shared library.
* When linking nvtt dynamically on unix, link all libraries dynamically.
* Select fastest CUDA device.
NVIDIA Texture Tools version 2.0.4
* Fix error in RGB format output; reported by jonsoh. See issue 49.
* Added support for RGB format dithering by jonsoh. Fixes issues 50 and 51.
* Prevent infinite loop in indexMirror when width equals 1. Fixes issue 65.
* Implement general scale filter, including upsampling.
NVIDIA Texture Tools version 2.0.3
* More accurate DXT3 compressor. Fixes issue 38.
* Remove legacy compressors. Fix issue 34.
* Check for single color in all compressors. Fixes issue 43.
* Fix error in fast downsample filter, reported by Noel Llopis.
NVIDIA Texture Tools version 2.0.2
* Fix copy ctor error reported by Richard Sim.
* Fix indexMirror error reported by Chris Lambert.
* Fix vc8 post build command, reported by Richard Sim.
* Fix RGBA modes with less than 32 bpp by Viktor Linder.
* Fix alpha decompression by Amorilia. See issue 40.
* Avoid default-initialized constructors for POD types, reported by Jim Tilander.
* Add single color compressor for DXT1a.
* Set swizzle code to ATI2 files. See issue 41.
NVIDIA Texture Tools version 2.0.1 NVIDIA Texture Tools version 2.0.1
* Fix memory leaks. * Fix memory leaks.
* Pre-allocate device memory for CUDA compressor. * Pre-allocate device memory for CUDA compressor.

View File

@ -1 +1 @@
2.0.5 2.0.1

View File

@ -46,9 +46,9 @@ FIND_LIBRARY (CUDA_RUNTIME_LIBRARY
DOC "The CUDA runtime library") DOC "The CUDA runtime library")
IF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY) IF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY)
SET (CUDA_FOUND TRUE) SET (CUDA_FOUND 1 CACHE STRING "Set to 1 if CUDA is found, 0 otherwise")
ELSE (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY) ELSE (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY)
SET (CUDA_FOUND FALSE) SET (CUDA_FOUND 0 CACHE STRING "Set to 1 if CUDA is found, 0 otherwise")
ENDIF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY) ENDIF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY)
SET (CUDA_LIBRARIES ${CUDA_RUNTIME_LIBRARY}) SET (CUDA_LIBRARIES ${CUDA_RUNTIME_LIBRARY})
@ -57,7 +57,7 @@ MARK_AS_ADVANCED (CUDA_FOUND CUDA_COMPILER CUDA_RUNTIME_LIBRARY)
#SET(CUDA_OPTIONS "-ncfe") #SET(CUDA_OPTIONS "-ncfe")
SET(CUDA_OPTIONS "--host-compilation=C") SET(CUDA_OPTIONS "")
IF (CUDA_EMULATION) IF (CUDA_EMULATION)
SET (CUDA_OPTIONS "${CUDA_OPTIONS} -deviceemu") SET (CUDA_OPTIONS "${CUDA_OPTIONS} -deviceemu")

2
configure vendored
View File

@ -53,7 +53,7 @@ echo "-- Configuring nvidia-texture-tools "`cat VERSION`
mkdir -p ./build mkdir -p ./build
cd ./build cd ./build
$CMAKE .. -DNVTT_SHARED=1 -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1 $CMAKE .. -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1
cd .. cd ..
echo "" echo ""

0
gnuwin32/bin/libpng12.dll Executable file → Normal file
View File

View File

@ -278,7 +278,6 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib" AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe" OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib" AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17" TargetMachine="17"
/> />
<Tool <Tool

View File

@ -281,10 +281,6 @@
RelativePath="..\..\..\src\nvcore\Debug.cpp" RelativePath="..\..\..\src\nvcore\Debug.cpp"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvcore\Library.cpp"
>
</File>
<File <File
RelativePath="..\..\..\src\nvcore\Memory.cpp" RelativePath="..\..\..\src\nvcore\Memory.cpp"
> >
@ -319,10 +315,6 @@
RelativePath="..\..\..\src\nvcore\DefsVcWin32.h" RelativePath="..\..\..\src\nvcore\DefsVcWin32.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvcore\Library.h"
>
</File>
<File <File
RelativePath="..\..\..\src\nvcore\Memory.h" RelativePath="..\..\..\src\nvcore\Memory.h"
> >

View File

@ -277,7 +277,6 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib" AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe" OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib" AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17" TargetMachine="17"
/> />
<Tool <Tool

View File

@ -355,10 +355,6 @@
RelativePath="..\..\..\src\nvimage\nvimage.h" RelativePath="..\..\..\src\nvimage\nvimage.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvimage\PixelFormat.h"
>
</File>
<File <File
RelativePath="..\..\..\src\nvimage\PsdFile.h" RelativePath="..\..\..\src\nvimage\PsdFile.h"
> >

View File

@ -278,7 +278,11 @@
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
> >
<File <File
RelativePath="..\..\..\src\nvmath\Plane.cpp" RelativePath="..\..\..\src\nvmath\Eigen.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Fitting.cpp"
> >
</File> </File>
</Filter> </Filter>
@ -296,11 +300,15 @@
> >
</File> </File>
<File <File
RelativePath="..\..\..\src\nvmath\Matrix.h" RelativePath="..\..\..\src\nvmath\Eigen.h"
> >
</File> </File>
<File <File
RelativePath="..\..\..\src\nvmath\Plane.h" RelativePath="..\..\..\src\nvmath\Fitting.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Matrix.h"
> >
</File> </File>
<File <File

View File

@ -53,8 +53,8 @@ END
// //
VS_VERSION_INFO VERSIONINFO VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,0,5,0 FILEVERSION 2,0,1,0
PRODUCTVERSION 2,0,5,0 PRODUCTVERSION 2,0,1,0
FILEFLAGSMASK 0x17L FILEFLAGSMASK 0x17L
#ifdef _DEBUG #ifdef _DEBUG
FILEFLAGS 0x1L FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN BEGIN
VALUE "CompanyName", "NVIDIA Corporation" VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library" VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 0, 5, 0" VALUE "FileVersion", "2, 0, 1, 0"
VALUE "InternalName", "nvtt" VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007" VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll" VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library" VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 0, 5, 0" VALUE "ProductVersion", "2, 0, 1, 0"
END END
END END
BLOCK "VarFileInfo" BLOCK "VarFileInfo"

View File

@ -97,7 +97,7 @@
<Tool <Tool
Name="VCPostBuildEventTool" Name="VCPostBuildEventTool"
Description="Copying header files..." Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;" CommandLine="xcopy /y /f /i $(SolutionDir)\..\..\src\nvtt\nvtt*.h $(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\"
/> />
</Configuration> </Configuration>
<Configuration <Configuration
@ -261,7 +261,7 @@
<Tool <Tool
Name="VCPostBuildEventTool" Name="VCPostBuildEventTool"
Description="Copying header files..." Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;" CommandLine="xcopy /y /f /i $(SolutionDir)\..\..\src\nvtt\nvtt*.h $(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\"
/> />
</Configuration> </Configuration>
<Configuration <Configuration
@ -425,7 +425,7 @@
<Tool <Tool
Name="VCPostBuildEventTool" Name="VCPostBuildEventTool"
Description="Copying header files..." Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;" CommandLine="xcopy /y /f /i $(SolutionDir)\..\..\src\nvtt\nvtt*.h $(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\"
/> />
</Configuration> </Configuration>
<Configuration <Configuration
@ -585,7 +585,7 @@
<Tool <Tool
Name="VCPostBuildEventTool" Name="VCPostBuildEventTool"
Description="Copying header files..." Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;" CommandLine="xcopy /y /f /i $(SolutionDir)\..\..\src\nvtt\nvtt*.h $(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\"
/> />
</Configuration> </Configuration>
<Configuration <Configuration
@ -691,7 +691,7 @@
> >
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;" CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h" AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj" Outputs="$(IntDir)\$(InputName).obj"
/> />
@ -701,7 +701,7 @@
> >
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;" CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h" AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj" Outputs="$(IntDir)\$(InputName).obj"
/> />
@ -711,7 +711,7 @@
> >
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;" CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h" AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj" Outputs="$(IntDir)\$(InputName).obj"
/> />
@ -721,7 +721,7 @@
> >
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;" CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h" AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj" Outputs="$(IntDir)\$(InputName).obj"
/> />
@ -849,6 +849,10 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp" RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.cpp"
>
</File>
<File <File
RelativePath="..\..\..\src\nvtt\InputOptions.cpp" RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
> >
@ -861,10 +865,6 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp" RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.cpp"
>
</File>
<File <File
RelativePath="..\..\..\src\nvtt\OutputOptions.cpp" RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
> >
@ -911,6 +911,10 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h" RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.h"
>
</File>
<File <File
RelativePath="..\..\..\src\nvtt\InputOptions.h" RelativePath="..\..\..\src\nvtt\InputOptions.h"
> >
@ -923,10 +927,6 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h" RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.h"
>
</File>
<File <File
RelativePath="..\..\..\src\nvtt\OutputOptions.h" RelativePath="..\..\..\src\nvtt\OutputOptions.h"
> >

View File

@ -50,7 +50,6 @@ ENDIF(CG_FOUND)
# CUDA # CUDA
INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake) INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake)
IF(CUDA_FOUND) IF(CUDA_FOUND)
SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise")
MESSAGE(STATUS "Looking for CUDA - found") MESSAGE(STATUS "Looking for CUDA - found")
ELSE(CUDA_FOUND) ELSE(CUDA_FOUND)
MESSAGE(STATUS "Looking for CUDA - not found") MESSAGE(STATUS "Looking for CUDA - not found")
@ -59,7 +58,7 @@ ENDIF(CUDA_FOUND)
# Maya # Maya
INCLUDE(${NV_CMAKE_DIR}/FindMaya.cmake) INCLUDE(${NV_CMAKE_DIR}/FindMaya.cmake)
IF(MAYA_FOUND) IF(MAYA_FOUND)
SET(HAVE_MAYA ${MAYA_FOUND} CACHE BOOL "Set to TRUE if Maya is found, FALSE otherwise") SET(HAVE_MAYA MAYA_FOUND)
MESSAGE(STATUS "Looking for Maya - found") MESSAGE(STATUS "Looking for Maya - found")
ELSE(MAYA_FOUND) ELSE(MAYA_FOUND)
MESSAGE(STATUS "Looking for Maya - not found") MESSAGE(STATUS "Looking for Maya - not found")
@ -68,7 +67,7 @@ ENDIF(MAYA_FOUND)
# JPEG # JPEG
INCLUDE(FindJPEG) INCLUDE(FindJPEG)
IF(JPEG_FOUND) IF(JPEG_FOUND)
SET(HAVE_JPEG ${JPEG_FOUND} CACHE BOOL "Set to TRUE if JPEG is found, FALSE otherwise") SET(HAVE_JPEG JPEG_FOUND)
MESSAGE(STATUS "Looking for JPEG - found") MESSAGE(STATUS "Looking for JPEG - found")
ELSE(JPEG_FOUND) ELSE(JPEG_FOUND)
MESSAGE(STATUS "Looking for JPEG - not found") MESSAGE(STATUS "Looking for JPEG - not found")
@ -77,7 +76,7 @@ ENDIF(JPEG_FOUND)
# PNG # PNG
INCLUDE(FindPNG) INCLUDE(FindPNG)
IF(PNG_FOUND) IF(PNG_FOUND)
SET(HAVE_PNG ${PNG_FOUND} CACHE BOOL "Set to TRUE if PNG is found, FALSE otherwise") SET(HAVE_PNG PNG_FOUND)
MESSAGE(STATUS "Looking for PNG - found") MESSAGE(STATUS "Looking for PNG - found")
ELSE(PNG_FOUND) ELSE(PNG_FOUND)
MESSAGE(STATUS "Looking for PNG - not found") MESSAGE(STATUS "Looking for PNG - not found")
@ -86,7 +85,7 @@ ENDIF(PNG_FOUND)
# TIFF # TIFF
INCLUDE(FindTIFF) INCLUDE(FindTIFF)
IF(TIFF_FOUND) IF(TIFF_FOUND)
SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise") SET(HAVE_TIFF TIFF_FOUND)
MESSAGE(STATUS "Looking for TIFF - found") MESSAGE(STATUS "Looking for TIFF - found")
ELSE(TIFF_FOUND) ELSE(TIFF_FOUND)
MESSAGE(STATUS "Looking for TIFF - not found") MESSAGE(STATUS "Looking for TIFF - not found")
@ -95,7 +94,7 @@ ENDIF(TIFF_FOUND)
# OpenEXR # OpenEXR
INCLUDE(${NV_CMAKE_DIR}/FindOpenEXR.cmake) INCLUDE(${NV_CMAKE_DIR}/FindOpenEXR.cmake)
IF(OPENEXR_FOUND) IF(OPENEXR_FOUND)
SET(HAVE_OPENEXR ${OPENEXR_FOUND} CACHE BOOL "Set to TRUE if OpenEXR is found, FALSE otherwise") SET(HAVE_OPENEXR OPENEXR_FOUND)
MESSAGE(STATUS "Looking for OpenEXR - found") MESSAGE(STATUS "Looking for OpenEXR - found")
ELSE(OPENEXR_FOUND) ELSE(OPENEXR_FOUND)
MESSAGE(STATUS "Looking for OpenEXR - not found") MESSAGE(STATUS "Looking for OpenEXR - not found")

View File

@ -18,20 +18,16 @@ SET(CORE_SRCS
TextReader.cpp TextReader.cpp
TextWriter.h TextWriter.h
TextWriter.cpp TextWriter.cpp
Tokenizer.h
Tokenizer.cpp
Radix.h Radix.h
Radix.cpp Radix.cpp)
Library.h
Library.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# targets # targets
ADD_DEFINITIONS(-DNVCORE_EXPORTS) ADD_DEFINITIONS(-DNVCORE_EXPORTS)
IF(UNIX)
SET(LIBS ${LIBS} ${CMAKE_DL_LIBS})
ENDIF(UNIX)
IF(NVCORE_SHARED) IF(NVCORE_SHARED)
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS}) ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ELSE(NVCORE_SHARED) ELSE(NVCORE_SHARED)

View File

@ -446,7 +446,7 @@ namespace nv
// Call default constructors // Call default constructors
for( i = old_size; i < new_size; i++ ) { for( i = old_size; i < new_size; i++ ) {
new(m_buffer+i) T; // placement new new(m_buffer+i) T(); // placement new
} }
} }

View File

@ -28,7 +28,7 @@
#endif #endif
#if NV_OS_LINUX && defined(HAVE_EXECINFO_H) #if NV_OS_LINUX && defined(HAVE_EXECINFO_H)
# include <execinfo.h> // backtrace # include <execinfo.h>
# if NV_CC_GNUC // defined(HAVE_CXXABI_H) # if NV_CC_GNUC // defined(HAVE_CXXABI_H)
# include <cxxabi.h> # include <cxxabi.h>
# endif # endif
@ -39,13 +39,6 @@
# include <sys/types.h> # include <sys/types.h>
# include <sys/sysctl.h> // sysctl # include <sys/sysctl.h> // sysctl
# include <ucontext.h> # include <ucontext.h>
# undef HAVE_EXECINFO_H
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
# include <execinfo.h> // backtrace
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
# include <cxxabi.h>
# endif
# endif
#endif #endif
#include <stdexcept> // std::runtime_error #include <stdexcept> // std::runtime_error
@ -81,9 +74,7 @@ namespace
// TODO write minidump // TODO write minidump
static LONG WINAPI nvTopLevelFilter( struct _EXCEPTION_POINTERS * pExceptionInfo) static LONG WINAPI nvTopLevelFilter( struct _EXCEPTION_POINTERS *pExceptionInfo ) {
{
NV_UNUSED(pExceptionInfo);
/* BOOL (WINAPI * Dump) (HANDLE, DWORD, HANDLE, MINIDUMP_TYPE, PMINIDUMP_EXCEPTION_INFORMATION, PMINIDUMP_USER_STREAM_INFORMATION, PMINIDUMP_CALLBACK_INFORMATION ); /* BOOL (WINAPI * Dump) (HANDLE, DWORD, HANDLE, MINIDUMP_TYPE, PMINIDUMP_EXCEPTION_INFORMATION, PMINIDUMP_USER_STREAM_INFORMATION, PMINIDUMP_CALLBACK_INFORMATION );
AutoString dbghelp_path(512); AutoString dbghelp_path(512);
@ -135,10 +126,6 @@ namespace
#if defined(HAVE_EXECINFO_H) // NV_OS_LINUX #if defined(HAVE_EXECINFO_H) // NV_OS_LINUX
static bool nvHasStackTrace() {
return backtrace != NULL;
}
static void nvPrintStackTrace(void * trace[], int size, int start=0) { static void nvPrintStackTrace(void * trace[], int size, int start=0) {
char ** string_array = backtrace_symbols(trace, size); char ** string_array = backtrace_symbols(trace, size);
@ -177,36 +164,24 @@ namespace
static void * callerAddress(void * secret) static void * callerAddress(void * secret)
{ {
# if NV_OS_DARWIN # if NV_OS_DARWIN && NV_CPU_PPC
# if defined(_STRUCT_MCONTEXT) ucontext_t * ucp = (ucontext_t *)secret;
# if NV_CPU_PPC return (void *) ucp->uc_mcontext->ss.srr0;
ucontext_t * ucp = (ucontext_t *)secret; # elif NV_OS_DARWIN && NV_CPU_X86
return (void *) ucp->uc_mcontext->__ss.__srr0; ucontext_t * ucp = (ucontext_t *)secret;
# elif NV_CPU_X86 return (void *) ucp->uc_mcontext->ss.eip;
ucontext_t * ucp = (ucontext_t *)secret; # elif NV_CPU_X86_64
return (void *) ucp->uc_mcontext->__ss.__eip; // #define REG_RIP REG_INDEX(rip) // seems to be 16
# endif ucontext_t * ucp = (ucontext_t *)secret;
# else return (void *)ucp->uc_mcontext.gregs[REG_RIP];
# if NV_CPU_PPC # elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret; ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.srr0; return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
# elif NV_CPU_X86 # elif NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret; ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.eip; return (void *) ucp->uc_mcontext.regs->nip;
# endif
# endif
# else # else
# if NV_CPU_X86_64 return NULL;
// #define REG_RIP REG_INDEX(rip) // seems to be 16
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[REG_RIP];
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
# elif NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext.regs->nip;
# endif
# endif # endif
// How to obtain the instruction pointers in different platforms, from mlton's source code. // How to obtain the instruction pointers in different platforms, from mlton's source code.
@ -251,18 +226,17 @@ namespace
} }
# if defined(HAVE_EXECINFO_H) # if defined(HAVE_EXECINFO_H)
if (nvHasStackTrace()) // in case of weak linking
{
void * trace[64];
int size = backtrace(trace, 64);
if (pnt != NULL) { void * trace[64];
// Overwrite sigaction with caller's address. int size = backtrace(trace, 64);
trace[1] = pnt;
} if (pnt != NULL) {
// Overwrite sigaction with caller's address.
nvPrintStackTrace(trace, size, 1); trace[1] = pnt;
} }
nvPrintStackTrace(trace, size, 1);
# endif // defined(HAVE_EXECINFO_H) # endif // defined(HAVE_EXECINFO_H)
exit(0); exit(0);
@ -397,12 +371,9 @@ namespace
# endif # endif
# if defined(HAVE_EXECINFO_H) # if defined(HAVE_EXECINFO_H)
if (nvHasStackTrace()) void * trace[64];
{ int size = backtrace(trace, 64);
void * trace[64]; nvPrintStackTrace(trace, size, 3);
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 3);
}
# endif # endif
// Exit cleanly. // Exit cleanly.
@ -449,12 +420,9 @@ void NV_CDECL nvDebug(const char *msg, ...)
void debug::dumpInfo() void debug::dumpInfo()
{ {
#if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H) #if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H)
if (nvHasStackTrace()) void * trace[64];
{ int size = backtrace(trace, 64);
void * trace[64]; nvPrintStackTrace(trace, size, 1);
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 1);
}
#endif #endif
} }

View File

@ -2,7 +2,8 @@
#error "Do not include this file directly." #error "Do not include this file directly."
#endif #endif
#include <stdint.h> // uint8_t, int8_t, ... #include <stdlib.h> // uint8_t, int8_t, ...
// Function linkage // Function linkage
#define DLL_IMPORT #define DLL_IMPORT

View File

@ -19,9 +19,7 @@
// Set standard function names. // Set standard function names.
#define snprintf _snprintf #define snprintf _snprintf
#if _MSC_VER < 1500 #define vsnprintf _vsnprintf
# define vsnprintf _vsnprintf
#endif
#define vsscanf _vsscanf #define vsscanf _vsscanf
#define chdir _chdir #define chdir _chdir
#define getcwd _getcwd #define getcwd _getcwd

View File

@ -1,41 +0,0 @@
#include "Library.h"
#include "Debug.h"
#if NV_OS_WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
#else
#include <dlfcn.h>
#endif
/// Load the dynamic library called @a name and return an opaque handle to it.
/// The handle is whatever the platform loader hands back: the result of
/// LoadLibraryExA on Win32, or of dlopen with RTLD_LAZY everywhere else.
void * nvLoadLibrary(const char * name)
{
#if NV_OS_WIN32
    HMODULE module = LoadLibraryExA( name, NULL, 0 );
    return (void *)module;
#else
    void * module = dlopen(name, RTLD_LAZY);
    return module;
#endif
}
/// Unload a library previously returned by nvLoadLibrary.
/// @param handle  Library handle; callers are expected to pass a non-NULL value.
void nvUnloadLibrary(void * handle)
{
    nvDebugCheck(handle != NULL);

    // Do not forward a NULL handle to the platform loader. The check above is
    // presumably a debug-only assertion (TODO confirm against Debug.h), so it
    // cannot be relied on to stop a NULL handle in every build.
    if (handle == NULL)
    {
        return;
    }

#if NV_OS_WIN32
    FreeLibrary((HMODULE)handle);
#else
    dlclose(handle);
#endif
}
/// Look up @a symbol in the library identified by @a handle and return its
/// address as an untyped pointer. The lookup is delegated to GetProcAddress
/// on Win32 and to dlsym everywhere else.
void * nvBindSymbol(void * handle, const char * symbol)
{
#if NV_OS_WIN32
    HMODULE module = (HMODULE)handle;
    return (void *)GetProcAddress(module, symbol);
#else
    void * address = dlsym(handle, symbol);
    return address;
#endif
}

View File

@ -1,50 +0,0 @@
// This code is in the public domain -- castano@gmail.com
#ifndef NV_CORE_LIBRARY_H
#define NV_CORE_LIBRARY_H
#include <nvcore/nvcore.h>
// NV_LIBRARY_NAME(name) expands to a string literal holding the
// platform-specific file name of the dynamic library `name`:
// "name.dll" on Win32, "libname.dylib" on Darwin, "libname.so" otherwise.
#if NV_OS_WIN32
#define NV_LIBRARY_NAME(name) #name ".dll"
// Earlier spelling of the Win32 macro, kept so existing users still compile.
#define LIBRARY_NAME(name) #name ".dll"
#elif NV_OS_DARWIN
#define NV_LIBRARY_NAME(name) "lib" #name ".dylib"
#else
#define NV_LIBRARY_NAME(name) "lib" #name ".so"
#endif
NVCORE_API void * nvLoadLibrary(const char * name);
NVCORE_API void nvUnloadLibrary(void * lib);
NVCORE_API void * nvBindSymbol(void * lib, const char * symbol);
class NVCORE_CLASS Library
{
public:
Library(const char * name)
{
handle = nvLoadLibrary(name);
}
~Library()
{
if (isValid())
{
nvUnloadLibrary(handle);
}
}
bool isValid() const
{
return handle != NULL;
}
void * bindSymbol(const char * symbol)
{
return nvBindSymbol(handle, symbol);
}
private:
void * handle;
};
#endif // NV_CORE_LIBRARY_H

View File

@ -18,8 +18,6 @@ void * nv::mem::malloc(size_t size)
void * nv::mem::malloc(size_t size, const char * file, int line) void * nv::mem::malloc(size_t size, const char * file, int line)
{ {
NV_UNUSED(file);
NV_UNUSED(line);
return ::malloc(size); return ::malloc(size);
} }

View File

@ -24,7 +24,7 @@ __forceinline void nvPrefetch(const void * mem)
#else // NV_CC_MSVC #else // NV_CC_MSVC
// do nothing in other case. // do nothing in other case.
#define nvPrefetch(ptr) #define piPrefetch(ptr)
#endif // NV_CC_MSVC #endif // NV_CC_MSVC

View File

@ -43,11 +43,8 @@ public:
/** Delete owned pointer and assign new one. */ /** Delete owned pointer and assign new one. */
void operator=( T * p ) { void operator=( T * p ) {
if (p != m_ptr) delete m_ptr;
{ m_ptr = p;
delete m_ptr;
m_ptr = p;
}
} }
/** Member access. */ /** Member access. */
@ -252,14 +249,14 @@ public:
/** -> operator. */ /** -> operator. */
BaseClass * operator -> () const BaseClass * operator -> () const
{ {
nvCheck( m_ptr != NULL ); piCheck( m_ptr != NULL );
return m_ptr; return m_ptr;
} }
/** * operator. */ /** * operator. */
BaseClass & operator*() const BaseClass & operator*() const
{ {
nvCheck( m_ptr != NULL ); piCheck( m_ptr != NULL );
return *m_ptr; return *m_ptr;
} }

View File

@ -14,7 +14,7 @@ namespace nv
uint strHash(const char * str, uint h) NV_PURE; uint strHash(const char * str, uint h) NV_PURE;
/// String hash based on Bernstein's hash. /// String hash vased on Bernstein's hash.
inline uint strHash(const char * data, uint h = 5381) inline uint strHash(const char * data, uint h = 5381)
{ {
uint i; uint i;
@ -213,12 +213,9 @@ namespace nv
/// Implement value semantics. /// Implement value semantics.
String & operator=( const String & str ) String & operator=( const String & str )
{ {
if (str.data != data) release();
{ data = str.data;
release(); addRef();
data = str.data;
addRef();
}
return *this; return *this;
} }

View File

@ -307,6 +307,15 @@ void ColorBlock::boundsRangeAlpha(Color32 * start, Color32 * end) const
} }
/// Compute a color range for the block along its best-fit line.
/// The direction of bestFitLine() is handed to computeRange() as the axis,
/// and computeRange() receives @a start and @a end to fill in.
/// Both output pointers are expected to be non-NULL.
void ColorBlock::bestFitRange(Color32 * start, Color32 * end) const
{
    nvDebugCheck(start != NULL);
    nvDebugCheck(end != NULL);

    computeRange(bestFitLine().direction(), start, end);
}
/// Sort colors by abosolute value in their 16 bit representation. /// Sort colors by abosolute value in their 16 bit representation.
void ColorBlock::sortColorsByAbsoluteValue() void ColorBlock::sortColorsByAbsoluteValue()
{ {
@ -384,6 +393,19 @@ void ColorBlock::sortColors(const Vector3 & axis)
} }
/// Get the least squares line that best approximates the points of the color block.
/// Each of the 16 block colors contributes one point built from its r, g and b
/// components; the fit itself is done by Fit::bestLine.
Line3 ColorBlock::bestFitLine() const
{
    // NOTE(review): Array(16) presumably reserves capacity rather than
    // creating 16 elements, since the points are appended below - confirm.
    Array<Vector3> points(16);

    for (int c = 0; c < 16; ++c)
    {
        points.append(Vector3(m_color[c].r, m_color[c].g, m_color[c].b));
    }

    return Fit::bestLine(points);
}
/// Get the volume of the color block. /// Get the volume of the color block.
float ColorBlock::volume() const float ColorBlock::volume() const
{ {

View File

@ -4,6 +4,7 @@
#define NV_IMAGE_COLORBLOCK_H #define NV_IMAGE_COLORBLOCK_H
#include <nvmath/Color.h> #include <nvmath/Color.h>
#include <nvmath/Fitting.h> // Line3
namespace nv namespace nv
{ {
@ -32,13 +33,16 @@ namespace nv
void luminanceRange(Color32 * start, Color32 * end) const; void luminanceRange(Color32 * start, Color32 * end) const;
void boundsRange(Color32 * start, Color32 * end) const; void boundsRange(Color32 * start, Color32 * end) const;
void boundsRangeAlpha(Color32 * start, Color32 * end) const; void boundsRangeAlpha(Color32 * start, Color32 * end) const;
void bestFitRange(Color32 * start, Color32 * end) const;
void sortColorsByAbsoluteValue(); void sortColorsByAbsoluteValue();
void computeRange(const Vector3 & axis, Color32 * start, Color32 * end) const; void computeRange(const Vector3 & axis, Color32 * start, Color32 * end) const;
void sortColors(const Vector3 & axis); void sortColors(const Vector3 & axis);
Line3 bestFitLine() const;
float volume() const; float volume() const;
Line3 diameterLine() const;
// Accessors // Accessors
const Color32 * colors() const; const Color32 * colors() const;

View File

@ -54,10 +54,6 @@ namespace
static const uint FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'); static const uint FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1');
static const uint FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'); static const uint FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2');
static const uint FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y');
static const uint FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0');
// 32 bit RGB formats. // 32 bit RGB formats.
static const uint D3DFMT_R8G8B8 = 20; static const uint D3DFMT_R8G8B8 = 20;
static const uint D3DFMT_A8R8G8B8 = 21; static const uint D3DFMT_A8R8G8B8 = 21;
@ -257,144 +253,6 @@ namespace
D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4, D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4,
}; };
/// Return a human-readable name for a DXGI format value.
/// The name is the enumerator spelled without its DXGI_FORMAT_ prefix
/// (for example DXGI_FORMAT_BC1_UNORM yields "BC1_UNORM"). Any value not
/// listed below yields "UNKNOWN".
const char * getDxgiFormatString(DXGI_FORMAT dxgiFormat)
{
// CASE(X) expands to `case DXGI_FORMAT_X: return "X"`, so each entry below
// both matches the enumerator and stringifies its suffix.
#define CASE(format) case DXGI_FORMAT_##format: return #format
switch(dxgiFormat)
{
CASE(UNKNOWN);
CASE(R32G32B32A32_TYPELESS);
CASE(R32G32B32A32_FLOAT);
CASE(R32G32B32A32_UINT);
CASE(R32G32B32A32_SINT);
CASE(R32G32B32_TYPELESS);
CASE(R32G32B32_FLOAT);
CASE(R32G32B32_UINT);
CASE(R32G32B32_SINT);
CASE(R16G16B16A16_TYPELESS);
CASE(R16G16B16A16_FLOAT);
CASE(R16G16B16A16_UNORM);
CASE(R16G16B16A16_UINT);
CASE(R16G16B16A16_SNORM);
CASE(R16G16B16A16_SINT);
CASE(R32G32_TYPELESS);
CASE(R32G32_FLOAT);
CASE(R32G32_UINT);
CASE(R32G32_SINT);
CASE(R32G8X24_TYPELESS);
CASE(D32_FLOAT_S8X24_UINT);
CASE(R32_FLOAT_X8X24_TYPELESS);
CASE(X32_TYPELESS_G8X24_UINT);
CASE(R10G10B10A2_TYPELESS);
CASE(R10G10B10A2_UNORM);
CASE(R10G10B10A2_UINT);
CASE(R11G11B10_FLOAT);
CASE(R8G8B8A8_TYPELESS);
CASE(R8G8B8A8_UNORM);
CASE(R8G8B8A8_UNORM_SRGB);
CASE(R8G8B8A8_UINT);
CASE(R8G8B8A8_SNORM);
CASE(R8G8B8A8_SINT);
CASE(R16G16_TYPELESS);
CASE(R16G16_FLOAT);
CASE(R16G16_UNORM);
CASE(R16G16_UINT);
CASE(R16G16_SNORM);
CASE(R16G16_SINT);
CASE(R32_TYPELESS);
CASE(D32_FLOAT);
CASE(R32_FLOAT);
CASE(R32_UINT);
CASE(R32_SINT);
CASE(R24G8_TYPELESS);
CASE(D24_UNORM_S8_UINT);
CASE(R24_UNORM_X8_TYPELESS);
CASE(X24_TYPELESS_G8_UINT);
CASE(R8G8_TYPELESS);
CASE(R8G8_UNORM);
CASE(R8G8_UINT);
CASE(R8G8_SNORM);
CASE(R8G8_SINT);
CASE(R16_TYPELESS);
CASE(R16_FLOAT);
CASE(D16_UNORM);
CASE(R16_UNORM);
CASE(R16_UINT);
CASE(R16_SNORM);
CASE(R16_SINT);
CASE(R8_TYPELESS);
CASE(R8_UNORM);
CASE(R8_UINT);
CASE(R8_SNORM);
CASE(R8_SINT);
CASE(A8_UNORM);
CASE(R1_UNORM);
CASE(R9G9B9E5_SHAREDEXP);
CASE(R8G8_B8G8_UNORM);
CASE(G8R8_G8B8_UNORM);
CASE(BC1_TYPELESS);
CASE(BC1_UNORM);
CASE(BC1_UNORM_SRGB);
CASE(BC2_TYPELESS);
CASE(BC2_UNORM);
CASE(BC2_UNORM_SRGB);
CASE(BC3_TYPELESS);
CASE(BC3_UNORM);
CASE(BC3_UNORM_SRGB);
CASE(BC4_TYPELESS);
CASE(BC4_UNORM);
CASE(BC4_SNORM);
CASE(BC5_TYPELESS);
CASE(BC5_UNORM);
CASE(BC5_SNORM);
CASE(B5G6R5_UNORM);
CASE(B5G5R5A1_UNORM);
CASE(B8G8R8A8_UNORM);
CASE(B8G8R8X8_UNORM);
// Any value without an entry above is reported as unknown.
default:
return "UNKNOWN";
}
// Keep the helper macro local to this function.
#undef CASE
}
// Map a D3D10_RESOURCE_DIMENSION value to a short printable name.
// D3D10_RESOURCE_DIMENSION_UNKNOWN and any unlisted value yield "UNKNOWN".
const char * getD3d10ResourceDimensionString(D3D10_RESOURCE_DIMENSION resourceDimension)
{
    switch (resourceDimension)
    {
        case D3D10_RESOURCE_DIMENSION_BUFFER:
            return "BUFFER";
        case D3D10_RESOURCE_DIMENSION_TEXTURE1D:
            return "TEXTURE1D";
        case D3D10_RESOURCE_DIMENSION_TEXTURE2D:
            return "TEXTURE2D";
        case D3D10_RESOURCE_DIMENSION_TEXTURE3D:
            return "TEXTURE3D";
        case D3D10_RESOURCE_DIMENSION_UNKNOWN:
        default:
            break;
    }

    return "UNKNOWN";
}
} // namespace } // namespace
namespace nv namespace nv
@ -532,7 +390,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes. // Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T'); this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (2 << 16) | (0 << 8) | (5); // major.minor.revision this->reserved[10] = (0 << 16) | (9 << 8) | (5); // major.minor.revision
this->pf.size = 32; this->pf.size = 32;
this->pf.flags = 0; this->pf.flags = 0;
@ -636,16 +494,7 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
// set fourcc pixel format. // set fourcc pixel format.
this->pf.flags = DDPF_FOURCC; this->pf.flags = DDPF_FOURCC;
this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3); this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = 0;
if (this->pf.fourcc == FOURCC_ATI2)
{
this->pf.bitcount = FOURCC_A2XY;
}
else
{
this->pf.bitcount = 0;
}
this->pf.rmask = 0; this->pf.rmask = 0;
this->pf.gmask = 0; this->pf.gmask = 0;
this->pf.bmask = 0; this->pf.bmask = 0;
@ -681,9 +530,9 @@ void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask
nvCheck(bitcount > 0 && bitcount <= 32); nvCheck(bitcount > 0 && bitcount <= 32);
// Align to 8. // Align to 8.
if (bitcount <= 8) bitcount = 8; if (bitcount < 8) bitcount = 8;
else if (bitcount <= 16) bitcount = 16; else if (bitcount < 16) bitcount = 16;
else if (bitcount <= 24) bitcount = 24; else if (bitcount < 24) bitcount = 24;
else bitcount = 32; else bitcount = 32;
this->pf.fourcc = 0; //findD3D9Format(bitcount, rmask, gmask, bmask, amask); this->pf.fourcc = 0; //findD3D9Format(bitcount, rmask, gmask, bmask, amask);
@ -696,8 +545,7 @@ void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask
void DDSHeader::setDX10Format(uint format) void DDSHeader::setDX10Format(uint format)
{ {
//this->pf.flags = 0; this->pf.flags = 0;
this->pf.fourcc = FOURCC_DX10;
this->header10.dxgiFormat = format; this->header10.dxgiFormat = format;
} }
@ -745,8 +593,7 @@ void DDSHeader::swapBytes()
bool DDSHeader::hasDX10Header() const bool DDSHeader::hasDX10Header() const
{ {
return this->pf.fourcc == FOURCC_DX10; // @@ This is according to AMD return this->pf.flags == 0;
//return this->pf.flags == 0; // @@ This is according to MS
} }
@ -776,7 +623,7 @@ bool DirectDrawSurface::isValid() const
return false; return false;
} }
const uint required = (DDSD_WIDTH|DDSD_HEIGHT/*|DDSD_CAPS|DDSD_PIXELFORMAT*/); const uint required = (DDSD_WIDTH|DDSD_HEIGHT|DDSD_CAPS|DDSD_PIXELFORMAT);
if( (header.flags & required) != required ) { if( (header.flags & required) != required ) {
return false; return false;
} }
@ -796,46 +643,40 @@ bool DirectDrawSurface::isSupported() const
{ {
nvDebugCheck(isValid()); nvDebugCheck(isValid());
if (header.hasDX10Header()) if (header.pf.flags & DDPF_FOURCC)
{ {
if (header.pf.fourcc != FOURCC_DXT1 &&
header.pf.fourcc != FOURCC_DXT2 &&
header.pf.fourcc != FOURCC_DXT3 &&
header.pf.fourcc != FOURCC_DXT4 &&
header.pf.fourcc != FOURCC_DXT5 &&
header.pf.fourcc != FOURCC_RXGB &&
header.pf.fourcc != FOURCC_ATI1 &&
header.pf.fourcc != FOURCC_ATI2)
{
// Unknown fourcc code.
return false;
}
}
else if (header.pf.flags & DDPF_RGB)
{
// All RGB formats are supported now.
} }
else else
{ {
if (header.pf.flags & DDPF_FOURCC) return false;
{ }
if (header.pf.fourcc != FOURCC_DXT1 &&
header.pf.fourcc != FOURCC_DXT2 && if (isTextureCube() && (header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES)
header.pf.fourcc != FOURCC_DXT3 && {
header.pf.fourcc != FOURCC_DXT4 && // Cubemaps must contain all faces.
header.pf.fourcc != FOURCC_DXT5 && return false;
header.pf.fourcc != FOURCC_RXGB && }
header.pf.fourcc != FOURCC_ATI1 &&
header.pf.fourcc != FOURCC_ATI2) if (isTexture3D())
{ {
// Unknown fourcc code. // @@ 3D textures not supported yet.
return false; return false;
}
}
else if (header.pf.flags & DDPF_RGB)
{
// All RGB formats are supported now.
}
else
{
return false;
}
if (isTextureCube() && (header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES)
{
// Cubemaps must contain all faces.
return false;
}
if (isTexture3D())
{
// @@ 3D textures not supported yet.
return false;
}
} }
return true; return true;
@ -871,40 +712,16 @@ uint DirectDrawSurface::depth() const
else return 1; else return 1;
} }
// True only when the file carries a DX10 header whose resource dimension is
// TEXTURE1D; surfaces without a DX10 header are never reported as 1D.
bool DirectDrawSurface::isTexture1D() const
{
    nvDebugCheck(isValid());

    return header.hasDX10Header()
        && header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE1D;
}
bool DirectDrawSurface::isTexture2D() const bool DirectDrawSurface::isTexture2D() const
{ {
nvDebugCheck(isValid()); nvDebugCheck(isValid());
if (header.hasDX10Header()) return !isTexture3D() && !isTextureCube();
{
return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE2D;
}
else
{
return !isTexture3D() && !isTextureCube();
}
} }
bool DirectDrawSurface::isTexture3D() const bool DirectDrawSurface::isTexture3D() const
{ {
nvDebugCheck(isValid()); nvDebugCheck(isValid());
if (header.hasDX10Header()) return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0;
{
return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE3D;
}
else
{
return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0;
}
} }
bool DirectDrawSurface::isTextureCube() const bool DirectDrawSurface::isTextureCube() const
@ -913,12 +730,6 @@ bool DirectDrawSurface::isTextureCube() const
return (header.caps.caps2 & DDSCAPS2_CUBEMAP) != 0; return (header.caps.caps2 & DDSCAPS2_CUBEMAP) != 0;
} }
// Forward the requested normal-map flag state to header.setNormalFlag().
// The surface is expected to be valid (checked in debug builds only).
void DirectDrawSurface::setNormalFlag(bool b)
{
    nvDebugCheck(isValid());

    header.setNormalFlag(b);
}
void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap) void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap)
{ {
nvDebugCheck(isValid()); nvDebugCheck(isValid());
@ -969,13 +780,7 @@ void DirectDrawSurface::readLinearImage(Image * img)
uint byteCount = (header.pf.bitcount + 7) / 8; uint byteCount = (header.pf.bitcount + 7) / 8;
// set image format: RGB or ARGB if (header.pf.amask != 0)
// alpha channel exists if and only if the alpha mask is non-zero
if (header.pf.amask == 0)
{
img->setFormat(Image::Format_RGB);
}
else
{ {
img->setFormat(Image::Format_ARGB); img->setFormat(Image::Format_ARGB);
} }
@ -1003,20 +808,7 @@ void DirectDrawSurface::readBlockImage(Image * img)
{ {
nvDebugCheck(stream != NULL); nvDebugCheck(stream != NULL);
nvDebugCheck(img != NULL); nvDebugCheck(img != NULL);
// set image format: RGB or ARGB
if (header.pf.fourcc == FOURCC_RXGB ||
header.pf.fourcc == FOURCC_ATI1 ||
header.pf.fourcc == FOURCC_ATI2 ||
header.pf.flags & DDPF_NORMAL)
{
img->setFormat(Image::Format_RGB);
}
else
{
img->setFormat(Image::Format_ARGB);
}
const uint w = img->width(); const uint w = img->width();
const uint h = img->height(); const uint h = img->height();
@ -1252,23 +1044,8 @@ void DirectDrawSurface::printInfo() const
if (header.pf.flags & DDPF_ALPHAPREMULT) printf("\t\tDDPF_ALPHAPREMULT\n"); if (header.pf.flags & DDPF_ALPHAPREMULT) printf("\t\tDDPF_ALPHAPREMULT\n");
if (header.pf.flags & DDPF_NORMAL) printf("\t\tDDPF_NORMAL\n"); if (header.pf.flags & DDPF_NORMAL) printf("\t\tDDPF_NORMAL\n");
printf("\tFourCC: '%c%c%c%c'\n", printf("\tFourCC: '%c%c%c%c'\n", ((header.pf.fourcc >> 0) & 0xFF), ((header.pf.fourcc >> 8) & 0xFF), ((header.pf.fourcc >> 16) & 0xFF), ((header.pf.fourcc >> 24) & 0xFF));
((header.pf.fourcc >> 0) & 0xFF), printf("\tBit count: %d\n", header.pf.bitcount);
((header.pf.fourcc >> 8) & 0xFF),
((header.pf.fourcc >> 16) & 0xFF),
((header.pf.fourcc >> 24) & 0xFF));
if ((header.pf.fourcc & DDPF_FOURCC) && (header.pf.bitcount != 0))
{
printf("\tSwizzle: '%c%c%c%c'\n",
(header.pf.bitcount >> 0) & 0xFF,
(header.pf.bitcount >> 8) & 0xFF,
(header.pf.bitcount >> 16) & 0xFF,
(header.pf.bitcount >> 24) & 0xFF);
}
else
{
printf("\tBit count: %d\n", header.pf.bitcount);
}
printf("\tRed mask: 0x%.8X\n", header.pf.rmask); printf("\tRed mask: 0x%.8X\n", header.pf.rmask);
printf("\tGreen mask: 0x%.8X\n", header.pf.gmask); printf("\tGreen mask: 0x%.8X\n", header.pf.gmask);
printf("\tBlue mask: 0x%.8X\n", header.pf.bmask); printf("\tBlue mask: 0x%.8X\n", header.pf.bmask);
@ -1299,11 +1076,11 @@ void DirectDrawSurface::printInfo() const
printf("\tCaps 3: 0x%.8X\n", header.caps.caps3); printf("\tCaps 3: 0x%.8X\n", header.caps.caps3);
printf("\tCaps 4: 0x%.8X\n", header.caps.caps4); printf("\tCaps 4: 0x%.8X\n", header.caps.caps4);
if (header.hasDX10Header()) if (header.pf.flags == 0)
{ {
printf("DX10 Header:\n"); printf("DX10 Header:\n");
printf("\tDXGI Format: %u (%s)\n", header.header10.dxgiFormat, getDxgiFormatString((DXGI_FORMAT)header.header10.dxgiFormat)); printf("\tDXGI Format: %u\n", header.header10.dxgiFormat);
printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((D3D10_RESOURCE_DIMENSION)header.header10.resourceDimension)); printf("\tResource dimension: %u\n", header.header10.resourceDimension);
printf("\tMisc flag: %u\n", header.header10.miscFlag); printf("\tMisc flag: %u\n", header.header10.miscFlag);
printf("\tArray size: %u\n", header.header10.arraySize); printf("\tArray size: %u\n", header.header10.arraySize);
} }

View File

@ -119,12 +119,9 @@ namespace nv
uint width() const; uint width() const;
uint height() const; uint height() const;
uint depth() const; uint depth() const;
bool isTexture1D() const;
bool isTexture2D() const; bool isTexture2D() const;
bool isTexture3D() const; bool isTexture3D() const;
bool isTextureCube() const; bool isTextureCube() const;
void setNormalFlag(bool b);
void mipmap(Image * img, uint f, uint m); void mipmap(Image * img, uint f, uint m);
// void mipmap(FloatImage * img, uint f, uint m); // void mipmap(FloatImage * img, uint f, uint m);

View File

@ -26,10 +26,10 @@
* http://www.xmission.com/~legalize/zoom.html * http://www.xmission.com/~legalize/zoom.html
* *
* Reconstruction Filters in Computer Graphics * Reconstruction Filters in Computer Graphics
* http://www.mentallandscape.com/Papers_siggraph88.pdf * http://www.mentallandscape.com/Papers_siggraph88.pdf
* *
* More references: * More references:
* http://www.worldserver.com/turk/computergraphics/ResamplingFilters.pdf * http://www.worldserver.com/turk/computergraphics/ResamplingFilters.pdf
* http://www.dspguide.com/ch16.htm * http://www.dspguide.com/ch16.htm
*/ */
@ -244,7 +244,7 @@ SincFilter::SincFilter(float w) : Filter(w) {}
float SincFilter::evaluate(float x) const float SincFilter::evaluate(float x) const
{ {
return sincf(PI * x); return 0.0f;
} }
@ -541,17 +541,12 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples/*= 32*/) PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples/*= 32*/)
{ {
nvCheck(srcLength >= dstLength); // @@ Upsampling not implemented!
nvDebugCheck(samples > 0); nvDebugCheck(samples > 0);
float scale = float(dstLength) / float(srcLength); const float scale = float(dstLength) / float(srcLength);
const float iscale = 1.0f / scale; const float iscale = 1.0f / scale;
if (scale > 1) {
// Upsampling.
samples = 1;
scale = 1;
}
m_length = dstLength; m_length = dstLength;
m_width = f.width() * iscale; m_width = f.width() * iscale;
m_windowSize = (int)ceilf(m_width * 2) + 1; m_windowSize = (int)ceilf(m_width * 2) + 1;
@ -582,7 +577,6 @@ PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLengt
m_data[i * m_windowSize + j] /= total; m_data[i * m_windowSize + j] /= total;
} }
} }
} }
PolyphaseKernel::~PolyphaseKernel() PolyphaseKernel::~PolyphaseKernel()

View File

@ -376,7 +376,7 @@ FloatImage * FloatImage::fastDownSample() const
{ {
const uint n = w * h; const uint n = w * h;
if ((m_width * m_height) & 1) if (n & 1)
{ {
const float scale = 1.0f / (2 * n + 1); const float scale = 1.0f / (2 * n + 1);
@ -540,18 +540,73 @@ FloatImage * FloatImage::fastDownSample() const
return dst_image.release(); return dst_image.release();
} }
/*
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, WrapMode wm) const
{
const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2);
return downSample(kernel, w, h, wm);
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, uint w, uint h, WrapMode wm) const
{
nvCheck(!(kernel.windowSize() & 1)); // Make sure that kernel m_width is even.
AutoPtr<FloatImage> tmp_image( new FloatImage() );
tmp_image->allocate(m_componentNum, w, m_height);
AutoPtr<FloatImage> dst_image( new FloatImage() );
dst_image->allocate(m_componentNum, w, h);
const float xscale = float(m_width) / float(w);
const float yscale = float(m_height) / float(h);
for(uint c = 0; c < m_componentNum; c++) {
float * tmp_channel = tmp_image->channel(c);
for(uint y = 0; y < m_height; y++) {
for(uint x = 0; x < w; x++) {
float sum = this->applyKernelHorizontal(&kernel, uint(x*xscale), y, c, wm);
const uint tmp_index = tmp_image->index(x, y);
tmp_channel[tmp_index] = sum;
}
}
float * dst_channel = dst_image->channel(c);
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
float sum = tmp_image->applyKernelVertical(&kernel, uint(x*xscale), uint(y*yscale), c, wm);
const uint dst_index = dst_image->index(x, y);
dst_channel[dst_index] = sum;
}
}
}
return dst_image.release();
}
*/
/// Downsample applying a 1D kernel separately in each dimension. /// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const
{ {
const uint w = max(1, m_width / 2); const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2); const uint h = max(1, m_height / 2);
return resize(filter, w, h, wm); return downSample(filter, w, h, wm);
} }
/// Downsample applying a 1D kernel separately in each dimension. /// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm) const FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapMode wm) const
{ {
// @@ Use monophase filters when frac(m_width / w) == 0 // @@ Use monophase filters when frac(m_width / w) == 0

View File

@ -63,7 +63,7 @@ public:
NVIMAGE_API FloatImage * fastDownSample() const; NVIMAGE_API FloatImage * fastDownSample() const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const; NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const; NVIMAGE_API FloatImage * downSample(const Filter & filter, uint w, uint h, WrapMode wm) const;
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const; //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const;
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const; //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const;
@ -226,18 +226,14 @@ inline uint FloatImage::indexRepeat(int x, int y) const
inline uint FloatImage::indexMirror(int x, int y) const inline uint FloatImage::indexMirror(int x, int y) const
{ {
if (m_width == 1) x = 0;
x = abs(x); x = abs(x);
while (x >= m_width) { while (x >= m_width) {
x = abs(m_width + m_width - x - 2); x = m_width + m_width - x - 2;
} }
if (m_height == 1) y = 0;
y = abs(y); y = abs(y);
while (y >= m_height) { while (y >= m_height) {
y = abs(m_height + m_height - y - 2); y = m_height + m_height - y - 2;
} }
return index(x, y); return index(x, y);

View File

@ -296,7 +296,7 @@ static bool downsample(const FloatImage * src, const BitMap * srcMask, const Flo
return true; return true;
} }
// This is the filter used in the Lumigraph paper. // This is the filter used in the Lumigraph paper. The Unreal engine uses something similar.
void nv::fillPullPush(FloatImage * img, const BitMap * bmap) void nv::fillPullPush(FloatImage * img, const BitMap * bmap)
{ {
nvCheck(img != NULL); nvCheck(img != NULL);
@ -644,8 +644,8 @@ struct LocalPixels
// This is a quadratic extrapolation filter from Charles Bloom (DoPixelSeamFix). Used with his permission. // This is a cubic extrapolation filter from Charles Bloom (DoPixelSeamFix).
void nv::fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/) void nv::fillCubicExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/)
{ {
nvCheck(passCount > 0); nvCheck(passCount > 0);
nvCheck(img != NULL); nvCheck(img != NULL);

View File

@ -89,7 +89,7 @@ namespace nv
NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap * bmap); NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap * bmap);
NVIMAGE_API void fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap); NVIMAGE_API void fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap);
NVIMAGE_API void fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1); NVIMAGE_API void fillCubicExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1);
} // nv namespace } // nv namespace

View File

@ -15,7 +15,7 @@ Image::Image() : m_width(0), m_height(0), m_format(Format_RGB), m_data(NULL)
{ {
} }
Image::Image(const Image & img) : m_data(NULL) Image::Image(const Image & img)
{ {
allocate(img.m_width, img.m_height); allocate(img.m_width, img.m_height);
m_format = img.m_format; m_format = img.m_format;

View File

@ -16,7 +16,6 @@ http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
#include <nvimage/Image.h> #include <nvimage/Image.h>
#include <nvimage/Quantize.h> #include <nvimage/Quantize.h>
#include <nvimage/PixelFormat.h>
using namespace nv; using namespace nv;
@ -48,20 +47,94 @@ void nv::Quantize::BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ )
// Simple quantization. // Simple quantization.
void nv::Quantize::RGB16( Image * image ) void nv::Quantize::RGB16( Image * image )
{ {
Truncate(image, 5, 6, 5, 8); nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel32 = image->pixel(x, y);
// Convert to 16 bit and back to 32 using regular bit expansion.
Color32 pixel16 = toColor32( toColor16(pixel32) );
// Store color.
image->pixel(x, y) = pixel16;
}
}
} }
// Alpha quantization. // Alpha quantization.
void nv::Quantize::Alpha4( Image * image ) void nv::Quantize::Alpha4( Image * image )
{ {
Truncate(image, 8, 8, 8, 4); nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y);
// Convert to 4 bit using regular bit expansion.
pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
// Store color.
image->pixel(x, y) = pixel;
}
}
} }
// Error diffusion. Floyd Steinberg. // Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg_RGB16( Image * image ) void nv::Quantize::FloydSteinberg_RGB16( Image * image )
{ {
FloydSteinberg(image, 5, 6, 5, 8); nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
// @@ Use fixed point?
Vector3 * row0 = new Vector3[w+2];
Vector3 * row1 = new Vector3[w+2];
memset(row0, 0, sizeof(Vector3)*(w+2));
memset(row1, 0, sizeof(Vector3)*(w+2));
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel32 = image->pixel(x, y);
// Add error. // @@ We shouldn't clamp here!
pixel32.r = clamp(int(pixel32.r) + int(row0[1+x].x()), 0, 255);
pixel32.g = clamp(int(pixel32.g) + int(row0[1+x].y()), 0, 255);
pixel32.b = clamp(int(pixel32.b) + int(row0[1+x].z()), 0, 255);
// Convert to 16 bit. @@ Use regular clamp?
Color32 pixel16 = toColor32( toColor16(pixel32) );
// Store color.
image->pixel(x, y) = pixel16;
// Compute new error.
Vector3 diff(float(pixel32.r - pixel16.r), float(pixel32.g - pixel16.g), float(pixel32.b - pixel16.b));
// Propagate new error.
row0[1+x+1] += 7.0f / 16.0f * diff;
row1[1+x-1] += 3.0f / 16.0f * diff;
row1[1+x+0] += 5.0f / 16.0f * diff;
row1[1+x+1] += 1.0f / 16.0f * diff;
}
swap(row0, row1);
memset(row1, 0, sizeof(Vector3)*(w+2));
}
delete [] row0;
delete [] row1;
} }
@ -115,102 +188,47 @@ void nv::Quantize::FloydSteinberg_BinaryAlpha( Image * image, int alpha_threshol
// Error diffusion. Floyd Steinberg. // Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg_Alpha4( Image * image ) void nv::Quantize::FloydSteinberg_Alpha4( Image * image )
{
FloydSteinberg(image, 8, 8, 8, 4);
}
void nv::Quantize::Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
{ {
nvCheck(image != NULL); nvCheck(image != NULL);
const uint w = image->width(); const uint w = image->width();
const uint h = image->height(); const uint h = image->height();
// @@ Use fixed point?
float * row0 = new float[(w+2)];
float * row1 = new float[(w+2)];
memset(row0, 0, sizeof(float)*(w+2));
memset(row1, 0, sizeof(float)*(w+2));
for(uint y = 0; y < h; y++) { for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) { for(uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y); Color32 pixel = image->pixel(x, y);
// Convert to our desired size, and reconstruct. // Add error.
pixel.r = PixelFormat::convert(pixel.r, 8, rsize); int alpha = int(pixel.a) + int(row0[1+x]);
pixel.r = PixelFormat::convert(pixel.r, rsize, 8);
// Convert to 4 bit using regular bit expansion.
pixel.g = PixelFormat::convert(pixel.g, 8, gsize); pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
pixel.g = PixelFormat::convert(pixel.g, gsize, 8);
pixel.b = PixelFormat::convert(pixel.b, 8, bsize);
pixel.b = PixelFormat::convert(pixel.b, bsize, 8);
pixel.a = PixelFormat::convert(pixel.a, 8, asize);
pixel.a = PixelFormat::convert(pixel.a, asize, 8);
// Store color. // Store color.
image->pixel(x, y) = pixel; image->pixel(x, y) = pixel;
// Compute new error.
float diff = float(alpha - pixel.a);
// Propagate new error.
row0[1+x+1] += 7.0f / 16.0f * diff;
row1[1+x-1] += 3.0f / 16.0f * diff;
row1[1+x+0] += 5.0f / 16.0f * diff;
row1[1+x+1] += 1.0f / 16.0f * diff;
} }
swap(row0, row1);
memset(row1, 0, sizeof(float)*(w+2));
} }
delete [] row0;
delete [] row1;
} }
// Floyd-Steinberg error diffusion.
// Each channel is reduced to the requested bit count (rsize/gsize/bsize/asize)
// and expanded back to 8 bits; the per-pixel quantization error is spread to
// the right neighbour and to the three neighbours on the next row.
void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
{
    nvCheck(image != NULL);

    const uint width = image->width();
    const uint height = image->height();

    // One spare slot on each side so that x-1 and x+1 always stay inside the rows.
    const uint rowLength = width + 2;

    Vector4 * currentErrors = new Vector4[rowLength];
    Vector4 * nextErrors = new Vector4[rowLength];
    memset(currentErrors, 0, sizeof(Vector4) * rowLength);
    memset(nextErrors, 0, sizeof(Vector4) * rowLength);

    for (uint y = 0; y < height; y++)
    {
        for (uint x = 0; x < width; x++)
        {
            // Slot of this pixel inside the padded error rows.
            const uint slot = x + 1;

            Color32 pixel = image->pixel(x, y);

            // Apply the error carried over from earlier pixels (truncated to int), clamped to 8 bits.
            pixel.r = clamp(int(pixel.r) + int(currentErrors[slot].x()), 0, 255);
            pixel.g = clamp(int(pixel.g) + int(currentErrors[slot].y()), 0, 255);
            pixel.b = clamp(int(pixel.b) + int(currentErrors[slot].z()), 0, 255);
            pixel.a = clamp(int(pixel.a) + int(currentErrors[slot].w()), 0, 255);

            // Quantize every channel to its target size and reconstruct the 8 bit value.
            int red = pixel.r;
            red = PixelFormat::convert(red, 8, rsize);
            red = PixelFormat::convert(red, rsize, 8);

            int green = pixel.g;
            green = PixelFormat::convert(green, 8, gsize);
            green = PixelFormat::convert(green, gsize, 8);

            int blue = pixel.b;
            blue = PixelFormat::convert(blue, 8, bsize);
            blue = PixelFormat::convert(blue, bsize, 8);

            int alpha = pixel.a;
            alpha = PixelFormat::convert(alpha, 8, asize);
            alpha = PixelFormat::convert(alpha, asize, 8);

            // Write the quantized color back.
            image->pixel(x, y) = Color32(red, green, blue, alpha);

            // Error introduced by the quantization of this pixel.
            Vector4 diff(
                float(int(pixel.r) - red),
                float(int(pixel.g) - green),
                float(int(pixel.b) - blue),
                float(int(pixel.a) - alpha));

            // Distribute it with the 7/16, 3/16, 5/16, 1/16 weights.
            currentErrors[slot + 1] += 7.0f / 16.0f * diff;
            nextErrors[slot - 1] += 3.0f / 16.0f * diff;
            nextErrors[slot] += 5.0f / 16.0f * diff;
            nextErrors[slot + 1] += 1.0f / 16.0f * diff;
        }

        // The next row becomes the current one; start the new next row from zero.
        swap(currentErrors, nextErrors);
        memset(nextErrors, 0, sizeof(Vector4) * rowLength);
    }

    delete [] currentErrors;
    delete [] nextErrors;
}

View File

@ -17,9 +17,6 @@ namespace nv
void FloydSteinberg_BinaryAlpha(Image * img, int alpha_threshold = 127); void FloydSteinberg_BinaryAlpha(Image * img, int alpha_threshold = 127);
void FloydSteinberg_Alpha4(Image * img); void FloydSteinberg_Alpha4(Image * img);
void Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
void FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
// @@ Add palette quantization algorithms! // @@ Add palette quantization algorithms!
} }
} }

View File

@ -108,7 +108,7 @@ public:
float area() const float area() const
{ {
const Vector3 d = extents(); const Vector3 d = extents();
return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z()); return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
} }
/// Get the volume of the box. /// Get the volume of the box.
@ -118,14 +118,6 @@ public:
return 8.0f * (d.x() * d.y() * d.z()); return 8.0f * (d.x() * d.y() * d.z());
} }
/// Return true if the point lies strictly inside the box.
/// Points exactly on a face compare false because every test is a strict inequality.
bool contains(Vector3::Arg p) const
{
    const bool insideX = m_mins.x() < p.x() && m_maxs.x() > p.x();
    const bool insideY = m_mins.y() < p.y() && m_maxs.y() > p.y();
    const bool insideZ = m_mins.z() < p.z() && m_maxs.z() > p.z();

    return insideX && insideY && insideZ;
}
private: private:
Vector3 m_mins; Vector3 m_mins;
@ -133,6 +125,15 @@ private:
}; };
/*
/// Point inside box test.
inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
{
return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
}
*/
} // nv namespace } // nv namespace

View File

@ -7,6 +7,8 @@ SET(MATH_SRCS
Quaternion.h Quaternion.h
Box.h Box.h
Color.h Color.h
Eigen.h Eigen.cpp
Fitting.h Fitting.cpp
Montecarlo.h Montecarlo.cpp Montecarlo.h Montecarlo.cpp
Random.h Random.cpp Random.h Random.cpp
SphericalHarmonic.h SphericalHarmonic.cpp SphericalHarmonic.h SphericalHarmonic.cpp

533
src/nvmath/Eigen.cpp Normal file
View File

@ -0,0 +1,533 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "Eigen.h"
using namespace nv;
// Relative convergence factor: semi_definite_symmetric_eigen keeps rotating
// while its running threshold is above (initial off-diagonal weight * EPS).
static const float EPS = 0.00001f;
// Upper bound on the number of rotation sweeps performed by semi_definite_symmetric_eigen.
static const int MAX_ITER = 100;
// Forward declaration; the definition is further down in this file.
static void semi_definite_symmetric_eigen(const float *mat, int n, float *eigen_vec, float *eigen_val);
// Use the power method to estimate the first (dominant) eigenvector of a
// symmetric 3x3 matrix.
// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html
//
// matrix holds the six unique entries of the symmetric matrix in the order
// { m00, m01, m02, m11, m12, m22 }.
//
// The returned vector is not unit length: after every iteration it is rescaled
// so that its largest component has magnitude 1. If the matrix maps the current
// estimate to the zero vector, the zero vector is returned.
Vector3 nv::firstEigenVector(float matrix[6])
{
    // Number of iterations. @@ Use a variable number of iterations.
    const int NUM = 8;

    Vector3 v(1, 1, 1);

    for (int i = 0; i < NUM; i++) {
        // (x, y, z) = M * v, expanding the symmetric storage.
        const float x = v.x() * matrix[0] + v.y() * matrix[1] + v.z() * matrix[2];
        const float y = v.x() * matrix[1] + v.y() * matrix[3] + v.z() * matrix[4];
        const float z = v.x() * matrix[2] + v.y() * matrix[4] + v.z() * matrix[5];

        // Normalize by the largest magnitude. Using the largest signed value
        // would report a false zero vector for inputs such as (0, -1, -2).
        const float norm = max(max(fabsf(x), fabsf(y)), fabsf(z));

        // Test before dividing so that 1/0 is never evaluated.
        if (norm == 0.0f) {
            return Vector3(zero);
        }

        const float iv = 1.0f / norm;
        v.set(x*iv, y*iv, z*iv);
    }

    return v;
}
/// Solve the eigen system of the stored N-dimensional matrix.
/// The work is delegated to semi_definite_symmetric_eigen, which fills
/// eigen_vec and eigen_val.
void Eigen::solve()
{
    semi_definite_symmetric_eigen(matrix, N, eigen_vec, eigen_val);
}
/// Solve the 3x3 eigen system.
/// The active (#if 1) branch calls semi_definite_symmetric_eigen with n = 3 and
/// copies the nine returned floats into the three eigen_vec entries, three
/// floats per vector. eigen_val is filled directly by that call.
/// The #else branch is compiled out; it is an alternative sweep-based solver kept for reference.
void Eigen3::solve() {
// @@ Use lengyel code that seems to be more optimized.
#if 1
// Scratch storage for three eigenvectors of three components each.
float v[3*3];
semi_definite_symmetric_eigen(matrix, 3, v, eigen_val);
eigen_vec[0].set(v[0], v[1], v[2]);
eigen_vec[1].set(v[3], v[4], v[5]);
eigen_vec[2].set(v[6], v[7], v[8]);
#else
// NOTE(review): this disabled branch reads Vector3 components as data members
// (.x/.y/.z) while the rest of this file uses accessor calls (x()/y()/z());
// it presumably needs updating before it can be enabled - confirm.
// NOTE(review): it also reads matrix as { m11, m12, m13, m22, m23, m33 }, which
// differs from the { m11, m12, m22, m13, m23, m33 } order documented on
// semi_definite_symmetric_eigen - confirm which layout matrix really uses.
const int maxSweeps = 32;
const float epsilon = 1.0e-10f;
float m11 = matrix[0]; // m(0,0);
float m12 = matrix[1]; // m(0,1);
float m13 = matrix[2]; // m(0,2);
float m22 = matrix[3]; // m(1,1);
float m23 = matrix[4]; // m(1,2);
float m33 = matrix[5]; // m(2,2);
//r.SetIdentity();
eigen_vec[0].set(1, 0, 0);
eigen_vec[1].set(0, 1, 0);
eigen_vec[2].set(0, 0, 1);
for (int a = 0; a < maxSweeps; a++)
{
// Exit if off-diagonal entries small enough
if ((fabs(m12) < epsilon) && (fabs(m13) < epsilon) && (fabs(m23) < epsilon))
{
break;
}
// Annihilate (1,2) entry
if (m12 != 0.0f)
{
float u = (m22 - m11) * 0.5f / m12;
float u2 = u * u;
float u2p1 = u2 + 1.0f;
// When u2 + 1 rounds to u2 (very large u) the 0.5 / u form is used instead.
float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u;
float c = 1.0f / sqrt(t * t + 1.0f);
float s = c * t;
m11 -= t * m12;
m22 += t * m12;
m12 = 0.0f;
float temp = c * m13 - s * m23;
m23 = s * m13 + c * m23;
m13 = temp;
// Apply the same rotation to the accumulated eigenvectors.
for (int i = 0; i < 3; i++)
{
float temp = c * eigen_vec[i].x - s * eigen_vec[i].y;
eigen_vec[i].y = s * eigen_vec[i].x + c * eigen_vec[i].y;
eigen_vec[i].x = temp;
}
}
// Annihilate (1,3) entry
if (m13 != 0.0f)
{
float u = (m33 - m11) * 0.5f / m13;
float u2 = u * u;
float u2p1 = u2 + 1.0f;
float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u;
float c = 1.0f / sqrt(t * t + 1.0f);
float s = c * t;
m11 -= t * m13;
m33 += t * m13;
m13 = 0.0f;
float temp = c * m12 - s * m23;
m23 = s * m12 + c * m23;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * eigen_vec[i].x - s * eigen_vec[i].z;
eigen_vec[i].z = s * eigen_vec[i].x + c * eigen_vec[i].z;
eigen_vec[i].x = temp;
}
}
// Annihilate (2,3) entry
if (m23 != 0.0f)
{
float u = (m33 - m22) * 0.5f / m23;
float u2 = u * u;
float u2p1 = u2 + 1.0f;
float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u;
float c = 1.0f / sqrt(t * t + 1.0f);
float s = c * t;
m22 -= t * m23;
m33 += t * m23;
m23 = 0.0f;
float temp = c * m12 - s * m13;
m13 = s * m12 + c * m13;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * eigen_vec[i].y - s * eigen_vec[i].z;
eigen_vec[i].z = s * eigen_vec[i].y + c * eigen_vec[i].z;
eigen_vec[i].y = temp;
}
}
}
// The diagonal that remains holds the eigenvalues.
eigen_val[0] = m11;
eigen_val[1] = m22;
eigen_val[2] = m33;
#endif
}
/*---------------------------------------------------------------------------
Functions
---------------------------------------------------------------------------*/
/** @@ I don't remember where I got this function.
* Computes the eigenvalues and eigenvectors
* of a semi-definite symmetric matrix.
*
* - matrix is stored in column symmetric storage, i.e.
* matrix = { m11, m12, m22, m13, m23, m33, m14, m24, m34, m44 ... }
* size = n(n+1)/2
*
* - eigen_vectors (return) = { v1, v2, v3, ..., vn } where vk = vk0, vk1, ..., vkn
* size = n^2, must be allocated by caller
*
* - eigen_values (return) are in decreasing order
* size = n, must be allocated by caller
*
* The input matrix is not modified; the function works on a private copy.
* The method looks like a threshold Jacobi rotation scheme: off-diagonal
* entries are zeroed one at a time by plane rotations until the running
* threshold falls to a_norm * EPS or MAX_ITER sweeps have been done.
*/
void semi_definite_symmetric_eigen(
const float *mat, int n, float *eigen_vec, float *eigen_val
) {
float *a,*v;
float a_norm,a_normEPS,thr,thr_nn;
int nb_iter = 0;
int jj;
int i,j,k,ij,ik,l,m,lm,mq,lq,ll,mm,imv,im,iq,ilv,il,nn;
int *index;
float a_ij,a_lm,a_ll,a_mm,a_im,a_il;
float a_lm_2;
float v_ilv,v_imv;
float x;
float sinx,sinx_2,cosx,cosx_2,sincos;
float delta;
// Number of entries in mat
nn = (n*(n+1))/2;
// Step 1: Copy mat to a
a = new float[nn];
for( ij=0; ij<nn; ij++ ) {
a[ij] = mat[ij];
}
// Ugly Fortran-porting trick: indices for a start at 1 (valid range 1..nn)
// NOTE(review): forming a pointer one element before the allocation is
// formally undefined behavior in C++, even though it is never dereferenced at index 0.
a--;
// Step 2 : Init diagonalization matrix as the unit matrix
v = new float[n*n];
ij = 0;
for( i=0; i<n; i++ ) {
for( j=0; j<n; j++ ) {
if( i==j ) {
v[ij++] = 1.0;
} else {
v[ij++] = 0.0;
}
}
}
// Ugly Fortran-porting trick: indices for v start at 1 (valid range 1..n*n)
v--;
// Step 3 : compute the weight of the non diagonal terms
// (sum of the squares of the packed off-diagonal entries)
ij = 1 ;
a_norm = 0.0;
for( i=1; i<=n; i++ ) {
for( j=1; j<=i; j++ ) {
if( i!=j ) {
a_ij = a[ij];
a_norm += a_ij*a_ij;
}
ij++;
}
}
// A zero weight means the matrix is already diagonal: skip the rotations.
if( a_norm != 0.0 ) {
a_normEPS = a_norm*EPS;
thr = a_norm ;
// Step 4 : rotations
while( thr > a_normEPS && nb_iter < MAX_ITER ) {
nb_iter++;
thr_nn = thr / nn;
// Visit every pair (l, m) with l < m.
for( l=1 ; l< n; l++ ) {
for( m=l+1; m<=n; m++ ) {
// compute sinx and cosx
// lq / mq are the packed offsets preceding columns l and m.
lq = (l*l-l)/2;
mq = (m*m-m)/2;
lm = l+mq;
a_lm = a[lm];
a_lm_2 = a_lm*a_lm;
// Entries whose square is below the per-entry threshold are left alone in this sweep.
if( a_lm_2 < thr_nn ) {
continue ;
}
ll = l+lq;
mm = m+mq;
a_ll = a[ll];
a_mm = a[mm];
delta = a_ll - a_mm;
// Equal diagonal entries would divide by zero below; use -PI/4 directly.
if( delta == 0.0f ) {
x = - PI/4 ;
} else {
x = - atanf( (a_lm+a_lm) / delta ) / 2.0f ;
}
sinx = sinf(x);
cosx = cosf(x);
sinx_2 = sinx*sinx;
cosx_2 = cosx*cosx;
sincos = sinx*cosx;
// rotate L and M columns
// ilv / imv walk the l-th and m-th groups of n entries in v.
ilv = n*(l-1);
imv = n*(m-1);
for( i=1; i<=n;i++ ) {
if( (i!=l) && (i!=m) ) {
iq = (i*i-i)/2;
// Packed storage keeps one triangle only: the smaller index comes first.
if( i<m ) {
im = i + mq;
} else {
im = m + iq;
}
a_im = a[im];
if( i<l ) {
il = i + lq;
} else {
il = l + iq;
}
a_il = a[il];
a[il] = a_il*cosx - a_im*sinx;
a[im] = a_il*sinx + a_im*cosx;
}
// v is rotated for every i, including i == l and i == m.
ilv++;
imv++;
v_ilv = v[ilv];
v_imv = v[imv];
v[ilv] = cosx*v_ilv - sinx*v_imv;
v[imv] = sinx*v_ilv + cosx*v_imv;
}
// x is reused here as 2 * a_lm * sin * cos.
x = a_lm*sincos; x+=x;
a[ll] = a_ll*cosx_2 + a_mm*sinx_2 - x;
a[mm] = a_ll*sinx_2 + a_mm*cosx_2 + x;
a[lm] = 0.0;
// Lower the running threshold by the square of the entry just zeroed.
thr = fabs( thr - a_lm_2 );
}
}
}
}
// Step 5: index conversion and copy eigen values
// back from Fortran to C++
a++;
for( i=0; i<n; i++ ) {
// 0-based packed index of diagonal entry (i, i).
k = i + (i*(i+1))/2;
eigen_val[i] = a[k];
}
delete[] a;
// Step 6: sort the eigen values and eigen vectors
// (selection sort, largest first; index records where each vector came from)
index = new int[n];
for( i=0; i<n; i++ ) {
index[i] = i;
}
for( i=0; i<(n-1); i++ ) {
x = eigen_val[i];
k = i;
for( j=i+1; j<n; j++ ) {
if( x < eigen_val[j] ) {
k = j;
x = eigen_val[j];
}
}
eigen_val[k] = eigen_val[i];
eigen_val[i] = x;
jj = index[k];
index[k] = index[i];
index[i] = jj;
}
// Step 7: save the eigen vectors
v++; // back from Fortran to C++
ij = 0;
for( k=0; k<n; k++ ) {
// Copy the n components of the vector that sorted into position k.
ik = index[k]*n;
for( i=0; i<n; i++ ) {
eigen_vec[ij++] = v[ik++];
}
}
delete[] v ;
delete[] index;
return;
}
//_________________________________________________________
// Eric Lengyel code:
// http://www.terathon.com/code/linear.html
#if 0
// Off-diagonal entries with magnitude below this are treated as zero by
// CalculateEigensystem when it decides whether to stop iterating.
const float epsilon = 1.0e-10F;
// Upper bound on the number of sweeps CalculateEigensystem performs.
const int maxSweeps = 32;
// Minimal 3x3 float matrix. Element (i, j) is stored at n[j][i].
struct Matrix3D
{
    float n[3][3];

    // Mutable access to element (i, j).
    float& operator()(int i, int j)
    {
        float (&column)[3] = n[j];
        return column[i];
    }

    // Read-only access to element (i, j).
    const float& operator()(int i, int j) const
    {
        const float (&column)[3] = n[j];
        return column[i];
    }

    // Overwrite every element so the matrix becomes the identity.
    void SetIdentity(void)
    {
        for (int col = 0; col < 3; col++)
        {
            for (int row = 0; row < 3; row++)
            {
                n[col][row] = (row == col) ? 1.0F : 0.0F;
            }
        }
    }
};
// Diagonalizes a symmetric 3x3 matrix by repeatedly applying rotations that
// each annihilate one off-diagonal entry.
//
// m      - input matrix; only m(0,0), m(0,1), m(0,2), m(1,1), m(1,2), m(2,2) are read.
// lambda - receives the three final diagonal values (m11, m22, m33); they are
//          written in that order, not sorted.
// r      - reset to identity, then accumulates every rotation applied.
//
// At most maxSweeps sweeps are performed; the loop exits early once all three
// off-diagonal entries are smaller in magnitude than epsilon.
void CalculateEigensystem(const Matrix3D& m, float *lambda, Matrix3D& r)
{
// Working copy of the six unique entries of the symmetric matrix.
float m11 = m(0,0);
float m12 = m(0,1);
float m13 = m(0,2);
float m22 = m(1,1);
float m23 = m(1,2);
float m33 = m(2,2);
r.SetIdentity();
for (int a = 0; a < maxSweeps; a++)
{
// Exit if off-diagonal entries small enough
if ((Fabs(m12) < epsilon) && (Fabs(m13) < epsilon) &&
(Fabs(m23) < epsilon)) break;
// Annihilate (1,2) entry
if (m12 != 0.0F)
{
float u = (m22 - m11) * 0.5F / m12;
float u2 = u * u;
float u2p1 = u2 + 1.0F;
// t = sign(u) * (sqrt(u^2 + 1) - |u|). When u is so large that adding 1
// does not change u2 in float precision, 0.5 / u is used instead.
float t = (u2p1 != u2) ?
((u < 0.0F) ? -1.0F : 1.0F) * (sqrt(u2p1) - fabs(u)) : 0.5F / u;
// c and s are derived from t as 1/sqrt(t^2 + 1) and t/sqrt(t^2 + 1).
float c = 1.0F / sqrt(t * t + 1.0F);
float s = c * t;
m11 -= t * m12;
m22 += t * m12;
m12 = 0.0F;
// Update the remaining off-diagonal pair; temp keeps the old m13 usable
// while m23 is being computed.
float temp = c * m13 - s * m23;
m23 = s * m13 + c * m23;
m13 = temp;
// Apply the same rotation to columns 0 and 1 of r.
for (int i = 0; i < 3; i++)
{
float temp = c * r(i,0) - s * r(i,1);
r(i,1) = s * r(i,0) + c * r(i,1);
r(i,0) = temp;
}
}
// Annihilate (1,3) entry
if (m13 != 0.0F)
{
float u = (m33 - m11) * 0.5F / m13;
float u2 = u * u;
float u2p1 = u2 + 1.0F;
// Same t / c / s derivation as for the (1,2) entry.
float t = (u2p1 != u2) ?
((u < 0.0F) ? -1.0F : 1.0F) * (sqrt(u2p1) - fabs(u)) : 0.5F / u;
float c = 1.0F / sqrt(t * t + 1.0F);
float s = c * t;
m11 -= t * m13;
m33 += t * m13;
m13 = 0.0F;
float temp = c * m12 - s * m23;
m23 = s * m12 + c * m23;
m12 = temp;
// Apply the same rotation to columns 0 and 2 of r.
for (int i = 0; i < 3; i++)
{
float temp = c * r(i,0) - s * r(i,2);
r(i,2) = s * r(i,0) + c * r(i,2);
r(i,0) = temp;
}
}
// Annihilate (2,3) entry
if (m23 != 0.0F)
{
float u = (m33 - m22) * 0.5F / m23;
float u2 = u * u;
float u2p1 = u2 + 1.0F;
// Same t / c / s derivation as for the (1,2) entry.
float t = (u2p1 != u2) ?
((u < 0.0F) ? -1.0F : 1.0F) * (sqrt(u2p1) - fabs(u)) : 0.5F / u;
float c = 1.0F / sqrt(t * t + 1.0F);
float s = c * t;
m22 -= t * m23;
m33 += t * m23;
m23 = 0.0F;
float temp = c * m12 - s * m13;
m13 = s * m12 + c * m13;
m12 = temp;
// Apply the same rotation to columns 1 and 2 of r.
for (int i = 0; i < 3; i++)
{
float temp = c * r(i,1) - s * r(i,2);
r(i,2) = s * r(i,1) + c * r(i,2);
r(i,1) = temp;
}
}
}
// The diagonal now holds the results; they are written out unsorted.
lambda[0] = m11;
lambda[1] = m22;
lambda[2] = m33;
}
#endif

140
src/nvmath/Eigen.h Normal file
View File

@ -0,0 +1,140 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_EIGEN_H
#define NV_MATH_EIGEN_H
#include <nvcore/Containers.h> // swap
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
namespace nv
{
// Compute first eigen vector using the power method.
// NOTE(review): `matrix` presumably holds the six unique entries of a symmetric
// 3x3 matrix in the order xx, xy, xz, yy, yz, zz (that is what Fit::bestLine
// passes in) -- confirm against the definition.
Vector3 firstEigenVector(float matrix[6]);
/// Generic eigen-solver.
class Eigen
{
public:
/// Ctor.
Eigen(uint n) : N(n)
{
uint size = n * (n + 1) / 2;
matrix = new float[size];
eigen_vec = new float[N*N];
eigen_val = new float[N];
}
/// Dtor.
~Eigen()
{
delete [] matrix;
delete [] eigen_vec;
delete [] eigen_val;
}
NVMATH_API void solve();
/// Matrix accesor.
float & operator()(uint x, uint y)
{
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
/// Matrix const accessor.
float operator()(uint x, uint y) const
{
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
Vector3 eigenVector3(uint i) const
{
nvCheck(3 == N);
nvCheck(i < N);
return Vector3(eigen_vec[i*N+0], eigen_vec[i*N+1], eigen_vec[i*N+2]);
}
Vector4 eigenVector4(uint i) const
{
nvCheck(4 == N);
nvCheck(i < N);
return Vector4(eigen_vec[i*N+0], eigen_vec[i*N+1], eigen_vec[i*N+2], eigen_vec[i*N+3]);
}
float eigenValue(uint i) const
{
nvCheck(i < N);
return eigen_val[i];
}
private:
const uint N;
float * matrix;
float * eigen_vec;
float * eigen_val;
};
/// 3x3 eigen-solver.
/// Based on Eric Lengyel's code:
/// http://www.terathon.com/code/linear.html
class Eigen3
{
public:
/** Ctor. */
Eigen3() {}
NVMATH_API void solve();
/// Matrix accesor.
float & operator()(uint x, uint y)
{
nvDebugCheck( x < 3 && y < 3 );
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
/// Matrix const accessor.
float operator()(uint x, uint y) const
{
nvDebugCheck( x < 3 && y < 3 );
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
/// Get ith eigen vector.
Vector3 eigenVector(uint i) const
{
nvCheck(i < 3);
return eigen_vec[i];
}
/** Get ith eigen value. */
float eigenValue(uint i) const
{
nvCheck(i < 3);
return eigen_val[i];
}
private:
float matrix[3+2+1];
Vector3 eigen_vec[3];
float eigen_val[3];
};
} // nv namespace
#endif // NV_MATH_EIGEN_H

134
src/nvmath/Fitting.cpp Normal file
View File

@ -0,0 +1,134 @@
// License: Wild Magic License Version 3
// http://geometrictools.com/License/WildMagic3License.pdf
#include "Fitting.h"
#include "Eigen.h"
using namespace nv;
/** Fit a 3d line to the given set of points.
 *
 * The line origin is the mean of the points. The direction is the first
 * eigen vector of their (unnormalized) covariance matrix, passed through
 * normalizeSafe with a zero-vector fallback.
 *
 * Based on code from:
 * http://geometrictools.com/
 */
Line3 Fit::bestLine(const Array<Vector3> & pointArray)
{
    nvDebugCheck(pointArray.count() > 0);

    const uint pointCount = pointArray.count();
    const float inv_num = 1.0f / pointCount;

    // Mean of the points: accumulate, then scale once.
    Vector3 sum(zero);
    for (uint p = 0; p < pointCount; p++) {
        sum += pointArray[p];
    }
    const Vector3 centroid = sum * inv_num;

    // Six unique entries of the symmetric covariance matrix, in the order
    // xx, xy, xz, yy, yz, zz.
    float covariance[6] = {0, 0, 0, 0, 0, 0};
    for (uint p = 0; p < pointCount; p++) {
        const Vector3 diff = pointArray[p] - centroid;

        covariance[0] += diff.x() * diff.x();
        covariance[1] += diff.x() * diff.y();
        covariance[2] += diff.x() * diff.z();
        covariance[3] += diff.y() * diff.y();
        covariance[4] += diff.y() * diff.z();
        covariance[5] += diff.z() * diff.z();
    }

    // @@ David Eberly's variant (not used here) scales the sums by inv_num,
    // feeds { yy+zz, -xy, -xz, xx+zz, -yz, xx+yy } to Eigen3 and takes
    // eigenVector(2), i.e. the one belonging to the smallest eigen value.
    const Vector3 direction = normalizeSafe(firstEigenVector(covariance), Vector3(zero), 0.0f);

    return Line3(centroid, direction);
}
/** Fit a 3d plane to the given set of points.
 *
 * Returns Vector4(normal, offset) where offset = dot(normal, center) and
 * center is the mean of the points. The array must not be empty.
 *
 * Based on code from:
 * http://geometrictools.com/
 */
Vector4 Fit::bestPlane(const Array<Vector3> & pointArray)
{
    // Same precondition as bestLine: an empty array would divide by zero below.
    nvDebugCheck(pointArray.count() > 0);

    Vector3 center(zero);

    const uint pointCount = pointArray.count();
    const float inv_num = 1.0f / pointCount;

    // compute the mean of the points
    for(uint i = 0; i < pointCount; i++) {
        center += pointArray[i];
    }
    center *= inv_num;

    // compute the covariance matrix of the points
    float sum_xx = 0.0f;
    float sum_xy = 0.0f;
    float sum_xz = 0.0f;
    float sum_yy = 0.0f;
    float sum_yz = 0.0f;
    float sum_zz = 0.0f;

    for(uint i = 0; i < pointCount; i++) {
        Vector3 diff = pointArray[i] - center;
        sum_xx += diff.x() * diff.x();
        sum_xy += diff.x() * diff.y();
        sum_xz += diff.x() * diff.z();
        sum_yy += diff.y() * diff.y();
        sum_yz += diff.y() * diff.z();
        sum_zz += diff.z() * diff.z();
    }

    sum_xx *= inv_num;
    sum_xy *= inv_num;
    sum_xz *= inv_num;
    sum_yy *= inv_num;
    sum_yz *= inv_num;
    sum_zz *= inv_num;

    // set up the eigensolver
    // The matrix is trace(C) * I - C for covariance C, so its greatest eigen
    // value belongs to the direction in which the points vary the least.
    Eigen3 ES;
    ES(0,0) = sum_yy + sum_zz;
    ES(0,1) = -sum_xy;
    ES(0,2) = -sum_xz;
    ES(1,1) = sum_xx + sum_zz;
    ES(1,2) = -sum_yz;
    ES(2,2) = sum_xx + sum_yy;

    // compute eigenstuff, greatest eigenvalue is in first position
    ES.solve();

    Vector3 normal = ES.eigenVector(0);
    nvCheck(isNormalized(normal));

    float offset = dot(normal, center);

    return Vector4(normal, offset);
}

78
src/nvmath/Fitting.h Normal file
View File

@ -0,0 +1,78 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_FITTING_H
#define NV_MATH_FITTING_H
#include <nvmath/Vector.h>
namespace nv
{
/// 3D Line.
struct Line3
{
/// Ctor.
Line3() : m_origin(zero), m_direction(zero)
{
}
/// Copy ctor.
Line3(const Line3 & l) : m_origin(l.m_origin), m_direction(l.m_direction)
{
}
/// Ctor.
Line3(Vector3::Arg o, Vector3::Arg d) : m_origin(o), m_direction(d)
{
}
/// Normalize the line.
void normalize()
{
m_direction = nv::normalize(m_direction);
}
/// Project a point onto the line.
Vector3 projectPoint(Vector3::Arg point) const
{
nvDebugCheck(isNormalized(m_direction));
Vector3 v = point - m_origin;
return m_origin + m_direction * dot(m_direction, v);
}
/// Compute distance to line.
float distanceToPoint(Vector3::Arg point) const
{
nvDebugCheck(isNormalized(m_direction));
Vector3 v = point - m_origin;
Vector3 l = v - m_direction * dot(m_direction, v);
return length(l);
}
const Vector3 & origin() const { return m_origin; }
void setOrigin(Vector3::Arg value) { m_origin = value; }
const Vector3 & direction() const { return m_direction; }
void setDirection(Vector3::Arg value) { m_direction = value; }
private:
Vector3 m_origin;
Vector3 m_direction;
};
/// Least-squares style fitting helpers, implemented in Fitting.cpp.
namespace Fit
{
/// Fit a 3d line to the points: origin is their mean, direction comes from
/// the first eigen vector of their covariance matrix.
NVMATH_API Line3 bestLine(const Array<Vector3> & pointArray);
/// Fit a 3d plane to the points; returns Vector4(normal, dot(normal, mean)).
NVMATH_API Vector4 bestPlane(const Array<Vector3> & pointArray);
} // Fit namespace
} // nv namespace
#endif // NV_MATH_FITTING_H

View File

@ -1,17 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "Plane.h"
#include "Matrix.h"
namespace nv
{
// Transform a plane by a matrix: the plane vector is transformed as a
// direction, the point offset * vector (which lies on the plane) is
// transformed as a position, and the plane is rebuilt from the two.
Plane transformPlane(const Matrix& m, Plane::Arg p)
{
    const Vector3 transformedNormal = transformVector(m, p.vector());
    const Vector3 transformedPoint = transformPoint(m, p.offset() * p.vector());
    return Plane(transformedNormal, transformedPoint);
}
}

View File

@ -1,77 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_PLANE_H
#define NV_MATH_PLANE_H
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
namespace nv
{
class Matrix;
// Plane stored as a single Vector4: xyz is the plane vector, w is the offset.
class NVMATH_CLASS Plane
{
public:
// Convention for passing planes as arguments.
typedef Plane const & Arg;
// Default ctor; p is default-constructed.
Plane();
// Build from the four raw components.
Plane(float x, float y, float z, float w);
// Build from a Vector4 holding (vector, offset).
Plane(Vector4::Arg v);
// Build from a plane vector and an offset.
Plane(Vector3::Arg v, float d);
// Build from a normal and a point; the offset is dot(normal, point).
Plane(Vector3::Arg normal, Vector3::Arg point);
const Plane & operator=(Plane::Arg v);
// xyz part of the stored vector.
Vector3 vector() const;
// w part of the stored vector.
scalar offset() const;
// Direct access to the underlying Vector4.
const Vector4 & asVector() const;
Vector4 & asVector();
void operator*=(scalar s);
private:
Vector4 p;
};
// Default ctor: p is default-constructed.
inline Plane::Plane()
{
}

// Ctor from the four raw components.
inline Plane::Plane(float x, float y, float z, float w) : p(x, y, z, w)
{
}

// Ctor from a Vector4 holding (vector, offset).
inline Plane::Plane(Vector4::Arg v) : p(v)
{
}

// Ctor from a plane vector and an offset.
inline Plane::Plane(Vector3::Arg v, float d) : p(v, d)
{
}

// Ctor from a normal and a point; the offset is dot(normal, point).
inline Plane::Plane(Vector3::Arg normal, Vector3::Arg point) : p(normal, dot(normal, point))
{
}

// Assignment: copies the underlying Vector4.
inline const Plane & Plane::operator=(Plane::Arg v)
{
    p = v.p;
    return *this;
}

// xyz part of the stored vector.
inline Vector3 Plane::vector() const
{
    return p.xyz();
}

// w part of the stored vector.
inline scalar Plane::offset() const
{
    return p.w();
}

// Read-only access to the underlying Vector4.
inline const Vector4 & Plane::asVector() const
{
    return p;
}

// Mutable access to the underlying Vector4.
inline Vector4 & Plane::asVector()
{
    return p;
}
// Normalize plane: divides all four components by the length of the plane
// vector. Debug builds assert that this length is not zero within epsilon.
inline Plane normalize(Plane::Arg plane, float epsilon = NV_EPSILON)
{
    const float vectorLength = length(plane.vector());
    nvDebugCheck(!isZero(vectorLength, epsilon));

    const float reciprocal = 1.0f / vectorLength;
    return Plane(plane.asVector() * reciprocal);
}
// Get the distance from the given point to this plane, computed as
// dot(plane vector, point) - plane offset.
inline float distance(Plane::Arg plane, Vector3::Arg point)
{
    const Vector3 planeVector = plane.vector();
    return dot(planeVector, point) - plane.offset();
}
// Scale the plane by s; forwards to scale() on the underlying Vector4.
// NOTE(review): scale() is defined elsewhere -- presumably it multiplies all
// four components in place; confirm.
inline void Plane::operator*=(scalar s)
{
scale(p, s);
}
// Transform a plane by a matrix (defined in Plane.cpp).
Plane transformPlane(const Matrix&, Plane::Arg);
} // nv namespace
#endif // NV_MATH_PLANE_H

View File

@ -13,10 +13,10 @@ SET(NVTT_SRCS
CompressDXT.cpp CompressDXT.cpp
CompressRGB.h CompressRGB.h
CompressRGB.cpp CompressRGB.cpp
FastCompressDXT.h
FastCompressDXT.cpp
QuickCompressDXT.h QuickCompressDXT.h
QuickCompressDXT.cpp QuickCompressDXT.cpp
OptimalCompressDXT.h
OptimalCompressDXT.cpp
SingleColorLookup.h SingleColorLookup.h
CompressionOptions.h CompressionOptions.h
CompressionOptions.cpp CompressionOptions.cpp
@ -43,9 +43,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVTT_EXPORTS) ADD_DEFINITIONS(-DNVTT_EXPORTS)
IF(NVTT_SHARED) IF(NVTT_SHARED)
ADD_DEFINITIONS(-DNVTT_SHARED=1) ADD_LIBRARY(nvtt SHARED ${DXT_SRCS})
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED) ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS}) ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED) ENDIF(NVTT_SHARED)
@ -85,7 +84,7 @@ TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin) INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin)
# UI tools # UI tools
IF(QT4_FOUND AND NOT MSVC) IF(QT4_FOUND AND NOT MSVC)
SET(QT_USE_QTOPENGL TRUE) SET(QT_USE_QTOPENGL TRUE)
INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
@ -107,7 +106,7 @@ IF(QT4_FOUND AND NOT MSVC)
ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS}) ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
TARGET_LINK_LIBRARIES(nvcompressui ${LIBS}) TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
ENDIF(QT4_FOUND AND NOT MSVC) ENDIF(QT4_FOUND AND NOT MSVC)

View File

@ -29,8 +29,8 @@
#include "nvtt.h" #include "nvtt.h"
#include "CompressDXT.h" #include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "QuickCompressDXT.h" #include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "CompressionOptions.h" #include "CompressionOptions.h"
#include "OutputOptions.h" #include "OutputOptions.h"
@ -57,33 +57,26 @@ using namespace nv;
using namespace nvtt; using namespace nvtt;
nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None) void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
{ {
} const uint w = image->width();
const uint h = image->height();
nv::FastCompressor::~FastCompressor()
{
}
void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT1 block; BlockDXT1 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
QuickCompress::compressDXT1(rgba, &block); if (rgba.isSingleColor())
{
QuickCompress::compressDXT1(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1(rgba, &block);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -93,17 +86,17 @@ void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptio
} }
void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions) void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & outputOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT1 block; BlockDXT1 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
QuickCompress::compressDXT1a(rgba, &block); QuickCompress::compressDXT1a(rgba, &block);
@ -115,19 +108,18 @@ void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOpti
} }
void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions) void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT3 block; BlockDXT3 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
compressBlock_BoundsRange(rgba, &block);
QuickCompress::compressDXT3(rgba, &block);
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -137,19 +129,18 @@ void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outpu
} }
void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions) void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT5 block; BlockDXT5 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
compressBlock_BoundsRange(rgba, &block);
QuickCompress::compressDXT5(rgba, &block, 0);
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -159,21 +150,22 @@ void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outpu
} }
void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions) void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT5 block; BlockDXT5 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
// copy X coordinate to alpha channel and Y coordinate to green channel.
rgba.swizzleDXT5n(); rgba.swizzleDXT5n();
QuickCompress::compressDXT5(rgba, &block, 0); compressBlock_BoundsRange(rgba, &block);
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -183,28 +175,42 @@ void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outp
} }
nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None) void nv::fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{ {
// @@ TODO
// compress red channel (X)
} }
nv::SlowCompressor::~SlowCompressor()
void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{ {
// @@ TODO
// compress red, green channels (X,Y)
} }
void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
void nv::doPrecomputation()
{ {
m_image = image; static bool done = false; // @@ Stop using statics for reentrancy.
m_alphaMode = alphaMode;
if (!done)
{
done = true;
squish::FastClusterFit::DoPrecomputation();
}
} }
void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void nv::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT1 block; BlockDXT1 block;
doPrecomputation();
//squish::WeightedClusterFit fit; //squish::WeightedClusterFit fit;
//squish::ClusterFit fit; //squish::ClusterFit fit;
squish::FastClusterFit fit; squish::FastClusterFit fit;
@ -213,11 +219,11 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
if (rgba.isSingleColor()) if (rgba.isSingleColor())
{ {
OptimalCompress::compressDXT1(rgba.color(0), &block); QuickCompress::compressDXT1(rgba.color(0), &block);
} }
else else
{ {
@ -234,10 +240,10 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
} }
void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT1 block; BlockDXT1 block;
@ -248,27 +254,12 @@ void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compr
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
bool anyAlpha = false; // Compress color.
bool allAlpha = true; squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, squish::kDxt1);
for (uint i = 0; i < 16; i++) fit.Compress(&block);
{
if (rgba.color(i).a < 128) anyAlpha = true;
else allAlpha = false;
}
if ((!anyAlpha && rgba.isSingleColor() || allAlpha))
{
OptimalCompress::compressDXT1a(rgba.color(0), &block);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -278,37 +269,29 @@ void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compr
} }
void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void nv::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT3 block; BlockDXT3 block;
squish::WeightedClusterFit fit; squish::WeightedClusterFit fit;
//squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
// Compress explicit alpha. // Compress explicit alpha.
OptimalCompress::compressDXT3A(rgba, &block.alpha); compressBlock(rgba, &block.alpha);
// Compress color. // Compress color.
if (rgba.isSingleColor()) squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
{ fit.SetColourSet(&colours, 0);
OptimalCompress::compressDXT1(rgba.color(0), &block.color); fit.Compress(&block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -317,10 +300,10 @@ void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compre
} }
} }
void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void nv::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT5 block; BlockDXT5 block;
@ -331,29 +314,23 @@ void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compre
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
// Compress alpha. // Compress alpha.
uint error;
if (compressionOptions.quality == Quality_Highest) if (compressionOptions.quality == Quality_Highest)
{ {
OptimalCompress::compressDXT5A(rgba, &block.alpha); error = compressBlock_BruteForce(rgba, &block.alpha);
} }
else else
{ {
QuickCompress::compressDXT5A(rgba, &block.alpha); error = compressBlock_Iterative(rgba, &block.alpha);
} }
// Compress color. // Compress color.
if (rgba.isSingleColor()) squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
{ fit.SetColourSet(&colours, 0);
OptimalCompress::compressDXT1(rgba.color(0), &block.color); fit.Compress(&block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -363,33 +340,33 @@ void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compre
} }
void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT5 block; BlockDXT5 block;
doPrecomputation();
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
// copy X coordinate to green channel and Y coordinate to alpha channel.
rgba.swizzleDXT5n(); rgba.swizzleDXT5n();
// Compress X. // Compress X.
uint error = compressBlock_Iterative(rgba, &block.alpha);
if (compressionOptions.quality == Quality_Highest) if (compressionOptions.quality == Quality_Highest)
{ {
OptimalCompress::compressDXT5A(rgba, &block.alpha); error = compressBlock_BruteForce(rgba, &block.alpha);
}
else
{
QuickCompress::compressDXT5A(rgba, &block.alpha);
} }
// Compress Y. // Compress Y.
OptimalCompress::compressDXT1G(rgba, &block.color); QuickCompress::compressDXT1G(rgba, &block.color);
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -399,28 +376,32 @@ void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compr
} }
void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock rgba; ColorBlock rgba;
AlphaBlockDXT5 block; AlphaBlockDXT5 block;
uint totalError = 0;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y); rgba.init(image, x, y);
//error = compressBlock_BoundsRange(rgba, &block);
uint error = compressBlock_Iterative(rgba, &block);
if (compressionOptions.quality == Quality_Highest) if (compressionOptions.quality == Quality_Highest)
{ {
OptimalCompress::compressDXT5A(rgba, &block); // Try brute force algorithm.
} error = compressBlock_BruteForce(rgba, &block);
else
{
QuickCompress::compressDXT5A(rgba, &block);
} }
totalError += error;
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
} }
@ -429,10 +410,10 @@ void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compres
} }
void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
const uint w = m_image->width(); const uint w = image->width();
const uint h = m_image->height(); const uint h = image->height();
ColorBlock xcolor; ColorBlock xcolor;
ColorBlock ycolor; ColorBlock ycolor;
@ -442,21 +423,24 @@ void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compres
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
xcolor.init(m_image, x, y); xcolor.init(image, x, y);
xcolor.splatX(); xcolor.splatX();
ycolor.init(m_image, x, y); ycolor.init(image, x, y);
ycolor.splatY(); ycolor.splatY();
// @@ Compute normal error, instead of separate xy errors.
uint xerror, yerror;
if (compressionOptions.quality == Quality_Highest) if (compressionOptions.quality == Quality_Highest)
{ {
OptimalCompress::compressDXT5A(xcolor, &block.x); xerror = compressBlock_BruteForce(xcolor, &block.x);
OptimalCompress::compressDXT5A(ycolor, &block.y); yerror = compressBlock_BruteForce(ycolor, &block.y);
} }
else else
{ {
QuickCompress::compressDXT5A(xcolor, &block.x); xerror = compressBlock_Iterative(xcolor, &block.x);
QuickCompress::compressDXT5A(ycolor, &block.y); yerror = compressBlock_Iterative(ycolor, &block.y);
} }
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {

View File

@ -32,46 +32,26 @@ namespace nv
class Image; class Image;
class FloatImage; class FloatImage;
class FastCompressor void doPrecomputation();
{
public: // Fast compressors.
FastCompressor(); void fastCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
~FastCompressor(); void fastCompressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void setImage(const Image * image, nvtt::AlphaMode alphaMode); void fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1(const nvtt::OutputOptions::Private & outputOptions); void fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions); void fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
class SlowCompressor
{
public:
SlowCompressor();
~SlowCompressor();
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
// Normal compressors.
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
// External compressors. // External compressors.
#if defined(HAVE_S3QUANT) #if defined(HAVE_S3QUANT)
void s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions); void s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);

View File

@ -115,18 +115,12 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
c |= PixelFormat::convert(src[x].b, 8, bsize) << bshift; c |= PixelFormat::convert(src[x].b, 8, bsize) << bshift;
c |= PixelFormat::convert(src[x].a, 8, asize) << ashift; c |= PixelFormat::convert(src[x].a, 8, asize) << ashift;
// Output one byte at a time. // Output one byte at a time. @@ Not tested... Does this work on LE and BE?
for (uint i = 0; i < byteCount; i++) for (uint i = 0; i < byteCount; i++)
{ {
*(dst + x * byteCount + i) = (c >> (i * 8)) & 0xFF; *(dst + x * byteCount) = (c >> (i * 8)) & 0xFF;
} }
} }
// Zero padding.
for (uint x = w * byteCount; x < pitch; x++)
{
*(dst + x) = 0;
}
} }
if (outputOptions.outputHandler != NULL) if (outputOptions.outputHandler != NULL)

View File

@ -34,7 +34,6 @@
#include <nvimage/Filter.h> #include <nvimage/Filter.h>
#include <nvimage/Quantize.h> #include <nvimage/Quantize.h>
#include <nvimage/NormalMap.h> #include <nvimage/NormalMap.h>
#include <nvimage/PixelFormat.h>
#include "Compressor.h" #include "Compressor.h"
#include "InputOptions.h" #include "InputOptions.h"
@ -42,6 +41,7 @@
#include "OutputOptions.h" #include "OutputOptions.h"
#include "CompressDXT.h" #include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "CompressRGB.h" #include "CompressRGB.h"
#include "cuda/CudaUtils.h" #include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h" #include "cuda/CudaCompressDXT.h"
@ -200,7 +200,7 @@ namespace nvtt
AutoPtr<FloatImage> m_floatImage; AutoPtr<FloatImage> m_floatImage;
}; };
} // nvtt namespace }
Compressor::Compressor() : m(*new Compressor::Private()) Compressor::Compressor() : m(*new Compressor::Private())
@ -211,10 +211,6 @@ Compressor::Compressor() : m(*new Compressor::Private())
if (m.cudaEnabled) if (m.cudaEnabled)
{ {
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor(); m.cuda = new CudaCompressor();
if (!m.cuda->isValid()) if (!m.cuda->isValid())
@ -241,10 +237,6 @@ void Compressor::enableCudaAcceleration(bool enable)
if (m.cudaEnabled && m.cuda == NULL) if (m.cudaEnabled && m.cuda == NULL)
{ {
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor(); m.cuda = new CudaCompressor();
if (!m.cuda->isValid()) if (!m.cuda->isValid())
@ -346,7 +338,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
if (compressionOptions.format == Format_RGBA) if (compressionOptions.format == Format_RGBA)
{ {
header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount)); header.setPitch(4 * inputOptions.targetWidth);
header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask); header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
} }
else else
@ -430,7 +422,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
quantizeMipmap(mipmap, compressionOptions); quantizeMipmap(mipmap, compressionOptions);
compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions); compressMipmap(mipmap, compressionOptions, outputOptions);
// Compute extents of next mipmap: // Compute extents of next mipmap:
w = max(1U, w / 2); w = max(1U, w / 2);
@ -579,7 +571,7 @@ void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Priva
// Resize image. // Resize image.
BoxFilter boxFilter; BoxFilter boxFilter;
mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode)); mipmap.setImage(mipmap.asFloatImage()->downSample(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
} }
@ -626,6 +618,13 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
{ {
nvDebugCheck(mipmap.asFixedImage() != NULL); nvDebugCheck(mipmap.asFixedImage() != NULL);
if (compressionOptions.enableColorDithering)
{
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
{
Quantize::FloydSteinberg_RGB16(mipmap.asMutableFixedImage());
}
}
if (compressionOptions.binaryAlpha) if (compressionOptions.binaryAlpha)
{ {
if (compressionOptions.enableAlphaDithering) if (compressionOptions.enableAlphaDithering)
@ -637,67 +636,30 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold); Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
} }
} }
else
if (compressionOptions.enableColorDithering || compressionOptions.enableAlphaDithering)
{ {
uint rsize = 8;
uint gsize = 8;
uint bsize = 8;
uint asize = 8;
if (compressionOptions.enableColorDithering)
{
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
{
rsize = 5;
gsize = 6;
bsize = 5;
}
else if (compressionOptions.format == Format_RGB)
{
uint rshift, gshift, bshift;
PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize);
PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize);
PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize);
}
}
if (compressionOptions.enableAlphaDithering) if (compressionOptions.enableAlphaDithering)
{ {
if (compressionOptions.format == Format_DXT3) if (compressionOptions.format == Format_DXT3)
{ {
asize = 4; Quantize::Alpha4(mipmap.asMutableFixedImage());
} }
else if (compressionOptions.format == Format_RGB) else if (compressionOptions.format == Format_DXT1a)
{ {
uint ashift; Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize);
} }
} }
if (compressionOptions.binaryAlpha)
{
asize = 8; // Already quantized.
}
Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
} }
} }
// Compress the given mipmap. // Compress the given mipmap.
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{ {
const Image * image = mipmap.asFixedImage(); const Image * image = mipmap.asFixedImage();
nvDebugCheck(image != NULL); nvDebugCheck(image != NULL);
FastCompressor fast;
fast.setImage(image, inputOptions.alphaMode);
SlowCompressor slow;
slow.setImage(image, inputOptions.alphaMode);
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB) if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{ {
compressRGB(image, outputOptions, compressionOptions); compressRGB(image, outputOptions, compressionOptions);
@ -721,19 +683,18 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
#endif #endif
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fast.compressDXT1(outputOptions); fastCompressDXT1(image, outputOptions);
} }
else else
{ {
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode); cuda->compressDXT1(image, outputOptions, compressionOptions);
cuda->compressDXT1(compressionOptions, outputOptions);
} }
else else
{ {
slow.compressDXT1(compressionOptions, outputOptions); compressDXT1(image, outputOptions, compressionOptions);
} }
} }
} }
@ -741,18 +702,18 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{ {
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fast.compressDXT1a(outputOptions); fastCompressDXT1a(image, outputOptions);
} }
else else
{ {
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions); /*cuda*/compressDXT1a(image, outputOptions, compressionOptions);
} }
else else
{ {
slow.compressDXT1a(compressionOptions, outputOptions); compressDXT1a(image, outputOptions, compressionOptions);
} }
} }
} }
@ -760,19 +721,18 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{ {
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fast.compressDXT3(outputOptions); fastCompressDXT3(image, outputOptions);
} }
else else
{ {
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode); cuda->compressDXT3(image, outputOptions, compressionOptions);
cuda->compressDXT3(compressionOptions, outputOptions);
} }
else else
{ {
slow.compressDXT3(compressionOptions, outputOptions); compressDXT3(image, outputOptions, compressionOptions);
} }
} }
} }
@ -780,19 +740,18 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{ {
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fast.compressDXT5(outputOptions); fastCompressDXT5(image, outputOptions);
} }
else else
{ {
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode); cuda->compressDXT5(image, outputOptions, compressionOptions);
cuda->compressDXT5(compressionOptions, outputOptions);
} }
else else
{ {
slow.compressDXT5(compressionOptions, outputOptions); compressDXT5(image, outputOptions, compressionOptions);
} }
} }
} }
@ -800,20 +759,20 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{ {
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fast.compressDXT5n(outputOptions); fastCompressDXT5n(image, outputOptions);
} }
else else
{ {
slow.compressDXT5n(compressionOptions, outputOptions); compressDXT5n(image, outputOptions, compressionOptions);
} }
} }
else if (compressionOptions.format == Format_BC4) else if (compressionOptions.format == Format_BC4)
{ {
slow.compressBC4(compressionOptions, outputOptions); compressBC4(image, outputOptions, compressionOptions);
} }
else if (compressionOptions.format == Format_BC5) else if (compressionOptions.format == Format_BC5)
{ {
slow.compressBC5(compressionOptions, outputOptions); compressBC5(image, outputOptions, compressionOptions);
} }
return true; return true;

View File

@ -60,7 +60,7 @@ namespace nvtt
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const; void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const; void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const; void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; bool compressMipmap(const Mipmap & mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public: public:

1444
src/nvtt/FastCompressDXT.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,87 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_FASTCOMPRESSDXT_H
#define NV_TT_FASTCOMPRESSDXT_H
#include <nvimage/nvimage.h>
// Block-level DXT compressors and helpers. Every routine takes a ColorBlock
// of source texels and writes (or refines) the compressed block passed by
// pointer.
namespace nv
{
// Forward declarations; the full definitions are not needed by this header.
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
// Color compression:
// Compressor that uses the extremes of the diameter axis.
// NOTE(review): this comment used to repeat the luminance-axis description
// below; "diameter axis" is taken from the function name -- confirm against
// the implementation.
void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the extremes of the luminance axis.
void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box and takes alpha into account.
void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the best fit axis.
void compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Simple, but slow compressor that tests all color pairs.
void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force 6d search along the best fit axis.
void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Spatial greedy search.
// These refine the solution already stored in 'block' (in/out parameter).
void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force compressor for DXT5n
void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// Minimize error of the endpoints.
void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
// Error of an already compressed block with respect to the source texels.
// NOTE(review): the error metric is not visible from this header -- confirm
// in the implementation before comparing values across overloads.
uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// Alpha compression:
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block);
// The AlphaBlockDXT5 variants return a uint that callers treat as the error
// of the produced block (they accumulate it into a running total).
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
} // nv namespace
#endif // NV_TT_FASTCOMPRESSDXT_H

View File

@ -1,368 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
using namespace nv;
using namespace OptimalCompress;
namespace
{
// Sum, over the 16 texels of 'rgba', of the absolute difference between the
// texel's green value and the nearest of the four green levels encoded by
// 'block'. Only the green endpoints of the block are read.
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
	nvDebugCheck(block != NULL);

	// Expand the 6-bit green endpoints to 8 bits by replicating the top bits.
	const int green0 = (block->col0.g << 2) | (block->col0.g >> 4);
	const int green1 = (block->col1.g << 2) | (block->col1.g >> 4);

	// Two endpoints followed by the two interpolated levels at 1/3 and 2/3.
	const int levels[4] =
	{
		green0,
		green1,
		(2 * green0 + green1) / 3,
		(2 * green1 + green0) / 3
	};

	int sum = 0;
	for (int texel = 0; texel < 16; texel++)
	{
		const int g = rgba.color(texel).g;

		// Distance to the closest of the four levels.
		int nearest = abs(g - levels[0]);
		for (int k = 1; k < 4; k++)
		{
			nearest = min(nearest, abs(g - levels[k]));
		}

		sum += nearest;
	}

	return sum;
}
// Builds the 32-bit DXT1 index word for 'rgba' considering only the green
// channel: each of the 16 texels gets a 2-bit palette index, stored at bit
// position 2*i.
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
// Absolute distance from the texel to each of the four palette greens.
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
// Pairwise comparisons, combined below into the 2-bit index without branches.
// NOTE(review): this looks like the usual branch-free nearest-entry selection,
// which presumably relies on the palette entries lying in the order
// 0, 2, 3, 1 along a line -- confirm before reusing with other palettes.
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
// x2 is the low bit and (x0 | x1) the high bit of this texel's index.
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
// Choose quantized color that produces less error. Used by DXT3 compressor.
//
// Returns the 4-bit value q (0..15) whose 8-bit expansion -- the nibble
// replicated into both halves of the byte, i.e. q * 17 -- is closest to 'a'.
// Three candidates are compared: the truncated value (a >> 4) and its two
// neighbours.
//
// The neighbours are clamped to the valid 4-bit range. The previous version
// built the candidate -1 for a < 16 and expanded it with a left shift of a
// negative int (undefined behaviour before C++20), and built the out-of-range
// candidate 16 for a >= 240. Neither candidate could ever win the comparison,
// so the returned value is unchanged for every input.
inline static uint quantize4(uint8 a)
{
	const int mid = a >> 4;
	const int lo = (mid > 0) ? mid - 1 : 0;
	const int hi = (mid < 15) ? mid + 1 : 15;

	// q * 17 == (q << 4) | q for q in [0, 15].
	const int dLo = abs(lo * 17 - a);
	const int dMid = abs(mid * 17 - a);
	const int dHi = abs(hi * 17 - a);

	if (dLo < dMid && dLo < dHi) return lo;
	if (dMid < dHi) return mid;
	return hi;
}
// Returns the sum, over the 16 texels of 'rgba', of the squared difference
// between the texel's alpha and the closest of the eight alpha values
// produced by block->evaluatePalette(). The block is not modified.
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
	uint8 alphas[8];
	block->evaluatePalette(alphas);

	uint totalError = 0;

	for (uint i = 0; i < 16; i++)
	{
		const uint8 alpha = rgba.color(i).a;

		// Larger than any possible squared 8-bit difference (255 * 255).
		uint besterror = 256*256;

		// Only the error is needed here; the winning palette index is not
		// tracked (an unused, uninitialized 'best' local was removed).
		for (uint p = 0; p < 8; p++)
		{
			const int d = alphas[p] - alpha;
			const uint error = d * d;

			if (error < besterror)
			{
				besterror = error;
			}
		}

		totalError += besterror;
	}

	return totalError;
}
static void computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
block->setIndex(i, best);
}
}
} // namespace
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
//
// Encodes a block in which every texel has color 'c'. The endpoints for each
// channel come from the OMatch5 / OMatch6 lookup tables, and every texel uses
// the same 2-bit index.
void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
	// Per-channel endpoint pairs from the lookup tables.
	dxtBlock->col0.r = OMatch5[c.r][0];
	dxtBlock->col1.r = OMatch5[c.r][1];

	dxtBlock->col0.g = OMatch6[c.g][0];
	dxtBlock->col1.g = OMatch6[c.g][1];

	dxtBlock->col0.b = OMatch5[c.b][0];
	dxtBlock->col1.b = OMatch5[c.b][1];

	if (dxtBlock->col0.u >= dxtBlock->col1.u)
	{
		// Index 2 (binary 10) for all sixteen texels.
		dxtBlock->indices = 0xaaaaaaaa;
	}
	else
	{
		// Keep col0 >= col1: exchange the endpoints and flip the low bit of
		// every index (2 becomes 3) so the same interpolated color is selected.
		swap(dxtBlock->col0.u, dxtBlock->col1.u);
		dxtBlock->indices = 0xaaaaaaaa ^ 0x55555555;
	}
}
// Single color compressor for DXT1 with 1-bit alpha. A color whose alpha is
// 128 or more is encoded by compressDXT1(); anything below that produces a
// block with both endpoints zero and every index set to 3.
void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
	if (rgba.a >= 128)
	{
		compressDXT1(rgba, dxtBlock);
		return;
	}

	dxtBlock->col0.u = 0;
	dxtBlock->col1.u = 0;
	dxtBlock->indices = 0xFFFFFFFF;
}
// Brute force green channel compressor
//
// Encodes only the green channel of 'rgba' into 'block': red is fixed at 31
// and blue at 0 for both endpoints. The green endpoints start at the block's
// min/max (in 6-bit units) and, when the range is wide enough, are refined by
// an exhaustive search that minimizes computeGreenError().
void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
// Reduce the 8-bit green to the 6 bits stored in the block.
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
// Only search when the range spans more than 4 steps; otherwise keep min/max.
if (maxg - ming > 4)
{
// Baseline: error of the min/max endpoints.
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
// g0 runs over ming+5 .. maxg-1 and g1 over ming .. g0-5, so the candidate
// endpoints always differ by more than 4 and g0 stays the larger one.
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
// Prune pairs whose distance from the block's extremes already exceeds the
// best error found so far.
// NOTE(review): the endpoints are in 6-bit units while the error is measured
// on 8-bit values -- confirm this bound is intended.
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
// The search leaves the last candidate in the block; restore the best one.
block->col0.g = bestg0;
block->col1.g = bestg1;
}
// Select the per-texel indices for the final endpoints.
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
// DXT3 alpha compressor: quantizes the alpha of each of the 16 texels of
// 'rgba' independently with quantize4() and stores the results in the fields
// alpha0 .. alphaF of 'dxtBlock', texel i going to the i-th field.
void OptimalCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
dxtBlock->alpha0 = quantize4(rgba.color(0).a);
dxtBlock->alpha1 = quantize4(rgba.color(1).a);
dxtBlock->alpha2 = quantize4(rgba.color(2).a);
dxtBlock->alpha3 = quantize4(rgba.color(3).a);
dxtBlock->alpha4 = quantize4(rgba.color(4).a);
dxtBlock->alpha5 = quantize4(rgba.color(5).a);
dxtBlock->alpha6 = quantize4(rgba.color(6).a);
dxtBlock->alpha7 = quantize4(rgba.color(7).a);
dxtBlock->alpha8 = quantize4(rgba.color(8).a);
dxtBlock->alpha9 = quantize4(rgba.color(9).a);
// Texels 10..15 use the hexadecimal field names alphaA .. alphaF.
dxtBlock->alphaA = quantize4(rgba.color(10).a);
dxtBlock->alphaB = quantize4(rgba.color(11).a);
dxtBlock->alphaC = quantize4(rgba.color(12).a);
dxtBlock->alphaD = quantize4(rgba.color(13).a);
dxtBlock->alphaE = quantize4(rgba.color(14).a);
dxtBlock->alphaF = quantize4(rgba.color(15).a);
}
void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
{
uint8 mina = 255;
uint8 maxa = 0;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
mina = min(mina, alpha);
maxa = max(maxa, alpha);
}
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
int besta0 = maxa;
int besta1 = mina;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock);
if (error < besterror)
{
besterror = error;
besta0 = a0;
besta1 = a1;
}
}
}
dxtBlock->alpha0 = besta0;
dxtBlock->alpha1 = besta1;
}
computeAlphaIndices(rgba, dxtBlock);
}

View File

@ -1,49 +0,0 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_OPTIMALCOMPRESSDXT_H
#define NV_TT_OPTIMALCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
// Forward declarations; the full definitions are not needed by this header.
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
// Compressors that search for the lowest-error encoding of their input.
namespace OptimalCompress
{
// Single color block: endpoints are taken from per-channel lookup tables.
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
// Single color block with 1-bit alpha: alpha below 128 produces a block with
// zero endpoints and all indices set to 3, otherwise same as compressDXT1.
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
// Brute force compressor that encodes only the green channel of the block.
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
// DXT3 alpha: each texel's alpha is quantized to 4 bits independently.
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
// DXT5 alpha: exhaustive search of the two alpha endpoints, then index
// assignment.
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
}
} // nv namespace
#endif // NV_TT_OPTIMALCOMPRESSDXT_H

View File

@ -27,7 +27,7 @@
#include <nvimage/BlockDXT.h> #include <nvimage/BlockDXT.h>
#include "QuickCompressDXT.h" #include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h" #include "SingleColorLookup.h"
using namespace nv; using namespace nv;
@ -288,214 +288,123 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
dxtBlock->indices = computeIndices3(block, a, b); dxtBlock->indices = computeIndices3(block, a, b);
}*/ }*/
namespace
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{ {
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block) for (int i = 0; i < 16; i++)
{ {
uint8 alphas[8]; uint idx = block->index(i);
block->evaluatePalette(alphas); float alpha;
if (idx < 2) alpha = 1.0f - idx;
uint totalError = 0; else alpha = (8.0f - idx) / 7.0f;
for (uint i = 0; i < 16; i++) float beta = 1 - alpha;
{
uint8 alpha = rgba.color(i).a; alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
uint besterror = 256*256; alphabeta_sum += alpha * beta;
uint best = 8; alphax_sum += alpha * rgba.color(i).a;
for(uint p = 0; p < 8; p++) betax_sum += beta * rgba.color(i).a;
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
totalError += besterror;
block->setIndex(i, best);
}
return totalError;
} }
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block) const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 < alpha1)
{
swap(alpha0, alpha1);
// Flip indices:
for (int i = 0; i < 16; i++) for (int i = 0; i < 16; i++)
{ {
uint idx = block->index(i); uint idx = block->index(i);
float alpha; if (idx < 2) block->setIndex(i, 1 - idx);
if (idx < 2) alpha = 1.0f - idx; else block->setIndex(i, 9 - idx);
else alpha = (8.0f - idx) / 7.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * rgba.color(i).a;
betax_sum += beta * rgba.color(i).a;
} }
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 < alpha1)
{
swap(alpha0, alpha1);
// Flip indices:
for (int i = 0; i < 16; i++)
{
uint idx = block->index(i);
if (idx < 2) block->setIndex(i, 1 - idx);
else block->setIndex(i, 9 - idx);
}
}
else if (alpha0 == alpha1)
{
for (int i = 0; i < 16; i++)
{
block->setIndex(i, 0);
}
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
} }
else if (alpha0 == alpha1)
/*
static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{ {
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++) for (int i = 0; i < 16; i++)
{ {
uint8 x = rgba.color(i).a; block->setIndex(i, 0);
if (x == 0 || x == 255) continue;
uint bits = block->index(i);
if (bits == 6 || bits == 7) continue;
float alpha;
if (bits == 0) alpha = 1.0f;
else if (bits == 1) alpha = 0.0f;
else alpha = (6.0f - block->index(i)) / 5.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * x;
betax_sum += beta * x;
} }
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 > alpha1)
{
swap(alpha0, alpha1);
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
} }
*/
static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1) block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{ {
const uint64 mask = ~uint64(0xFFFF); swap(dxtBlock->col0.u, dxtBlock->col1.u);
return (block0.u | mask) == (block1.u | mask); dxtBlock->indices ^= 0x55555555;
} }
}
} // namespace
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{ {
if (rgba.isSingleColor()) // read block
Vector3 block[16];
extractColorBlockRGB(rgba, block);
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
selectDiagonal(block, 16, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);
if (color0 < color1)
{ {
OptimalCompress::compressDXT1(rgba.color(0), dxtBlock); swap(maxColor, minColor);
swap(color0, color1);
} }
else
{
// read block
Vector3 block[16];
extractColorBlockRGB(rgba, block);
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
selectDiagonal(block, 16, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);
if (color0 < color1) dxtBlock->col0 = Color16(color0);
{ dxtBlock->col1 = Color16(color1);
swap(maxColor, minColor); dxtBlock->indices = computeIndices4(block, maxColor, minColor);
swap(color0, color1);
}
dxtBlock->col0 = Color16(color0); optimizeEndPoints4(block, dxtBlock);
dxtBlock->col1 = Color16(color1);
dxtBlock->indices = computeIndices4(block, maxColor, minColor);
optimizeEndPoints4(block, dxtBlock);
}
} }
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock) void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{ {
bool hasAlpha = false; if (!rgba.hasAlpha())
for (uint i = 0; i < 16; i++)
{
if (rgba.color(i).a < 128) {
hasAlpha = true;
break;
}
}
if (!hasAlpha)
{ {
compressDXT1(rgba, dxtBlock); compressDXT1(rgba, dxtBlock);
} }
// @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode. else
//else if (rgba.isSingleColorNoAlpha()) { ... }
else
{ {
// read block // read block
Vector3 block[16]; Vector3 block[16];
@ -527,59 +436,160 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
} }
/// Sum, over the 16 texels of @p rgba, of the absolute difference between the
/// texel's green value and the closest of the four green values derived from
/// the endpoints of @p block.
///
/// @param rgba   Source texels; only the green component is read.
/// @param block  Block whose col0.g / col1.g endpoints are evaluated. Must not be NULL.
/// @return       Accumulated absolute green error.
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
    nvDebugCheck(block != NULL);

    // Widen each endpoint by two bits, filling the low bits with the endpoint's top bits.
    const int end0 = (block->col0.g << 2) | (block->col0.g >> 4);
    const int end1 = (block->col1.g << 2) | (block->col1.g >> 4);

    // Two endpoints followed by the two one-third / two-third blends.
    const int candidates[4] = { end0, end1, (2 * end0 + end1) / 3, (2 * end1 + end0) / 3 };

    int sum = 0;
    for (int texel = 0; texel < 16; texel++)
    {
        const int g = rgba.color(texel).g;

        int closest = abs(g - candidates[0]);
        for (int k = 1; k < 4; k++)
        {
            closest = min(closest, abs(g - candidates[k]));
        }

        sum += closest;
    }

    return sum;
}
// Build the packed per-texel selector word for a green-only block.
//
// For each of the 16 texels the absolute distance between its green value and
// the green value of each of the four palette entries is computed, and a 2-bit
// selector is derived from five pairwise comparisons of those distances without
// branching. The selector for texel i is stored at bits 2*i and 2*i+1 of the
// returned word.
// NOTE(review): the comparison network appears intended to pick the nearest
// palette entry; tie-breaking follows the strict '>' comparisons below.
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
// Absolute green distance to each palette entry.
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
// Pairwise "is farther than" flags (each is 0 or 1).
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
// x0: entry 2 is strictly closer than entries 0 and 1.
// x1: entry 3 is strictly closer than entries 0 and 1.
// x2: entry 3 is strictly closer than entries 0 and 2.
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
// Low selector bit is x2, high selector bit is (x0 | x1).
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
// Brute force green channel compressor
void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
// Fill an explicit-alpha block from the 16 texels of rgba.
//
// Texel i's alpha is shifted right by four bits and stored in the field
// alpha<i> (alpha0 .. alpha9, alphaA .. alphaF). The low four bits are simply
// discarded; no rounding is applied.
void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
dxtBlock->alpha0 = rgba.color(0).a >> 4;
dxtBlock->alpha1 = rgba.color(1).a >> 4;
dxtBlock->alpha2 = rgba.color(2).a >> 4;
dxtBlock->alpha3 = rgba.color(3).a >> 4;
dxtBlock->alpha4 = rgba.color(4).a >> 4;
dxtBlock->alpha5 = rgba.color(5).a >> 4;
dxtBlock->alpha6 = rgba.color(6).a >> 4;
dxtBlock->alpha7 = rgba.color(7).a >> 4;
dxtBlock->alpha8 = rgba.color(8).a >> 4;
dxtBlock->alpha9 = rgba.color(9).a >> 4;
dxtBlock->alphaA = rgba.color(10).a >> 4;
dxtBlock->alphaB = rgba.color(11).a >> 4;
dxtBlock->alphaC = rgba.color(12).a >> 4;
dxtBlock->alphaD = rgba.color(13).a >> 4;
dxtBlock->alphaE = rgba.color(14).a >> 4;
dxtBlock->alphaF = rgba.color(15).a >> 4;
}
void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock) void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
{ {
compressDXT1(rgba, &dxtBlock->color); compressDXT1(rgba, &dxtBlock->color);
OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha); compressDXT3A(rgba, &dxtBlock->alpha);
} }
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/)
{ {
uint8 alpha0 = 0; // @@ TODO
uint8 alpha1 = 255;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
alpha0 = max(alpha0, alpha);
alpha1 = min(alpha1, alpha);
}
AlphaBlockDXT5 block;
block.alpha0 = alpha0 - (alpha0 - alpha1) / 34;
block.alpha1 = alpha1 + (alpha0 - alpha1) / 34;
uint besterror = computeAlphaIndices(rgba, &block);
AlphaBlockDXT5 bestblock = block;
for (int i = 0; i < iterationCount; i++)
{
optimizeAlpha8(rgba, &block);
uint error = computeAlphaIndices(rgba, &block);
if (error >= besterror)
{
// No improvement, stop.
break;
}
if (sameIndices(block, bestblock))
{
bestblock = block;
break;
}
besterror = error;
bestblock = block;
};
// Copy best block to result;
*dxtBlock = bestblock;
} }
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/) void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock)
{ {
compressDXT1(rgba, &dxtBlock->color); compressDXT1(rgba, &dxtBlock->color);
compressDXT5A(rgba, &dxtBlock->alpha, iterationCount); compressDXT5A(rgba, &dxtBlock->alpha);
} }

View File

@ -37,13 +37,16 @@ namespace nv
namespace QuickCompress namespace QuickCompress
{ {
void compressDXT1(const Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock); void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock); void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock); void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8); void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8); void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock);
} }
} // nv namespace } // nv namespace

View File

@ -159,7 +159,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
} }
} }
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor) __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16])
{ {
const int bid = blockIdx.x; const int bid = blockIdx.x;
const int idx = threadIdx.x; const int idx = threadIdx.x;
@ -189,11 +189,6 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
colorSums(colors, sums); colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0], kColorMetric); float3 axis = bestFitLine(colors, sums[0], kColorMetric);
*sameColor = (axis == make_float3(0, 0, 0));
// Single color compressor needs unweighted colors.
if (*sameColor) colors[idx] = rawColors[idx];
dps[idx] = dot(rawColors[idx], axis); dps[idx] = dot(rawColors[idx], axis);
#if __DEVICE_EMULATION__ #if __DEVICE_EMULATION__
@ -597,40 +592,6 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
} }
*/ */
// Per-thread search over the four-level permutation table (unweighted variant).
//
// The calling thread (threadIdx.x) evaluates table entries idx, idx + NUM_THREADS,
// idx + 2 * NUM_THREADS, ... for at most 16 entries, stopping once the table index
// reaches 992. The lowest-error candidate is returned through bestStart, bestEnd and
// bestPermutation, and its error is written to errors[threadIdx.x].
// NOTE(review): bestStart / bestEnd / bestPermutation are only assigned when a
// candidate's error is below FLT_MAX; if that never happens they are read and
// returned unassigned. Confirm evalPermutation4 cannot yield such errors.
__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
for(int i = 0; i < 16; i++)
{
// Table index handled by this thread on iteration i.
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
// Order the endpoints so that bestStart >= bestEnd, adjusting the indices to match.
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
errors[idx] = bestError;
}
__device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) __device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{ {
const int idx = threadIdx.x; const int idx = threadIdx.x;
@ -666,6 +627,7 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Find index with minimum error // Find index with minimum error
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -836,39 +798,6 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
} }
} }
// Kernel: compress one color block using only the four-level permutations.
//
// The block's colors, sums, cross references and the sameColor flag are held in
// __shared__ storage and filled by loadColorBlock(). When the flag is set, thread 0
// writes a single-color block and every thread returns. Otherwise each thread
// evaluates its share of permutations and the thread holding the minimum error
// writes the result.
__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float3 colors[16];
__shared__ float3 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs, &sameColor);
// Make the loaded data and the sameColor flag visible to all threads before use.
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalLevel4Permutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result) __global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result)
{ {
@ -876,18 +805,11 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
__shared__ float3 sums[16]; __shared__ float3 sums[16];
__shared__ float weights[16]; __shared__ float weights[16];
__shared__ int xrefs[16]; __shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, weights, xrefs, &sameColor); loadColorBlock(image, colors, sums, weights, xrefs);
__syncthreads(); __syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd; ushort bestStart, bestEnd;
uint bestPermutation; uint bestPermutation;
@ -1111,11 +1033,6 @@ extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result
compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result); compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
} }
// C-linkage entry point that launches compressLevel4DXT1 with blockNum thread
// blocks of NUM_THREADS threads each. d_bitmaps is passed as the permutation
// table, d_data as the input image and d_result (viewed as uint2) as the output.
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressLevel4DXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{ {
compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result); compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);

View File

@ -24,13 +24,13 @@
#include <nvcore/Debug.h> #include <nvcore/Debug.h>
#include <nvcore/Containers.h> #include <nvcore/Containers.h>
#include <nvmath/Color.h> #include <nvmath/Color.h>
#include <nvmath/Fitting.h>
#include <nvimage/Image.h> #include <nvimage/Image.h>
#include <nvimage/ColorBlock.h> #include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h> #include <nvimage/BlockDXT.h>
#include <nvtt/CompressionOptions.h> #include <nvtt/CompressionOptions.h>
#include <nvtt/OutputOptions.h> #include <nvtt/OutputOptions.h>
#include <nvtt/QuickCompressDXT.h> #include <nvtt/FastCompressDXT.h>
#include <nvtt/OptimalCompressDXT.h>
#include "CudaCompressDXT.h" #include "CudaCompressDXT.h"
#include "CudaUtils.h" #include "CudaUtils.h"
@ -53,7 +53,6 @@ using namespace nvtt;
extern "C" void setupCompressKernel(const float weights[3]); extern "C" void setupCompressKernel(const float weights[3]);
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
#include "Bitmaps.h" // @@ Rename to BitmapTable.h #include "Bitmaps.h" // @@ Rename to BitmapTable.h
@ -120,25 +119,20 @@ bool CudaCompressor::isValid() const
// @@ This code is very repetitive and needs to be cleaned up. // @@ This code is very repetitive and needs to be cleaned up.
/// Remember the image and alpha mode to use in subsequent compress calls.
/// Only the pointer is stored; the image itself is not copied.
/// NOTE(review): presumably the image must outlive the compress calls — confirm with callers.
void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
/// Compress image using CUDA. /// Compress image using CUDA.
void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void CudaCompressor::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
nvDebugCheck(cuda::isHardwarePresent()); nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA #if defined HAVE_CUDA
// Image size in blocks. // Image size in blocks.
const uint w = (m_image->width() + 3) / 4; const uint w = (image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4; const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32); uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize); uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h; const uint blockNum = w * h;
const uint compressedSize = blockNum * 8; const uint compressedSize = blockNum * 8;
@ -183,7 +177,7 @@ void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressio
} }
clock_t end = clock(); clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage); free(blockLinearImage);
@ -197,18 +191,18 @@ void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressio
/// Compress image using CUDA. /// Compress image using CUDA.
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
nvDebugCheck(cuda::isHardwarePresent()); nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA #if defined HAVE_CUDA
// Image size in blocks. // Image size in blocks.
const uint w = (m_image->width() + 3) / 4; const uint w = (image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4; const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32); uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize); uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage); convertToBlockLinear(image, blockLinearImage);
const uint blockNum = w * h; const uint blockNum = w * h;
const uint compressedSize = blockNum * 8; const uint compressedSize = blockNum * 8;
@ -228,20 +222,13 @@ void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressio
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel. // Launch kernel.
if (m_alphaMode == AlphaMode_Transparency) compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU. // Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++) for (uint i = 0; i < count; i++)
{ {
ColorBlock rgba(blockLinearImage + (bn + i) * 16); ColorBlock rgba(blockLinearImage + (bn + i) * 16);
OptimalCompress::compressDXT3A(rgba, alphaBlocks + i); compressBlock(rgba, alphaBlocks + i);
} }
// Check for errors. // Check for errors.
@ -273,7 +260,7 @@ void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressio
} }
clock_t end = clock(); clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks); free(alphaBlocks);
free(blockLinearImage); free(blockLinearImage);
@ -288,18 +275,18 @@ void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressio
/// Compress image using CUDA. /// Compress image using CUDA.
void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
nvDebugCheck(cuda::isHardwarePresent()); nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA #if defined HAVE_CUDA
// Image size in blocks. // Image size in blocks.
const uint w = (m_image->width() + 3) / 4; const uint w = (image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4; const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32); uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize); uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage); convertToBlockLinear(image, blockLinearImage);
const uint blockNum = w * h; const uint blockNum = w * h;
const uint compressedSize = blockNum * 8; const uint compressedSize = blockNum * 8;
@ -319,20 +306,13 @@ void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressio
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel. // Launch kernel.
if (m_alphaMode == AlphaMode_Transparency) compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU. // Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++) for (uint i = 0; i < count; i++)
{ {
ColorBlock rgba(blockLinearImage + (bn + i) * 16); ColorBlock rgba(blockLinearImage + (bn + i) * 16);
QuickCompress::compressDXT5A(rgba, alphaBlocks + i); compressBlock_Iterative(rgba, alphaBlocks + i);
} }
// Check for errors. // Check for errors.
@ -364,7 +344,7 @@ void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressio
} }
clock_t end = clock(); clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks); free(alphaBlocks);
free(blockLinearImage); free(blockLinearImage);
@ -378,3 +358,185 @@ void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressio
} }
#if 0
/// Batches 4x4 color blocks on the host and compresses them on the device.
///
/// Blocks are appended with addColorBlock() until isFull() reports that the
/// capacity given at construction has been reached; flush() then uploads the
/// batch, runs compressKernelDXT1 and reads the result back.
///
/// The object owns two host arrays and two device allocations, so it must not
/// be copied: copying is disabled to prevent the buffers being released twice.
class Task
{
public:
    /// @param numBlocks  Maximum number of color blocks held before a flush is required.
    explicit Task(uint numBlocks) :
        blockMaxCount(numBlocks),
        blockCount(0),
        blockLinearImage(NULL),
        xrefs(NULL),
        d_blockLinearImage(NULL),
        d_compressedImage(NULL)
    {
        // System memory allocations.
        blockLinearImage = new uint[blockMaxCount * 16];
        xrefs = new uint[blockMaxCount * 16];

        // Device memory allocations. The pointers start out NULL so that the
        // destructor's cudaFree calls stay well defined if an allocation fails.
        cudaMalloc((void**) &d_blockLinearImage, blockMaxCount * 16 * sizeof(uint));
        cudaMalloc((void**) &d_compressedImage, blockMaxCount * 8U);

        // @@ Check for allocation errors.
    }

    ~Task()
    {
        delete [] blockLinearImage;
        delete [] xrefs;
        cudaFree(d_blockLinearImage);
        cudaFree(d_compressedImage);
    }

    /// Append one color block to the pending batch. The task must not be full.
    void addColorBlock(const ColorBlock & rgba)
    {
        nvDebugCheck(!isFull());

        // @@ Count unique colors?
        /*
        // Convert colors to vectors.
        Array<Vector3> pointArray(16);
        for(int i = 0; i < 16; i++) {
        const Color32 color = rgba.color(i);
        pointArray.append(Vector3(color.r, color.g, color.b));
        }
        // Find best fit line.
        const Vector3 axis = Fit::bestLine(pointArray).direction();
        // Project points to axis.
        float dps[16];
        uint * order = &xrefs[blockCount * 16];
        for (uint i = 0; i < 16; ++i)
        {
        dps[i] = dot(pointArray[i], axis);
        order[i] = i;
        }
        // Sort them.
        for (uint i = 0; i < 16; ++i)
        {
        for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
        {
        swap(dps[j], dps[j - 1]);
        swap(order[j], order[j - 1]);
        }
        }
        */

        // Write sorted colors to blockLinearImage.
        for(uint i = 0; i < 16; ++i)
        {
            // blockLinearImage[blockCount * 16 + i] = rgba.color(order[i]);
            blockLinearImage[blockCount * 16 + i] = rgba.color(i);
        }

        ++blockCount;
    }

    /// @return true once the number of pending blocks equals the capacity.
    bool isFull() const
    {
        nvDebugCheck(blockCount <= blockMaxCount);
        return blockCount == blockMaxCount;
    }

    /// Compress all pending blocks and reset the batch. Does nothing when empty.
    /// CUDA errors are reported through outputOptions.errorHandler when one is set.
    void flush(const OutputOptions::Private & outputOptions)
    {
        if (blockCount == 0)
        {
            // Nothing to do.
            return;
        }

        // Copy input color blocks.
        cudaMemcpy(d_blockLinearImage, blockLinearImage, blockCount * 64, cudaMemcpyHostToDevice);

        // Launch kernel.
        compressKernelDXT1(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);

        // Check for errors.
        cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // Copy result to host, overwrite swizzled image.
        uint * compressedImage = blockLinearImage;
        cudaMemcpy(compressedImage, d_compressedImage, blockCount * 8, cudaMemcpyDeviceToHost);

        // @@ Sort block indices.

        // Output result.
        if (outputOptions.outputHandler != NULL)
        {
            // outputOptions.outputHandler->writeData(compressedImage, blockCount * 8);
        }

        blockCount = 0;
    }

private:
    // Not copyable: the buffers below are owned by exactly one Task.
    Task(const Task &);
    void operator=(const Task &);

    const uint blockMaxCount;    // Capacity, in blocks.
    uint blockCount;             // Blocks currently pending.
    uint * blockLinearImage;     // Host staging buffer, 16 uints per block.
    uint * xrefs;                // Host buffer, 16 uints per block (only used by the disabled sort).
    uint * d_blockLinearImage;   // Device copy of the staging buffer.
    uint * d_compressedImage;    // Device output, 8 bytes per block.
};
// Alternative DXT1 path that feeds the image to a Task in batches.
//
// The image is walked in 4x4 steps; each block is added to the task, which is
// flushed whenever it fills up and once more at the end. The task capacity is
// the smaller of the image's block count and blockMax. The elapsed time is
// printed to stdout. Without HAVE_CUDA the error handler, if any, is notified
// with Error_CudaError instead.
// NOTE(review): setupCompressKernelDXT1 does not match the setupCompressKernel
// declaration used elsewhere in this file — confirm the name before enabling this code.
void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
#if defined HAVE_CUDA
const uint w = image->width();
const uint h = image->height();
// Number of 4x4 blocks, rounding each dimension up.
const uint blockNum = ((w + 3) / 4) * ((h + 3) / 4);
const uint blockMax = 32768; // 49152, 65535
setupCompressKernelDXT1(compressionOptions.colorWeight.ptr());
ColorBlock rgba;
Task task(min(blockNum, blockMax));
clock_t start = clock();
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
task.addColorBlock(rgba);
if (task.isFull())
{
task.flush(outputOptions);
}
}
}
// Compress whatever is left in a partially filled batch.
task.flush(outputOptions);
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
#endif // 0

View File

@ -39,20 +39,15 @@ namespace nv
bool isValid() const; bool isValid() const;
void setImage(const Image * image, nvtt::AlphaMode alphaMode); void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private: private:
uint * m_bitmapTable; uint * m_bitmapTable;
uint * m_data; uint * m_data;
uint * m_result; uint * m_result;
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
}; };
} // nv namespace } // nv namespace

View File

@ -22,7 +22,6 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h> #include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h" #include "CudaUtils.h"
#if defined HAVE_CUDA #if defined HAVE_CUDA
@ -53,58 +52,29 @@ static bool isWow32()
{ {
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE; BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process) if (NULL != fnIsWow64Process)
{ {
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{ {
// Assume 32 bits. // Assume 32 bits.
return true; return true;
} }
} }
return !bIsWow64; return !bIsWow64;
} }
#endif #endif
/// Check that the CUDA driver library can be loaded and exports the entry
/// points required by a runtime of the given version.
///
/// @param version  Runtime version in CUDART_VERSION encoding (2000 for 2.0,
///                 2010 for 2.1); the caller passes CUDART_VERSION.
/// @return true when the driver library loads and every probed symbol resolves.
static bool isCudaDriverAvailable(uint version)
{
#if NV_OS_WIN32
    Library nvcuda("nvcuda.dll");
#else
    Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif

    if (!nvcuda.isValid())
    {
        // No driver library installed.
        return false;
    }

    // The comparisons are inclusive so that a runtime of exactly this version
    // is also checked against the driver.
    if (version >= 2000)
    {
        if (nvcuda.bindSymbol("cuStreamCreate") == NULL) return false;
    }

    if (version >= 2010)
    {
        // The driver exports this entry point as cuModuleLoadDataEx; probing
        // for any other name would never resolve and would disable CUDA.
        if (nvcuda.bindSymbol("cuModuleLoadDataEx") == NULL) return false;
    }

    return true;
}
/// Determine if CUDA is available. /// Determine if CUDA is available.
bool nv::cuda::isHardwarePresent() bool nv::cuda::isHardwarePresent()
{ {
#if defined HAVE_CUDA #if defined HAVE_CUDA
#if NV_OS_WIN32 #if NV_OS_WIN32
//if (isWindowsVista()) return false; if (isWindowsVista()) return false;
//if (isWindowsVista() || !isWow32()) return false; //if (isWindowsVista() || !isWow32()) return false;
#endif #endif
int count = deviceCount(); int count = deviceCount();
@ -120,12 +90,6 @@ bool nv::cuda::isHardwarePresent()
return false; return false;
} }
// Make sure that CUDA driver matches CUDA runtime.
if (!isCudaDriverAvailable(CUDART_VERSION))
{
return false;
}
// @@ Make sure that warp size == 32 // @@ Make sure that warp size == 32
} }
@ -151,35 +115,6 @@ int nv::cuda::deviceCount()
return 0; return 0;
} }
/// Return the index of the device with the largest
/// multiProcessorCount * clockRate product, skipping devices whose major or
/// minor compute capability is reported as -1. Returns 0 when no device
/// qualifies or when built without HAVE_CUDA.
int nv::cuda::getFastestDevice()
{
    int fastest = 0;

#if defined HAVE_CUDA
    int bestScore = 0;
    const int total = deviceCount();

    for (int dev = 0; dev < total; ++dev)
    {
        cudaDeviceProp props;
        cudaGetDeviceProperties(&props, dev);

        // Rough throughput estimate used only to rank devices against each other.
        const int score = props.multiProcessorCount * props.clockRate;
        const bool usable = (props.major != -1) && (props.minor != -1);

        if (usable && score > bestScore)
        {
            bestScore = score;
            fastest = dev;
        }
    }
#endif

    return fastest;
}
/// Activate the given devices. /// Activate the given devices.
bool nv::cuda::setDevice(int i) bool nv::cuda::setDevice(int i)
{ {

View File

@ -31,7 +31,6 @@ namespace nv
{ {
bool isHardwarePresent(); bool isHardwarePresent();
int deviceCount(); int deviceCount();
int getFastestDevice();
bool setDevice(int i); bool setDevice(int i);
}; };

View File

@ -207,6 +207,7 @@ NVTT_API void nvttDestroyCompressionOptions(NvttCompressionOptions * compression
NVTT_API void nvttSetCompressionOptionsFormat(NvttCompressionOptions * compressionOptions, NvttFormat format); NVTT_API void nvttSetCompressionOptionsFormat(NvttCompressionOptions * compressionOptions, NvttFormat format);
NVTT_API void nvttSetCompressionOptionsQuality(NvttCompressionOptions * compressionOptions, NvttQuality quality); NVTT_API void nvttSetCompressionOptionsQuality(NvttCompressionOptions * compressionOptions, NvttQuality quality);
NVTT_API void nvttSetCompressionOptionsColorWeights(NvttCompressionOptions * compressionOptions, float red, float green, float blue, float alpha); NVTT_API void nvttSetCompressionOptionsColorWeights(NvttCompressionOptions * compressionOptions, float red, float green, float blue, float alpha);
NVTT_API void nvttEnableCompressionOptionsCudaCompression(NvttCompressionOptions * compressionOptions, NvttBoolean enable);
NVTT_API void nvttSetCompressionOptionsPixelFormat(NvttCompressionOptions * compressionOptions, unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask); NVTT_API void nvttSetCompressionOptionsPixelFormat(NvttCompressionOptions * compressionOptions, unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
NVTT_API void nvttSetCompressionOptionsQuantization(NvttCompressionOptions * compressionOptions, NvttBoolean colorDithering, NvttBoolean alphaDithering, NvttBoolean binaryAlpha, int alphaThreshold); NVTT_API void nvttSetCompressionOptionsQuantization(NvttCompressionOptions * compressionOptions, NvttBoolean colorDithering, NvttBoolean alphaDithering, NvttBoolean binaryAlpha, int alphaThreshold);

View File

@ -29,8 +29,6 @@
#include "colourblock.h" #include "colourblock.h"
#include <cfloat> #include <cfloat>
#include "fastclusterlookup.inl"
namespace squish { namespace squish {
FastClusterFit::FastClusterFit() FastClusterFit::FastClusterFit()
@ -99,6 +97,91 @@ void FastClusterFit::SetColourSet( ColourSet const* colours, int flags )
} }
// One precomputed record per way of distributing 16 points over the palette
// entries of a cluster fit. Filled in by FastClusterFit::DoPrecomputation().
struct Precomp {
// Sum over all 16 points of alpha^2, where alpha is the interpolation weight
// of the first endpoint.
float alpha2_sum;
// Sum over all 16 points of beta^2, where beta is the interpolation weight
// of the second endpoint.
float beta2_sum;
// Sum over all 16 points of alpha * beta.
float alphabeta_sum;
// 1 / (alpha2_sum * beta2_sum - alphabeta_sum^2). The denominator is zero for
// some partitions (e.g. all points in one end cluster), so this can be
// non-finite.
float factor;
};
// 153 = number of (c0, c1, c2) triples of non-negative counts summing to 16.
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
// 969 = number of (c0, c1, c2, c3) quadruples of non-negative counts summing to 16.
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
void FastClusterFit::DoPrecomputation()
{
int i = 0;
// Three element clusters:
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
int c2 = 16 - c0 - c1;
/*if (c2 == 16) {
// a = b = x2 / 16
s_threeElement[i].alpha2_sum = 0;
s_threeElement[i].beta2_sum = 16;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_threeElement[i].alpha2_sum = 16;
s_threeElement[i].beta2_sum = 0;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
}
i++;
}
}
//printf("%d three cluster elements\n", i);
// Four element clusters:
i = 0;
for( int c0 = 0; c0 <= 16; c0++)
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
int c3 = 16 - c0 - c1 - c2;
/*if (c3 == 16) {
// a = b = x3 / 16
s_fourElement[i].alpha2_sum = 16.0f;
s_fourElement[i].beta2_sum = 0.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_fourElement[i].alpha2_sum = 0.0f;
s_fourElement[i].beta2_sum = 16.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
}
i++;
}
}
}
//printf("%d four cluster elements\n", i);
}
void FastClusterFit::SetMetric(float r, float g, float b) void FastClusterFit::SetMetric(float r, float g, float b)
{ {
#if SQUISH_USE_SIMD #if SQUISH_USE_SIMD

View File

@ -44,6 +44,8 @@ public:
void SetMetric(float r, float g, float b); void SetMetric(float r, float g, float b);
float GetBestError() const; float GetBestError() const;
static void DoPrecomputation();
// Make them public // Make them public
virtual void Compress3( void* block ); virtual void Compress3( void* block );
virtual void Compress4( void* block ); virtual void Compress4( void* block );

File diff suppressed because it is too large Load Diff

View File

@ -42,11 +42,11 @@ struct MyOutputHandler : public nvtt::OutputHandler
MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), stream(new nv::StdOutputStream(name)) {} MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), stream(new nv::StdOutputStream(name)) {}
virtual ~MyOutputHandler() { delete stream; } virtual ~MyOutputHandler() { delete stream; }
void setTotal(int64 t) virtual void setTotal(int64 t)
{ {
total = t + 128; total = t + 128;
} }
void setDisplayProgress(bool b) virtual void setDisplayProgress(bool b)
{ {
verbose = b; verbose = b;
} }
@ -373,6 +373,7 @@ int main(int argc, char *argv[])
inputOptions.setMipmapGeneration(false); inputOptions.setMipmapGeneration(false);
} }
nvtt::CompressionOptions compressionOptions; nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(format); compressionOptions.setFormat(format);
if (fast) if (fast)
@ -396,21 +397,6 @@ int main(int argc, char *argv[])
compressionOptions.setExternalCompressor(externalCompressor); compressionOptions.setExternalCompressor(externalCompressor);
} }
if (format == nvtt::Format_RGB)
{
compressionOptions.setQuantization(true, false, false);
//compressionOptions.setPixelFormat(16, 0xF000, 0x0F00, 0x00F0, 0x000F);
compressionOptions.setPixelFormat(16,
0x0F00,
0x00F0,
0x000F,
0xF000);
// 0x003F0000,
// 0x00003F00,
// 0x0000003F,
// 0x3F000000);
}
MyErrorHandler errorHandler; MyErrorHandler errorHandler;
MyOutputHandler outputHandler(output); MyOutputHandler outputHandler(output);

View File

@ -73,12 +73,10 @@ int main(int argc, char *argv[])
float scale = 0.5f; float scale = 0.5f;
float gamma = 2.2f; float gamma = 2.2f;
nv::AutoPtr<nv::Filter> filter; nv::Filter * filter = NULL;
nv::Path input; nv::Path input;
nv::Path output; nv::Path output;
nv::FloatImage::WrapMode wrapMode = nv::FloatImage::WrapMode_Mirror;
// Parse arguments. // Parse arguments.
for (int i = 1; i < argc; i++) for (int i = 1; i < argc; i++)
{ {
@ -110,18 +108,9 @@ int main(int argc, char *argv[])
else if (strcmp("lanczos", argv[i]) == 0) filter = new nv::LanczosFilter(); else if (strcmp("lanczos", argv[i]) == 0) filter = new nv::LanczosFilter();
else if (strcmp("kaiser", argv[i]) == 0) { else if (strcmp("kaiser", argv[i]) == 0) {
filter = new nv::KaiserFilter(3); filter = new nv::KaiserFilter(3);
((nv::KaiserFilter *)filter.ptr())->setParameters(4.0f, 1.0f); ((nv::KaiserFilter *)filter)->setParameters(4.0f, 1.0f);
} }
} }
else if (strcmp("-f", argv[i]) == 0)
{
if (i+1 == argc) break;
i++;
if (strcmp("mirror", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Mirror;
else if (strcmp("repeat", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Repeat;
else if (strcmp("clamp", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Clamp;
}
else if (argv[i][0] != '-') else if (argv[i][0] != '-')
{ {
input = argv[i]; input = argv[i];
@ -151,10 +140,6 @@ int main(int argc, char *argv[])
printf(" * mitchell\n"); printf(" * mitchell\n");
printf(" * lanczos\n"); printf(" * lanczos\n");
printf(" * kaiser\n"); printf(" * kaiser\n");
printf(" -w mode One of the following: (default = 'mirror')\n");
printf(" * mirror\n");
printf(" * repeat\n");
printf(" * clamp\n");
return 1; return 1;
} }
@ -170,14 +155,15 @@ int main(int argc, char *argv[])
nv::FloatImage fimage(&image); nv::FloatImage fimage(&image);
fimage.toLinear(0, 3, gamma); fimage.toLinear(0, 3, gamma);
nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(*filter, uint(image.width() * scale), uint(image.height() * scale), wrapMode)); nv::AutoPtr<nv::FloatImage> fresult(fimage.downSample(*filter, uint(image.width() * scale), uint(image.height() * scale), nv::FloatImage::WrapMode_Mirror));
nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma)); nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output); nv::StdOutputStream stream(output);
nv::ImageIO::saveTGA(stream, result.ptr()); // @@ Add generic save function. Add support for png too. nv::ImageIO::saveTGA(stream, result.ptr()); // @@ Add generic save function. Add support for png too.
delete filter;
return 0; return 0;
} }