2 Commits
2.0.7 ... 2.0.2

Author SHA1 Message Date
1df4bb6980 Fix changelog. 2008-04-17 09:04:57 +00:00
0294c4ad93 Tag 2.0.2 release. 2008-04-17 08:59:21 +00:00
68 changed files with 2947 additions and 4187 deletions

View File

@ -1,4 +1,4 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0)
CMAKE_MINIMUM_REQUIRED(VERSION 2.4.0)
PROJECT(NV)
ENABLE_TESTING()
@ -16,13 +16,6 @@ MESSAGE(STATUS "Setting optimal options")
MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}")
MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}")
IF(NVTT_SHARED)
SET(NVCORE_SHARED TRUE)
SET(NVMATH_SHARED TRUE)
SET(NVIMAGE_SHARED TRUE)
ENDIF(NVTT_SHARED)
ADD_SUBDIRECTORY(src)
IF(WIN32)

View File

@ -1,43 +1,3 @@
NVIDIA Texture Tools version 2.0.7
* Output correct exit codes. Fixes issue 92.
* Fix thread-safety errors. Fixes issue 90.
* Add SIMD power method. Fixes issue 94.
* Interact better with applications that already use CUDA.
* Faster CPU compression.
NVIDIA Texture Tools version 2.0.6
* Fix dll version checking.
* Detect CUDA 2.1 and future CUDA versions correctly.
* Print CUDA detection message in nvcompress.
* Select the fastest CUDA device.
* Compile squish with -fPIC. Fixes issue 74.
* Fix warnings under gcc 4.3.2.
* Fix nvzoom option typo by Frank Richter. Fixes issue 81.
* Do not use CUDA to compress small mipmaps. Fixes issue 76.
* Compute mipmaps of semi-transparent images correctly.
* Shutdown CUDA properly. Fixes issue 83.
* Fix pixel format converions. Fixes issue 87.
* Update single color compression tables. Fixes issue 85.
NVIDIA Texture Tools version 2.0.5
* Fix error in single color compressor. Fixes issue 66.
* Detect mismatch between CUDA runtime and driver, and disable CUDA in that case.
* Fix cmake files when compiling NVTT as a shared library.
* When linking nvtt dynamically on unix, link all libraries dynamically.
* Select fastest CUDA device.
NVIDIA Texture Tools version 2.0.4
* Fix error in RGB format output; reported by jonsoh. See issue 49.
* Added support RGB format dithering by jonsoh. Fixes issue 50 and 51.
* Prevent infinite loop in indexMirror when width equal 1. Fixes issue 65.
* Implement general scale filter, including upsampling.
NVIDIA Texture Tools version 2.0.3
* More accurate DXT3 compressor. Fixes issue 38.
* Remove legacy compressors. Fix issue 34.
* Check for single color in all compressors. Fixes issue 43.
* Fix error in fast downsample filter, reported by Noel Llopis.
NVIDIA Texture Tools version 2.0.2
* Fix copy ctor error reported by Richard Sim.
* Fix indexMirror error reported by Chris Lambert.

View File

@ -1 +1 @@
2.0.7
2.0.2

View File

@ -57,7 +57,7 @@ MARK_AS_ADVANCED (CUDA_FOUND CUDA_COMPILER CUDA_RUNTIME_LIBRARY)
#SET(CUDA_OPTIONS "-ncfe")
SET(CUDA_OPTIONS "--host-compilation=C")
SET(CUDA_OPTIONS "")
IF (CUDA_EMULATION)
SET (CUDA_OPTIONS "${CUDA_OPTIONS} -deviceemu")

2
configure vendored
View File

@ -53,7 +53,7 @@ echo "-- Configuring nvidia-texture-tools "`cat VERSION`
mkdir -p ./build
cd ./build
$CMAKE .. -DNVTT_SHARED=1 -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1
$CMAKE .. -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1
cd ..
echo ""

0
gnuwin32/bin/libpng12.dll Executable file → Normal file
View File

View File

@ -281,10 +281,6 @@
RelativePath="..\..\..\src\nvcore\Debug.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Library.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Memory.cpp"
>
@ -319,10 +315,6 @@
RelativePath="..\..\..\src\nvcore\DefsVcWin32.h"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Library.h"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Memory.h"
>

View File

@ -355,10 +355,6 @@
RelativePath="..\..\..\src\nvimage\nvimage.h"
>
</File>
<File
RelativePath="..\..\..\src\nvimage\PixelFormat.h"
>
</File>
<File
RelativePath="..\..\..\src\nvimage\PsdFile.h"
>

View File

@ -53,8 +53,8 @@ END
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,0,6,0
PRODUCTVERSION 2,0,6,0
FILEVERSION 2,0,2,0
PRODUCTVERSION 2,0,2,0
FILEFLAGSMASK 0x17L
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 0, 6, 0"
VALUE "FileVersion", "2, 0, 2, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 0, 6, 0"
VALUE "ProductVersion", "2, 0, 2, 0"
END
END
BLOCK "VarFileInfo"

View File

@ -711,7 +711,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -849,6 +849,10 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
>
@ -861,10 +865,6 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
>
@ -911,6 +911,10 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\InputOptions.h"
>
@ -923,10 +927,6 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\OutputOptions.h"
>

View File

@ -105,8 +105,7 @@ ENDIF(OPENEXR_FOUND)
FIND_PACKAGE(Qt4)
# Threads
FIND_PACKAGE(Threads REQUIRED)
MESSAGE(STATUS "Use thread library: ${CMAKE_THREAD_LIBS_INIT}")
FIND_PACKAGE(Threads)
# configuration file
INCLUDE(CheckIncludeFiles)

View File

@ -19,19 +19,13 @@ SET(CORE_SRCS
TextWriter.h
TextWriter.cpp
Radix.h
Radix.cpp
Library.h
Library.cpp)
Radix.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# targets
ADD_DEFINITIONS(-DNVCORE_EXPORTS)
IF(UNIX)
SET(LIBS ${LIBS} ${CMAKE_DL_LIBS})
ENDIF(UNIX)
IF(NVCORE_SHARED)
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ELSE(NVCORE_SHARED)

View File

@ -824,13 +824,13 @@ namespace nv
}
/// Number of entries in the hash.
int size() const
int size()
{
return entry_count;
}
/// Number of entries in the hash.
int count() const
int count()
{
return size();
}

View File

@ -28,7 +28,7 @@
#endif
#if NV_OS_LINUX && defined(HAVE_EXECINFO_H)
# include <execinfo.h> // backtrace
# include <execinfo.h>
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
# include <cxxabi.h>
# endif
@ -38,14 +38,7 @@
# include <unistd.h> // getpid
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
# include <sys/ucontext.h>
# undef HAVE_EXECINFO_H
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
# include <execinfo.h> // backtrace
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
# include <cxxabi.h>
# endif
# endif
# include <ucontext.h>
#endif
#include <stdexcept> // std::runtime_error
@ -135,14 +128,6 @@ namespace
#if defined(HAVE_EXECINFO_H) // NV_OS_LINUX
static bool nvHasStackTrace() {
#if NV_OS_DARWIN
return backtrace != NULL;
#else
return true;
#endif
}
static void nvPrintStackTrace(void * trace[], int size, int start=0) {
char ** string_array = backtrace_symbols(trace, size);
@ -181,26 +166,13 @@ namespace
static void * callerAddress(void * secret)
{
# if NV_OS_DARWIN
# if defined(_STRUCT_MCONTEXT)
# if NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->__ss.__srr0;
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->__ss.__eip;
# endif
# else
# if NV_CPU_PPC
# if NV_OS_DARWIN && NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.srr0;
# elif NV_CPU_X86
# elif NV_OS_DARWIN && NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.eip;
# endif
# endif
# else
# if NV_CPU_X86_64
# elif NV_CPU_X86_64
// #define REG_RIP REG_INDEX(rip) // seems to be 16
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[REG_RIP];
@ -210,7 +182,8 @@ namespace
# elif NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext.regs->nip;
# endif
# else
return NULL;
# endif
// How to obtain the instruction pointers in different platforms, from mlton's source code.
@ -255,8 +228,7 @@ namespace
}
# if defined(HAVE_EXECINFO_H)
if (nvHasStackTrace()) // in case of weak linking
{
void * trace[64];
int size = backtrace(trace, 64);
@ -266,7 +238,7 @@ namespace
}
nvPrintStackTrace(trace, size, 1);
}
# endif // defined(HAVE_EXECINFO_H)
exit(0);
@ -401,12 +373,9 @@ namespace
# endif
# if defined(HAVE_EXECINFO_H)
if (nvHasStackTrace())
{
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 2);
}
nvPrintStackTrace(trace, size, 3);
# endif
// Exit cleanly.
@ -453,12 +422,9 @@ void NV_CDECL nvDebug(const char *msg, ...)
void debug::dumpInfo()
{
#if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H)
if (nvHasStackTrace())
{
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 1);
}
#endif
}

View File

@ -115,7 +115,6 @@ namespace nv
{
NVCORE_API void dumpInfo();
// These functions are not thread safe.
NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
NVCORE_API void resetMessageHandler();

View File

@ -2,7 +2,8 @@
#error "Do not include this file directly."
#endif
#include <stdint.h> // uint8_t, int8_t, ...
#include <stdlib.h> // uint8_t, int8_t, ...
// Function linkage
#define DLL_IMPORT

View File

@ -19,9 +19,7 @@
// Set standard function names.
#define snprintf _snprintf
#if _MSC_VER < 1500
#define vsnprintf _vsnprintf
#endif
#define vsscanf _vsscanf
#define chdir _chdir
#define getcwd _getcwd
@ -72,6 +70,8 @@ typedef uint32 uint;
#pragma warning(disable : 4711) // function selected for automatic inlining
#pragma warning(disable : 4725) // Pentium fdiv bug
#pragma warning(disable : 4345) // behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized
#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged.
#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup

View File

@ -1,41 +0,0 @@
#include "Library.h"
#include "Debug.h"
#if NV_OS_WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
#else
#include <dlfcn.h>
#endif
void * nvLoadLibrary(const char * name)
{
#if NV_OS_WIN32
return (void *)LoadLibraryExA( name, NULL, 0 );
#else
return dlopen(name, RTLD_LAZY);
#endif
}
void nvUnloadLibrary(void * handle)
{
nvDebugCheck(handle != NULL);
#if NV_OS_WIN32
FreeLibrary((HMODULE)handle);
#else
dlclose(handle);
#endif
}
void * nvBindSymbol(void * handle, const char * symbol)
{
#if NV_OS_WIN32
return (void *)GetProcAddress((HMODULE)handle, symbol);
#else
return (void *)dlsym(handle, symbol);
#endif
}

View File

@ -1,50 +0,0 @@
// This code is in the public domain -- castano@gmail.com
#ifndef NV_CORE_LIBRARY_H
#define NV_CORE_LIBRARY_H
#include <nvcore/nvcore.h>
#if NV_OS_WIN32
#define LIBRARY_NAME(name) #name ".dll"
#elif NV_OS_DARWIN
#define NV_LIBRARY_NAME(name) "lib" #name ".dylib"
#else
#define NV_LIBRARY_NAME(name) "lib" #name ".so"
#endif
NVCORE_API void * nvLoadLibrary(const char * name);
NVCORE_API void nvUnloadLibrary(void * lib);
NVCORE_API void * nvBindSymbol(void * lib, const char * symbol);
class NVCORE_CLASS Library
{
public:
Library(const char * name)
{
handle = nvLoadLibrary(name);
}
~Library()
{
if (isValid())
{
nvUnloadLibrary(handle);
}
}
bool isValid() const
{
return handle != NULL;
}
void * bindSymbol(const char * symbol)
{
return nvBindSymbol(handle, symbol);
}
private:
void * handle;
};
#endif // NV_CORE_LIBRARY_H

View File

@ -24,7 +24,7 @@ __forceinline void nvPrefetch(const void * mem)
#else // NV_CC_MSVC
// do nothing in other case.
#define nvPrefetch(ptr)
#define piPrefetch(ptr)
#endif // NV_CC_MSVC

View File

@ -43,12 +43,9 @@ public:
/** Delete owned pointer and assign new one. */
void operator=( T * p ) {
if (p != m_ptr)
{
delete m_ptr;
m_ptr = p;
}
}
/** Member access. */
T * operator -> () const {
@ -252,14 +249,14 @@ public:
/** -> operator. */
BaseClass * operator -> () const
{
nvCheck( m_ptr != NULL );
piCheck( m_ptr != NULL );
return m_ptr;
}
/** * operator. */
BaseClass & operator*() const
{
nvCheck( m_ptr != NULL );
piCheck( m_ptr != NULL );
return *m_ptr;
}

View File

@ -47,25 +47,25 @@ public:
/** @name Stream implementation. */
//@{
virtual void seek( uint pos )
virtual void seek( int pos )
{
nvDebugCheck(m_fp != NULL);
nvDebugCheck(pos < size());
nvDebugCheck(pos >= 0 && pos < size());
fseek(m_fp, pos, SEEK_SET);
}
virtual uint tell() const
virtual int tell() const
{
nvDebugCheck(m_fp != NULL);
return ftell(m_fp);
}
virtual uint size() const
virtual int size() const
{
nvDebugCheck(m_fp != NULL);
uint pos = ftell(m_fp);
int pos = ftell(m_fp);
fseek(m_fp, 0, SEEK_END);
uint end = ftell(m_fp);
int end = ftell(m_fp);
fseek(m_fp, pos, SEEK_SET);
return end;
}
@ -117,11 +117,11 @@ public:
/** @name Stream implementation. */
//@{
/// Write data.
virtual uint serialize( void * data, uint len )
virtual void serialize( void * data, int len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(m_fp != NULL);
return (uint)fwrite(data, 1, len, m_fp);
fwrite(data, len, 1, m_fp);
}
virtual bool isLoading() const
@ -156,11 +156,11 @@ public:
/** @name Stream implementation. */
//@{
/// Read data.
virtual uint serialize( void * data, uint len )
virtual void serialize( void * data, int len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(m_fp != NULL);
return (uint)fread(data, 1, len, m_fp);
fread(data, len, 1, m_fp);
}
virtual bool isLoading() const
@ -184,40 +184,33 @@ class NVCORE_CLASS MemoryInputStream : public Stream
public:
/// Ctor.
MemoryInputStream( const uint8 * mem, uint size ) :
MemoryInputStream( const uint8 * mem, int size ) :
m_mem(mem), m_ptr(mem), m_size(size) { }
/** @name Stream implementation. */
//@{
/// Read data.
virtual uint serialize( void * data, uint len )
virtual void serialize( void * data, int len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(!isError());
uint left = m_size - tell();
if (len > left) len = left;
memcpy( data, m_ptr, len );
m_ptr += len;
return len;
}
virtual void seek( uint pos )
virtual void seek( int pos )
{
nvDebugCheck(!isError());
m_ptr = m_mem + pos;
nvDebugCheck(!isError());
}
virtual uint tell() const
virtual int tell() const
{
nvDebugCheck(m_ptr >= m_mem);
return uint(m_ptr - m_mem);
return int(m_ptr - m_mem);
}
virtual uint size() const
virtual int size() const
{
return m_size;
}
@ -259,7 +252,7 @@ private:
const uint8 * m_mem;
const uint8 * m_ptr;
uint m_size;
int m_size;
};
@ -293,19 +286,17 @@ public:
/** @name Stream implementation. */
//@{
/// Read data.
virtual uint serialize( void * data, uint len )
virtual void serialize( void * data, int len )
{
nvDebugCheck(data != NULL);
len = m_s->serialize( data, len );
m_s->serialize( data, len );
if( m_s->isError() ) {
throw std::exception();
}
return len;
}
virtual void seek( uint pos )
virtual void seek( int pos )
{
m_s->seek( pos );
@ -314,12 +305,12 @@ public:
}
}
virtual uint tell() const
virtual int tell() const
{
return m_s->tell();
}
virtual uint size() const
virtual int size() const
{
return m_s->size();
}

View File

@ -209,11 +209,48 @@ StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
}
/** Copy string. */
StringBuilder::StringBuilder( const char * s ) : m_size(0), m_str(NULL)
StringBuilder::StringBuilder( const char * s )
{
copy(s);
}
/** Allocate and copy string. */
StringBuilder::StringBuilder( int size_hint, const StringBuilder & s) : m_size(size_hint), m_str(NULL)
{
nvDebugCheck(m_size > 0);
m_str = strAlloc(m_size);
copy(s);
}
/** Allocate and format string. */
StringBuilder::StringBuilder( const char * fmt, ... ) : m_size(0), m_str(NULL)
{
nvDebugCheck(fmt != NULL);
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/** Allocate and format string. */
StringBuilder::StringBuilder( int size_hint, const char * fmt, ... ) : m_size(size_hint), m_str(NULL)
{
nvDebugCheck(m_size > 0);
nvDebugCheck(fmt != NULL);
m_str = strAlloc(m_size);
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/** Delete the string. */
StringBuilder::~StringBuilder()
{
@ -241,7 +278,8 @@ StringBuilder & StringBuilder::format( const char * fmt, ... )
/** Format a string safely. */
StringBuilder & StringBuilder::format( const char * fmt, va_list arg )
{
nvDebugCheck(fmt != NULL);
nvCheck(fmt != NULL);
nvCheck(m_size >= 0);
if( m_size == 0 ) {
m_size = 64;
@ -289,7 +327,8 @@ StringBuilder & StringBuilder::format( const char * fmt, va_list arg )
/** Append a string. */
StringBuilder & StringBuilder::append( const char * s )
{
nvDebugCheck(s != NULL);
nvCheck(s != NULL);
nvCheck(m_size >= 0);
const uint slen = uint(strlen( s ));
@ -436,6 +475,31 @@ void StringBuilder::reset()
}
Path::Path(const char * fmt, ...)
{
nvDebugCheck( fmt != NULL );
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
Path::Path(int size_hint, const char * fmt, ...) : StringBuilder(size_hint)
{
nvDebugCheck( fmt != NULL );
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/// Get the file name from a path.
const char * Path::fileName() const
{
@ -545,6 +609,8 @@ const char * Path::extension(const char * str)
}
// static
String String::s_null(String::null);
/// Clone this string
String String::clone() const
@ -556,12 +622,12 @@ String String::clone() const
void String::setString(const char * str)
{
if( str == NULL ) {
data = NULL;
data = s_null.data;
}
else {
allocString( str );
addRef();
}
addRef();
}
void String::setString(const char * str, int length)
@ -575,10 +641,10 @@ void String::setString(const char * str, int length)
void String::setString(const StringBuilder & str)
{
if( str.str() == NULL ) {
data = NULL;
data = s_null.data;
}
else {
allocString(str);
}
addRef();
}
}

View File

@ -14,7 +14,7 @@ namespace nv
uint strHash(const char * str, uint h) NV_PURE;
/// String hash based on Bernstein's hash.
/// String hash vased on Bernstein's hash.
inline uint strHash(const char * data, uint h = 5381)
{
uint i;
@ -47,6 +47,9 @@ namespace nv
explicit StringBuilder( int size_hint );
StringBuilder( const char * str );
StringBuilder( const StringBuilder & );
StringBuilder( int size_hint, const StringBuilder & );
StringBuilder( const char * format, ... ) __attribute__((format (printf, 2, 3)));
StringBuilder( int size_hint, const char * format, ... ) __attribute__((format (printf, 3, 4)));
~StringBuilder();
@ -119,14 +122,16 @@ namespace nv
};
/// Path string. @@ This should be called PathBuilder.
/// Path string.
class NVCORE_CLASS Path : public StringBuilder
{
public:
Path() : StringBuilder() {}
explicit Path(int size_hint) : StringBuilder(size_hint) {}
Path(const char * str) : StringBuilder(str) {}
Path(const Path & path) : StringBuilder(path) {}
Path(const StringBuilder & str) : StringBuilder(str) {}
Path(int size_hint, const StringBuilder & str) : StringBuilder(size_hint, str) {}
Path(const char * format, ...) __attribute__((format (printf, 2, 3)));
Path(int size_hint, const char * format, ...) __attribute__((format (printf, 3, 4)));
const char * fileName() const;
const char * extension() const;
@ -151,14 +156,15 @@ namespace nv
/// Constructs a null string. @sa isNull()
String()
{
data = NULL;
data = s_null.data;
addRef();
}
/// Constructs a shared copy of str.
String(const String & str)
{
data = str.data;
if (data != NULL) addRef();
addRef();
}
/// Constructs a shared string from a standard string.
@ -182,6 +188,7 @@ namespace nv
/// Dtor.
~String()
{
nvDebugCheck(data != NULL);
release();
}
@ -205,62 +212,53 @@ namespace nv
/// Implement value semantics.
String & operator=( const String & str )
{
if (str.data != data)
{
release();
data = str.data;
addRef();
}
return *this;
}
/// Equal operator.
bool operator==( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return true;
}
if ((data == NULL) != (str.data == NULL)) {
return false;
}
return strcmp(data, str.data) == 0;
}
/// Equal operator.
bool operator==( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) == 0;
}
/// Not equal operator.
bool operator!=( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return false;
}
if ((data == NULL) != (str.data == NULL)) {
return true;
}
return strcmp(data, str.data) != 0;
}
/// Not equal operator.
bool operator!=( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) != 0;
}
/// Returns true if this string is the null string.
bool isNull() const { return data == NULL; }
bool isNull() const { nvDebugCheck(data != NULL); return data == s_null.data; }
/// Return the exact length.
uint length() const { nvDebugCheck(data != NULL); return uint(strlen(data)); }
@ -269,28 +267,31 @@ namespace nv
uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
/// const char * cast operator.
operator const char * () const { return data; }
operator const char * () const { nvDebugCheck(data != NULL); return data; }
/// Get string pointer.
const char * str() const { return data; }
const char * str() const { nvDebugCheck(data != NULL); return data; }
private:
// Add reference count.
void addRef()
{
if (data != NULL)
{
setRefCount(getRefCount() + 1);
enum null_t { null };
// Private constructor for null string.
String(null_t) {
setString("");
}
// Add reference count.
void addRef() {
nvDebugCheck(data != NULL);
setRefCount(getRefCount() + 1);
}
// Decrease reference count.
void release()
{
if (data != NULL)
{
void release() {
nvDebugCheck(data != NULL);
const uint16 count = getRefCount();
setRefCount(count - 1);
if( count - 1 == 0 ) {
@ -298,16 +299,12 @@ namespace nv
data = NULL;
}
}
}
uint16 getRefCount() const
{
nvDebugCheck(data != NULL);
uint16 getRefCount() const {
return *reinterpret_cast<const uint16 *>(data - 2);
}
void setRefCount(uint16 count) {
nvDebugCheck(data != NULL);
nvCheck(count < 0xFFFF);
*reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
}
@ -346,6 +343,8 @@ namespace nv
private:
NVCORE_API static String s_null;
const char * data;
};

View File

@ -41,17 +41,17 @@ public:
ByteOrder byteOrder() const { return m_byteOrder; }
/// Serialize the given data.
virtual uint serialize( void * data, uint len ) = 0;
/// Serialize the given data. @@ Should return bytes serialized?
virtual void serialize( void * data, int len ) = 0;
/// Move to the given position in the archive.
virtual void seek( uint pos ) = 0;
virtual void seek( int pos ) = 0;
/// Return the current position in the archive.
virtual uint tell() const = 0;
virtual int tell() const = 0;
/// Return the current size of the archive.
virtual uint size() const = 0;
virtual int size() const = 0;
/// Determine if there has been any error.
virtual bool isError() const = 0;
@ -136,13 +136,13 @@ public:
protected:
/// Serialize in the stream byte order.
Stream & byteOrderSerialize( void * v, uint len ) {
Stream & byteOrderSerialize( void * v, int len ) {
if( m_byteOrder == getSystemByteOrder() ) {
serialize( v, len );
}
else {
for( uint i = len; i > 0; i-- ) {
serialize( (uint8 *)v + i - 1, 1 );
for( int i=len-1; i>=0; i-- ) {
serialize( (uint8 *)v + i, 1 );
}
}
return *this;

View File

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (2 << 16) | (0 << 8) | (7); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (2); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;
@ -989,10 +989,10 @@ void DirectDrawSurface::readLinearImage(Image * img)
stream->serialize(&c, byteCount);
Color32 pixel(0, 0, 0, 0xFF);
pixel.r = PixelFormat::convert((c & header.pf.rmask) >> rshift, rsize, 8);
pixel.g = PixelFormat::convert((c & header.pf.gmask) >> gshift, gsize, 8);
pixel.b = PixelFormat::convert((c & header.pf.bmask) >> bshift, bsize, 8);
pixel.a = PixelFormat::convert((c & header.pf.amask) >> ashift, asize, 8);
pixel.r = PixelFormat::convert(c >> rshift, rsize, 8);
pixel.g = PixelFormat::convert(c >> gshift, gsize, 8);
pixel.b = PixelFormat::convert(c >> bshift, bsize, 8);
pixel.a = PixelFormat::convert(c >> ashift, asize, 8);
img->pixel(x, y) = pixel;
}

View File

@ -33,10 +33,11 @@
* http://www.dspguide.com/ch16.htm
*/
#include "Filter.h"
#include <nvmath/Vector.h> // Vector4
#include <nvcore/Containers.h> // swap
#include <nvmath/nvmath.h> // fabs
#include <nvmath/Vector.h> // Vector4
#include <nvimage/Filter.h>
using namespace nv;
@ -243,7 +244,7 @@ SincFilter::SincFilter(float w) : Filter(w) {}
float SincFilter::evaluate(float x) const
{
return sincf(PI * x);
return 0.0f;
}
@ -503,7 +504,7 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
for (int i = 0; i < 7; i++) {
for (int e = 0; e < 7; e++) {
m_data[(i + 1) * 9 + e + 1] += elements[i * 7 + e] * scale.z();
m_data[i * 9 + e + 1] += elements[i * 7 + e] * scale.z();
}
}
}
@ -518,7 +519,7 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
for (int i = 0; i < 5; i++) {
for (int e = 0; e < 5; e++) {
m_data[(i + 2) * 9 + e + 2] += elements[i * 5 + e] * scale.y();
m_data[i * 9 + e + 2] += elements[i * 5 + e] * scale.y();
}
}
}
@ -531,7 +532,7 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
for (int i = 0; i < 3; i++) {
for (int e = 0; e < 3; e++) {
m_data[(i + 3) * 9 + e + 3] += elements[i * 3 + e] * scale.x();
m_data[i * 9 + e + 3] += elements[i * 3 + e] * scale.x();
}
}
}
@ -540,17 +541,12 @@ void Kernel2::initBlendedSobel(const Vector4 & scale)
PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples/*= 32*/)
{
nvCheck(srcLength >= dstLength); // @@ Upsampling not implemented!
nvDebugCheck(samples > 0);
float scale = float(dstLength) / float(srcLength);
const float scale = float(dstLength) / float(srcLength);
const float iscale = 1.0f / scale;
if (scale > 1) {
// Upsampling.
samples = 1;
scale = 1;
}
m_length = dstLength;
m_width = f.width() * iscale;
m_windowSize = (int)ceilf(m_width * 2) + 1;

View File

@ -11,16 +11,16 @@ namespace nv
class Vector4;
/// Base filter class.
class NVIMAGE_CLASS Filter
class Filter
{
public:
Filter(float width);
virtual ~Filter();
NVIMAGE_API Filter(float width);
NVIMAGE_API virtual ~Filter();
float width() const { return m_width; }
float sampleDelta(float x, float scale) const;
float sampleBox(float x, float scale, int samples) const;
float sampleTriangle(float x, float scale, int samples) const;
NVIMAGE_API float width() const { return m_width; }
NVIMAGE_API float sampleDelta(float x, float scale) const;
NVIMAGE_API float sampleBox(float x, float scale, int samples) const;
NVIMAGE_API float sampleTriangle(float x, float scale, int samples) const;
virtual float evaluate(float x) const = 0;
@ -29,56 +29,56 @@ namespace nv
};
// Box filter.
class NVIMAGE_CLASS BoxFilter : public Filter
class BoxFilter : public Filter
{
public:
BoxFilter();
BoxFilter(float width);
virtual float evaluate(float x) const;
NVIMAGE_API BoxFilter();
NVIMAGE_API BoxFilter(float width);
NVIMAGE_API virtual float evaluate(float x) const;
};
// Triangle (bilinear/tent) filter.
class NVIMAGE_CLASS TriangleFilter : public Filter
class TriangleFilter : public Filter
{
public:
TriangleFilter();
TriangleFilter(float width);
virtual float evaluate(float x) const;
NVIMAGE_API TriangleFilter();
NVIMAGE_API TriangleFilter(float width);
NVIMAGE_API virtual float evaluate(float x) const;
};
// Quadratic (bell) filter.
class NVIMAGE_CLASS QuadraticFilter : public Filter
class QuadraticFilter : public Filter
{
public:
QuadraticFilter();
virtual float evaluate(float x) const;
NVIMAGE_API QuadraticFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
// Cubic filter from Thatcher Ulrich.
class NVIMAGE_CLASS CubicFilter : public Filter
class CubicFilter : public Filter
{
public:
CubicFilter();
virtual float evaluate(float x) const;
NVIMAGE_API CubicFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
// Cubic b-spline filter from Paul Heckbert.
class NVIMAGE_CLASS BSplineFilter : public Filter
class BSplineFilter : public Filter
{
public:
BSplineFilter();
virtual float evaluate(float x) const;
NVIMAGE_API BSplineFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
/// Mitchell & Netravali's two-param cubic
/// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88
class NVIMAGE_CLASS MitchellFilter : public Filter
class MitchellFilter : public Filter
{
public:
MitchellFilter();
virtual float evaluate(float x) const;
NVIMAGE_API MitchellFilter();
NVIMAGE_API virtual float evaluate(float x) const;
void setParameters(float b, float c);
NVIMAGE_API void setParameters(float a, float b);
private:
float p0, p2, p3;
@ -86,29 +86,29 @@ namespace nv
};
// Lanczos3 filter.
class NVIMAGE_CLASS LanczosFilter : public Filter
class LanczosFilter : public Filter
{
public:
LanczosFilter();
virtual float evaluate(float x) const;
NVIMAGE_API LanczosFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
// Sinc filter.
class NVIMAGE_CLASS SincFilter : public Filter
class SincFilter : public Filter
{
public:
SincFilter(float w);
virtual float evaluate(float x) const;
NVIMAGE_API SincFilter(float w);
NVIMAGE_API virtual float evaluate(float x) const;
};
// Kaiser filter.
class NVIMAGE_CLASS KaiserFilter : public Filter
class KaiserFilter : public Filter
{
public:
KaiserFilter(float w);
virtual float evaluate(float x) const;
NVIMAGE_API KaiserFilter(float w);
NVIMAGE_API virtual float evaluate(float x) const;
void setParameters(float a, float stretch);
NVIMAGE_API void setParameters(float a, float stretch);
private:
float alpha;
@ -118,12 +118,12 @@ namespace nv
/// A 1D kernel. Used to precompute filter weights.
class NVIMAGE_CLASS Kernel1
class Kernel1
{
NV_FORBID_COPY(Kernel1);
public:
Kernel1(const Filter & f, int iscale, int samples = 32);
~Kernel1();
NVIMAGE_API Kernel1(const Filter & f, int iscale, int samples = 32);
NVIMAGE_API ~Kernel1();
float valueAt(uint x) const {
nvDebugCheck(x < (uint)m_windowSize);
@ -138,7 +138,7 @@ namespace nv
return m_width;
}
void debugPrint();
NVIMAGE_API void debugPrint();
private:
int m_windowSize;
@ -148,15 +148,15 @@ namespace nv
/// A 2D kernel.
class NVIMAGE_CLASS Kernel2
class Kernel2
{
public:
Kernel2(uint width);
Kernel2(const Kernel2 & k);
~Kernel2();
NVIMAGE_API Kernel2(uint width);
NVIMAGE_API Kernel2(const Kernel2 & k);
NVIMAGE_API ~Kernel2();
void normalize();
void transpose();
NVIMAGE_API void normalize();
NVIMAGE_API void transpose();
float valueAt(uint x, uint y) const {
return m_data[y * m_windowSize + x];
@ -166,12 +166,12 @@ namespace nv
return m_windowSize;
}
void initLaplacian();
void initEdgeDetection();
void initSobel();
void initPrewitt();
NVIMAGE_API void initLaplacian();
NVIMAGE_API void initEdgeDetection();
NVIMAGE_API void initSobel();
NVIMAGE_API void initPrewitt();
void initBlendedSobel(const Vector4 & scale);
NVIMAGE_API void initBlendedSobel(const Vector4 & scale);
private:
const uint m_windowSize;
@ -180,12 +180,12 @@ namespace nv
/// A 1D polyphase kernel
class NVIMAGE_CLASS PolyphaseKernel
class PolyphaseKernel
{
NV_FORBID_COPY(PolyphaseKernel);
public:
PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32);
~PolyphaseKernel();
NVIMAGE_API PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32);
NVIMAGE_API ~PolyphaseKernel();
int windowSize() const {
return m_windowSize;
@ -205,7 +205,7 @@ namespace nv
return m_data[column * m_windowSize + x];
}
void debugPrint() const;
NVIMAGE_API void debugPrint() const;
private:
int m_windowSize;

View File

@ -1,18 +1,16 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Containers.h>
#include <nvcore/Ptr.h>
#include <nvmath/Color.h>
#include "FloatImage.h"
#include "Filter.h"
#include "Image.h"
#include <nvmath/Color.h>
#include <nvmath/Matrix.h>
#include <nvcore/Containers.h>
#include <nvcore/Ptr.h>
#include <math.h>
using namespace nv;
namespace
@ -142,8 +140,7 @@ Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const
/// Allocate a 2d float image of the given format and the given extents.
void FloatImage::allocate(uint c, uint w, uint h)
{
free();
nvCheck(m_mem == NULL);
m_width = w;
m_height = h;
m_componentNum = c;
@ -154,6 +151,7 @@ void FloatImage::allocate(uint c, uint w, uint h)
/// Free the image, but don't clear the members.
void FloatImage::free()
{
nvCheck(m_mem != NULL);
nv::mem::free( reinterpret_cast<void *>(m_mem) );
m_mem = NULL;
}
@ -378,7 +376,7 @@ FloatImage * FloatImage::fastDownSample() const
{
const uint n = w * h;
if ((m_width * m_height) & 1)
if (n & 1)
{
const float scale = 1.0f / (2 * n + 1);
@ -542,27 +540,73 @@ FloatImage * FloatImage::fastDownSample() const
return dst_image.release();
}
/*
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, WrapMode wm) const
{
const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2);
return downSample(kernel, w, h, wm);
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, uint w, uint h, WrapMode wm) const
{
nvCheck(!(kernel.windowSize() & 1)); // Make sure that kernel m_width is even.
AutoPtr<FloatImage> tmp_image( new FloatImage() );
tmp_image->allocate(m_componentNum, w, m_height);
AutoPtr<FloatImage> dst_image( new FloatImage() );
dst_image->allocate(m_componentNum, w, h);
const float xscale = float(m_width) / float(w);
const float yscale = float(m_height) / float(h);
for(uint c = 0; c < m_componentNum; c++) {
float * tmp_channel = tmp_image->channel(c);
for(uint y = 0; y < m_height; y++) {
for(uint x = 0; x < w; x++) {
float sum = this->applyKernelHorizontal(&kernel, uint(x*xscale), y, c, wm);
const uint tmp_index = tmp_image->index(x, y);
tmp_channel[tmp_index] = sum;
}
}
float * dst_channel = dst_image->channel(c);
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
float sum = tmp_image->applyKernelVertical(&kernel, uint(x*xscale), uint(y*yscale), c, wm);
const uint dst_index = dst_image->index(x, y);
dst_channel[dst_index] = sum;
}
}
}
return dst_image.release();
}
*/
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const
{
const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2);
return resize(filter, w, h, wm);
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm, uint alpha) const
{
const uint w = max(1, m_width / 2);
const uint h = max(1, m_height / 2);
return resize(filter, w, h, wm, alpha);
return downSample(filter, w, h, wm);
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm) const
FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapMode wm) const
{
// @@ Use monophase filters when frac(m_width / w) == 0
@ -631,56 +675,10 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
return dst_image.release();
}
/// Downsample applying a 1D kernel separately in each dimension.
FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const
{
nvCheck(alpha < m_componentNum);
AutoPtr<FloatImage> tmp_image( new FloatImage() );
AutoPtr<FloatImage> dst_image( new FloatImage() );
PolyphaseKernel xkernel(filter, m_width, w, 32);
PolyphaseKernel ykernel(filter, m_height, h, 32);
{
tmp_image->allocate(m_componentNum, w, m_height);
dst_image->allocate(m_componentNum, w, h);
Array<float> tmp_column(h);
tmp_column.resize(h);
for (uint c = 0; c < m_componentNum; c++)
{
float * tmp_channel = tmp_image->channel(c);
for (uint y = 0; y < m_height; y++) {
this->applyKernelHorizontal(xkernel, y, c, alpha, wm, tmp_channel + y * w);
}
}
// Process all channels before applying vertical kernel to make sure alpha has been computed.
for (uint c = 0; c < m_componentNum; c++)
{
float * dst_channel = dst_image->channel(c);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, alpha, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) {
dst_channel[y * w + x] = tmp_column[y];
}
}
}
}
return dst_image.release();
}
/// Apply 2D kernel at the given coordinates and return result.
float FloatImage::applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const
float FloatImage::applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const
{
nvDebugCheck(k != NULL);
@ -709,7 +707,7 @@ float FloatImage::applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode
/// Apply 1D vertical kernel at the given coordinates and return result.
float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const
float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const
{
nvDebugCheck(k != NULL);
@ -731,7 +729,7 @@ float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, uint c, W
}
/// Apply 1D horizontal kernel at the given coordinates and return result.
float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const
float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const
{
nvDebugCheck(k != NULL);
@ -754,7 +752,7 @@ float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c,
/// Apply 1D vertical kernel at the given coordinates and return result.
void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * __restrict output) const
void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, int c, WrapMode wm, float * output) const
{
const uint length = k.length();
const float scale = float(length) / float(m_height);
@ -786,7 +784,7 @@ void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, W
}
/// Apply 1D horizontal kernel at the given coordinates and return result.
void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * __restrict output) const
void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, int c, WrapMode wm, float * output) const
{
const uint length = k.length();
const float scale = float(length) / float(m_width);
@ -817,93 +815,3 @@ void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c,
}
}
/// Apply 1D vertical kernel at the given coordinates and return result.
void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * __restrict output) const
{
const uint length = k.length();
const float scale = float(length) / float(m_height);
const float iscale = 1.0f / scale;
const float width = k.width();
const int windowSize = k.windowSize();
const float * channel = this->channel(c);
const float * alpha = this->channel(a);
for (uint i = 0; i < length; i++)
{
const float center = (0.5f + i) * iscale;
const int left = (int)floorf(center - width);
const int right = (int)ceilf(center + width);
nvCheck(right - left <= windowSize);
float norm = 0;
float sum = 0;
for (int j = 0; j < windowSize; ++j)
{
const int idx = this->index(x, j+left, wm);
float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f));
norm += w;
sum += w * channel[idx];
}
output[i] = sum / norm;
}
}
/// Apply 1D horizontal kernel at the given coordinates and return result.
void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * __restrict output) const
{
const uint length = k.length();
const float scale = float(length) / float(m_width);
const float iscale = 1.0f / scale;
const float width = k.width();
const int windowSize = k.windowSize();
const float * channel = this->channel(c);
const float * alpha = this->channel(a);
for (uint i = 0; i < length; i++)
{
const float center = (0.5f + i) * iscale;
const int left = (int)floorf(center - width);
const int right = (int)ceilf(center + width);
nvDebugCheck(right - left <= windowSize);
float norm = 0.0f;
float sum = 0;
for (int j = 0; j < windowSize; ++j)
{
const int idx = this->index(left + j, y, wm);
float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f));
norm += w;
sum += w * channel[idx];
}
output[i] = sum / norm;
}
}
FloatImage* FloatImage::clone() const
{
FloatImage* copy = new FloatImage();
copy->m_width = m_width;
copy->m_height = m_height;
copy->m_componentNum = m_componentNum;
copy->m_count = m_count;
if(m_mem)
{
copy->allocate(m_componentNum, m_width, m_height);
memcpy(copy->m_mem, m_mem, m_count * sizeof(float));
}
return copy;
}

View File

@ -3,20 +3,12 @@
#ifndef NV_IMAGE_FLOATIMAGE_H
#define NV_IMAGE_FLOATIMAGE_H
#include <nvimage/nvimage.h>
#include <nvmath/Vector.h>
#include <nvcore/Debug.h>
#include <nvcore/Containers.h> // clamp
#include <stdlib.h> // abs
#include <nvimage/nvimage.h>
namespace nv
{
class Vector4;
class Matrix;
class Image;
class Filter;
class Kernel1;
@ -71,19 +63,17 @@ public:
NVIMAGE_API FloatImage * fastDownSample() const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, uint w, uint h, WrapMode wm) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const;
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const;
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const;
//@}
NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const;
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const;
NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const;
NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const;
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, int c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, int c, WrapMode wm, float * output) const;
uint width() const { return m_width; }
@ -119,9 +109,6 @@ public:
float sampleLinearMirror(float x, float y, int c) const;
//@}
FloatImage* clone() const;
public:
uint index(uint x, uint y) const;
@ -239,15 +226,11 @@ inline uint FloatImage::indexRepeat(int x, int y) const
inline uint FloatImage::indexMirror(int x, int y) const
{
if (m_width == 1) x = 0;
x = abs(x);
while (x >= m_width) {
x = abs(m_width + m_width - x - 2);
}
if (m_height == 1) y = 0;
y = abs(y);
while (y >= m_height) {
y = abs(m_height + m_height - y - 2);

View File

@ -78,7 +78,7 @@ void Image::unwrap()
void Image::free()
{
nv::mem::free(m_data);
::free(m_data);
m_data = NULL;
}

View File

@ -21,16 +21,15 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Ptr.h>
#include <nvmath/Color.h>
#include <nvimage/NormalMap.h>
#include <nvimage/Filter.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Image.h>
#include <nvmath/Color.h>
#include <nvcore/Ptr.h>
using namespace nv;
// Create normal map using the given kernels.

View File

@ -39,7 +39,7 @@ namespace nv
bool isSupported() const
{
if (version != 1) {
nvDebug("*** bad version number %u\n", version);
printf("*** bad version number %u\n", version);
return false;
}
if (channel_count > 4) {

View File

@ -12,14 +12,10 @@ http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
@@ This code needs to be reviewed, I'm not sure it's correct.
*/
#include <nvimage/Quantize.h>
#include <nvimage/Image.h>
#include <nvimage/PixelFormat.h>
#include <nvmath/Color.h>
#include <nvcore/Containers.h> // swap
#include <nvimage/Image.h>
#include <nvimage/Quantize.h>
using namespace nv;
@ -51,20 +47,94 @@ void nv::Quantize::BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ )
// Simple quantization.
void nv::Quantize::RGB16( Image * image )
{
Truncate(image, 5, 6, 5, 8);
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel32 = image->pixel(x, y);
// Convert to 16 bit and back to 32 using regular bit expansion.
Color32 pixel16 = toColor32( toColor16(pixel32) );
// Store color.
image->pixel(x, y) = pixel16;
}
}
}
// Alpha quantization.
void nv::Quantize::Alpha4( Image * image )
{
Truncate(image, 8, 8, 8, 4);
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y);
// Convert to 4 bit using regular bit expansion.
pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
// Store color.
image->pixel(x, y) = pixel;
}
}
}
// Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg_RGB16( Image * image )
{
FloydSteinberg(image, 5, 6, 5, 8);
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
// @@ Use fixed point?
Vector3 * row0 = new Vector3[w+2];
Vector3 * row1 = new Vector3[w+2];
memset(row0, 0, sizeof(Vector3)*(w+2));
memset(row1, 0, sizeof(Vector3)*(w+2));
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel32 = image->pixel(x, y);
// Add error. // @@ We shouldn't clamp here!
pixel32.r = clamp(int(pixel32.r) + int(row0[1+x].x()), 0, 255);
pixel32.g = clamp(int(pixel32.g) + int(row0[1+x].y()), 0, 255);
pixel32.b = clamp(int(pixel32.b) + int(row0[1+x].z()), 0, 255);
// Convert to 16 bit. @@ Use regular clamp?
Color32 pixel16 = toColor32( toColor16(pixel32) );
// Store color.
image->pixel(x, y) = pixel16;
// Compute new error.
Vector3 diff(float(pixel32.r - pixel16.r), float(pixel32.g - pixel16.g), float(pixel32.b - pixel16.b));
// Propagate new error.
row0[1+x+1] += 7.0f / 16.0f * diff;
row1[1+x-1] += 3.0f / 16.0f * diff;
row1[1+x+0] += 5.0f / 16.0f * diff;
row1[1+x+1] += 1.0f / 16.0f * diff;
}
swap(row0, row1);
memset(row1, 0, sizeof(Vector3)*(w+2));
}
delete [] row0;
delete [] row1;
}
@ -118,55 +188,17 @@ void nv::Quantize::FloydSteinberg_BinaryAlpha( Image * image, int alpha_threshol
// Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg_Alpha4( Image * image )
{
FloydSteinberg(image, 8, 8, 8, 4);
}
void nv::Quantize::Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
Color32 pixel = image->pixel(x, y);
// Convert to our desired size, and reconstruct.
pixel.r = PixelFormat::convert(pixel.r, 8, rsize);
pixel.r = PixelFormat::convert(pixel.r, rsize, 8);
pixel.g = PixelFormat::convert(pixel.g, 8, gsize);
pixel.g = PixelFormat::convert(pixel.g, gsize, 8);
pixel.b = PixelFormat::convert(pixel.b, 8, bsize);
pixel.b = PixelFormat::convert(pixel.b, bsize, 8);
pixel.a = PixelFormat::convert(pixel.a, 8, asize);
pixel.a = PixelFormat::convert(pixel.a, asize, 8);
// Store color.
image->pixel(x, y) = pixel;
}
}
}
// Error diffusion. Floyd Steinberg.
void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize)
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
Vector4 * row0 = new Vector4[w+2];
Vector4 * row1 = new Vector4[w+2];
memset(row0, 0, sizeof(Vector4)*(w+2));
memset(row1, 0, sizeof(Vector4)*(w+2));
// @@ Use fixed point?
float * row0 = new float[(w+2)];
float * row1 = new float[(w+2)];
memset(row0, 0, sizeof(float)*(w+2));
memset(row1, 0, sizeof(float)*(w+2));
for(uint y = 0; y < h; y++) {
for(uint x = 0; x < w; x++) {
@ -174,34 +206,16 @@ void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bs
Color32 pixel = image->pixel(x, y);
// Add error.
pixel.r = clamp(int(pixel.r) + int(row0[1+x].x()), 0, 255);
pixel.g = clamp(int(pixel.g) + int(row0[1+x].y()), 0, 255);
pixel.b = clamp(int(pixel.b) + int(row0[1+x].z()), 0, 255);
pixel.a = clamp(int(pixel.a) + int(row0[1+x].w()), 0, 255);
int alpha = int(pixel.a) + int(row0[1+x]);
int r = pixel.r;
int g = pixel.g;
int b = pixel.b;
int a = pixel.a;
// Convert to our desired size, and reconstruct.
r = PixelFormat::convert(r, 8, rsize);
r = PixelFormat::convert(r, rsize, 8);
g = PixelFormat::convert(g, 8, gsize);
g = PixelFormat::convert(g, gsize, 8);
b = PixelFormat::convert(b, 8, bsize);
b = PixelFormat::convert(b, bsize, 8);
a = PixelFormat::convert(a, 8, asize);
a = PixelFormat::convert(a, asize, 8);
// Convert to 4 bit using regular bit expansion.
pixel.a = (pixel.a & 0xF0) | ((pixel.a & 0xF0) >> 4);
// Store color.
image->pixel(x, y) = Color32(r, g, b, a);
image->pixel(x, y) = pixel;
// Compute new error.
Vector4 diff(float(int(pixel.r) - r), float(int(pixel.g) - g), float(int(pixel.b) - b), float(int(pixel.a) - a));
float diff = float(alpha - pixel.a);
// Propagate new error.
row0[1+x+1] += 7.0f / 16.0f * diff;
@ -211,9 +225,10 @@ void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bs
}
swap(row0, row1);
memset(row1, 0, sizeof(Vector4)*(w+2));
memset(row1, 0, sizeof(float)*(w+2));
}
delete [] row0;
delete [] row1;
}

View File

@ -3,9 +3,6 @@
#ifndef NV_IMAGE_QUANTIZE_H
#define NV_IMAGE_QUANTIZE_H
#include <nvimage/nvimage.h>
namespace nv
{
class Image;
@ -20,9 +17,6 @@ namespace nv
void FloydSteinberg_BinaryAlpha(Image * img, int alpha_threshold = 127);
void FloydSteinberg_Alpha4(Image * img);
void Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
void FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize);
// @@ Add palette quantization algorithms!
}
}

View File

@ -108,7 +108,7 @@ public:
float area() const
{
const Vector3 d = extents();
return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
}
/// Get the volume of the box.
@ -118,14 +118,6 @@ public:
return 8.0f * (d.x() * d.y() * d.z());
}
/// Return true if the box contains the given point.
bool contains(Vector3::Arg p) const
{
return
m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
}
private:
Vector3 m_mins;
@ -133,6 +125,15 @@ private:
};
/*
/// Point inside box test.
inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
{
return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
}
*/
} // nv namespace

View File

@ -332,7 +332,7 @@ inline Matrix transpose(Matrix::Arg m)
Matrix r;
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < 4; j++)
for (int j = 0; j < 4; i++)
{
r(i, j) = m(j, i);
}

View File

@ -48,37 +48,19 @@
#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
*/
inline double sqrt_assert(const double f)
{
nvDebugCheck(f >= 0.0f);
return sqrt(f);
}
inline float sqrtf_assert(const float f)
inline float sqrt_assert(const float f)
{
nvDebugCheck(f >= 0.0f);
return sqrtf(f);
}
inline double acos_assert(const double f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return acos(f);
}
inline float acosf_assert(const float f)
inline float acos_assert(const float f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return acosf(f);
}
inline double asin_assert(const double f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return asin(f);
}
inline float asinf_assert(const float f)
inline float asin_assert(const float f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return asinf(f);
@ -86,11 +68,11 @@ inline float asinf_assert(const float f)
// Replace default functions with asserting ones.
#define sqrt sqrt_assert
#define sqrtf sqrtf_assert
#define sqrtf sqrt_assert
#define acos acos_assert
#define acosf acosf_assert
#define acosf acos_assert
#define asin asin_assert
#define asinf asinf_assert
#define asinf asin_assert
#if NV_OS_WIN32
#include <float.h>
@ -154,11 +136,6 @@ inline float lerp(float f0, float f1, float t)
return f0 * s + f1 * t;
}
inline float square(float f)
{
return f * f;
}
} // nv
#endif // NV_MATH_H

View File

@ -13,10 +13,10 @@ SET(NVTT_SRCS
CompressDXT.cpp
CompressRGB.h
CompressRGB.cpp
FastCompressDXT.h
FastCompressDXT.cpp
QuickCompressDXT.h
QuickCompressDXT.cpp
OptimalCompressDXT.h
OptimalCompressDXT.cpp
SingleColorLookup.h
CompressionOptions.h
CompressionOptions.cpp
@ -44,8 +44,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVTT_EXPORTS)
IF(NVTT_SHARED)
ADD_DEFINITIONS(-DNVTT_SHARED=1)
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ADD_LIBRARY(nvtt SHARED ${DXT_SRCS})
ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED)

View File

@ -29,8 +29,8 @@
#include "nvtt.h"
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
@ -57,33 +57,26 @@ using namespace nv;
using namespace nvtt;
nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
{
}
nv::FastCompressor::~FastCompressor()
{
}
void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1(rgba, &block);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -93,19 +86,27 @@ void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptio
}
void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions)
void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & outputOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
// @@ We could do better here: check for single RGB, but varying alpha.
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1a(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1a(rgba, &block);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -115,18 +116,17 @@ void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOpti
}
void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions)
void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT3 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
QuickCompress::compressDXT3(rgba, &block);
if (outputOptions.outputHandler != NULL) {
@ -137,19 +137,19 @@ void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outpu
}
void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions)
void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT5 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
QuickCompress::compressDXT5(rgba, &block, 0);
rgba.init(image, x, y);
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
nv::compressBlock_BoundsRange(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -159,21 +159,23 @@ void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outpu
}
void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions)
void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT5 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
// copy X coordinate to alpha channel and Y coordinate to green channel.
rgba.swizzleDXT5n();
QuickCompress::compressDXT5(rgba, &block, 0);
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
nv::compressBlock_BoundsRange(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -183,45 +185,59 @@ void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outp
}
nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
void nv::fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{
// @@ TODO
// compress red channel (X)
}
nv::SlowCompressor::~SlowCompressor()
void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{
// @@ TODO
// compress red, green channels (X,Y)
}
void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
void nv::doPrecomputation()
{
m_image = image;
m_alphaMode = alphaMode;
static bool done = false; // @@ Stop using statics for reentrancy. Although the worst that could happen is that this stuff is precomputed multiple times.
if (!done)
{
done = true;
squish::FastClusterFit::DoPrecomputation();
}
}
void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void nv::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT1 block;
squish::WeightedClusterFit fit;
doPrecomputation();
//squish::WeightedClusterFit fit;
//squish::ClusterFit fit;
//squish::FastClusterFit fit;
squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block);
QuickCompress::compressDXT1(rgba.color(0), &block);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}
@ -234,10 +250,10 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
}
void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT1 block;
@ -248,20 +264,11 @@ void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compr
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
bool anyAlpha = false;
bool allAlpha = true;
for (uint i = 0; i < 16; i++)
if (rgba.isSingleColor())
{
if (rgba.color(i).a < 128) anyAlpha = true;
else allAlpha = false;
}
if ((!anyAlpha && rgba.isSingleColor() || allAlpha))
{
OptimalCompress::compressDXT1a(rgba.color(0), &block);
QuickCompress::compressDXT1a(rgba.color(0), &block);
}
else
{
@ -278,37 +285,29 @@ void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compr
}
void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void nv::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT3 block;
squish::WeightedClusterFit fit;
//squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
// Compress explicit alpha.
OptimalCompress::compressDXT3A(rgba, &block.alpha);
QuickCompress::compressDXT3A(rgba, &block.alpha);
// Compress color.
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -317,10 +316,10 @@ void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compre
}
}
void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void nv::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT5 block;
@ -331,12 +330,12 @@ void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compre
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
// Compress alpha.
if (compressionOptions.quality == Quality_Highest)
{
OptimalCompress::compressDXT5A(rgba, &block.alpha);
compressBlock_BruteForce(rgba, &block.alpha);
}
else
{
@ -344,16 +343,9 @@ void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compre
}
// Compress color.
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -363,25 +355,28 @@ void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compre
}
void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT5 block;
doPrecomputation();
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
// copy X coordinate to green channel and Y coordinate to alpha channel.
rgba.swizzleDXT5n();
// Compress X.
if (compressionOptions.quality == Quality_Highest)
{
OptimalCompress::compressDXT5A(rgba, &block.alpha);
compressBlock_BruteForce(rgba, &block.alpha);
}
else
{
@ -389,7 +384,7 @@ void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compr
}
// Compress Y.
OptimalCompress::compressDXT1G(rgba, &block.color);
QuickCompress::compressDXT1G(rgba, &block.color);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -399,10 +394,10 @@ void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compr
}
void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
AlphaBlockDXT5 block;
@ -410,11 +405,11 @@ void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compres
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(m_image, x, y);
rgba.init(image, x, y);
if (compressionOptions.quality == Quality_Highest)
{
OptimalCompress::compressDXT5A(rgba, &block);
compressBlock_BruteForce(rgba, &block);
}
else
{
@ -429,10 +424,10 @@ void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compres
}
void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
const uint w = image->width();
const uint h = image->height();
ColorBlock xcolor;
ColorBlock ycolor;
@ -442,16 +437,16 @@ void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compres
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
xcolor.init(m_image, x, y);
xcolor.init(image, x, y);
xcolor.splatX();
ycolor.init(m_image, x, y);
ycolor.init(image, x, y);
ycolor.splatY();
if (compressionOptions.quality == Quality_Highest)
{
OptimalCompress::compressDXT5A(xcolor, &block.x);
OptimalCompress::compressDXT5A(ycolor, &block.y);
compressBlock_BruteForce(xcolor, &block.x);
compressBlock_BruteForce(ycolor, &block.y);
}
else
{

View File

@ -32,45 +32,25 @@ namespace nv
class Image;
class FloatImage;
class FastCompressor
{
public:
FastCompressor();
~FastCompressor();
void doPrecomputation();
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
// Fast compressors.
void fastCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
class SlowCompressor
{
public:
SlowCompressor();
~SlowCompressor();
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
// Normal compressors.
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
// External compressors.
#if defined(HAVE_S3QUANT)

View File

@ -123,7 +123,7 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
}
// Zero padding.
for (uint x = w * byteCount; x < pitch; x++)
for (uint x = w; x < pitch; x++)
{
*(dst + x) = 0;
}

View File

@ -34,7 +34,6 @@
#include <nvimage/Filter.h>
#include <nvimage/Quantize.h>
#include <nvimage/NormalMap.h>
#include <nvimage/PixelFormat.h>
#include "Compressor.h"
#include "InputOptions.h"
@ -42,6 +41,7 @@
#include "OutputOptions.h"
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "CompressRGB.h"
#include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h"
@ -200,22 +200,29 @@ namespace nvtt
AutoPtr<FloatImage> m_floatImage;
};
} // nvtt namespace
}
Compressor::Compressor() : m(*new Compressor::Private())
{
// CUDA initialization.
m.cudaSupported = cuda::isHardwarePresent();
m.cudaEnabled = false;
m.cudaDevice = -1;
m.cudaEnabled = m.cudaSupported;
enableCudaAcceleration(m.cudaSupported);
if (m.cudaEnabled)
{
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
}
Compressor::~Compressor()
{
enableCudaAcceleration(false);
delete &m;
}
@ -225,33 +232,17 @@ void Compressor::enableCudaAcceleration(bool enable)
{
if (m.cudaSupported)
{
if (m.cudaEnabled && !enable)
m.cudaEnabled = enable;
}
if (m.cudaEnabled && m.cuda == NULL)
{
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
if (m.cudaDevice != -1)
{
// Exit device.
cuda::exitDevice();
}
}
else if (!m.cudaEnabled && enable)
{
// Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
m.cudaEnabled = cuda::initDevice(&m.cudaDevice);
if (m.cudaEnabled)
{
// Create compressor if initialization succeeds.
m.cuda = new CudaCompressor();
// But cleanup if failed.
if (!m.cuda->isValid())
{
enableCudaAcceleration(false);
}
}
}
}
}
@ -431,7 +422,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
quantizeMipmap(mipmap, compressionOptions);
compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
compressMipmap(mipmap, compressionOptions, outputOptions);
// Compute extents of next mipmap:
w = max(1U, w / 2);
@ -580,7 +571,7 @@ void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Priva
// Resize image.
BoxFilter boxFilter;
mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
mipmap.setImage(mipmap.asFloatImage()->downSample(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
}
@ -627,6 +618,13 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
{
nvDebugCheck(mipmap.asFixedImage() != NULL);
if (compressionOptions.enableColorDithering)
{
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
{
Quantize::FloydSteinberg_RGB16(mipmap.asMutableFixedImage());
}
}
if (compressionOptions.binaryAlpha)
{
if (compressionOptions.enableAlphaDithering)
@ -638,68 +636,30 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
}
}
if (compressionOptions.enableColorDithering || compressionOptions.enableAlphaDithering)
else
{
uint rsize = 8;
uint gsize = 8;
uint bsize = 8;
uint asize = 8;
if (compressionOptions.enableColorDithering)
{
if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5)
{
rsize = 5;
gsize = 6;
bsize = 5;
}
else if (compressionOptions.format == Format_RGB)
{
uint rshift, gshift, bshift;
PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize);
PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize);
PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize);
}
}
if (compressionOptions.enableAlphaDithering)
{
if (compressionOptions.format == Format_DXT3)
{
asize = 4;
Quantize::Alpha4(mipmap.asMutableFixedImage());
}
else if (compressionOptions.format == Format_RGB)
else if (compressionOptions.format == Format_DXT1a)
{
uint ashift;
PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize);
Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
}
}
if (compressionOptions.binaryAlpha)
{
asize = 8; // Already quantized.
}
Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
}
}
// Compress the given mipmap.
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
const Image * image = mipmap.asFixedImage();
nvDebugCheck(image != NULL);
FastCompressor fast;
fast.setImage(image, inputOptions.alphaMode);
SlowCompressor slow;
slow.setImage(image, inputOptions.alphaMode);
const bool useCuda = cudaEnabled && image->width() * image->height() >= 512;
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{
compressRGB(image, outputOptions, compressionOptions);
@ -723,19 +683,18 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
fastCompressDXT1(image, outputOptions);
}
else
{
if (useCuda)
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
cuda->compressDXT1(image, outputOptions, compressionOptions);
}
else
{
slow.compressDXT1(compressionOptions, outputOptions);
compressDXT1(image, outputOptions, compressionOptions);
}
}
}
@ -743,18 +702,18 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1a(outputOptions);
fastCompressDXT1a(image, outputOptions);
}
else
{
if (useCuda)
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
/*cuda*/compressDXT1a(image, outputOptions, compressionOptions);
}
else
{
slow.compressDXT1a(compressionOptions, outputOptions);
compressDXT1a(image, outputOptions, compressionOptions);
}
}
}
@ -762,19 +721,18 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT3(outputOptions);
fastCompressDXT3(image, outputOptions);
}
else
{
if (useCuda)
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT3(compressionOptions, outputOptions);
cuda->compressDXT3(image, outputOptions, compressionOptions);
}
else
{
slow.compressDXT3(compressionOptions, outputOptions);
compressDXT3(image, outputOptions, compressionOptions);
}
}
}
@ -782,19 +740,18 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT5(outputOptions);
fastCompressDXT5(image, outputOptions);
}
else
{
if (useCuda)
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT5(compressionOptions, outputOptions);
cuda->compressDXT5(image, outputOptions, compressionOptions);
}
else
{
slow.compressDXT5(compressionOptions, outputOptions);
compressDXT5(image, outputOptions, compressionOptions);
}
}
}
@ -802,20 +759,20 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT5n(outputOptions);
fastCompressDXT5n(image, outputOptions);
}
else
{
slow.compressDXT5n(compressionOptions, outputOptions);
compressDXT5n(image, outputOptions, compressionOptions);
}
}
else if (compressionOptions.format == Format_BC4)
{
slow.compressBC4(compressionOptions, outputOptions);
compressBC4(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_BC5)
{
slow.compressBC5(compressionOptions, outputOptions);
compressBC5(image, outputOptions, compressionOptions);
}
return true;

View File

@ -60,15 +60,13 @@ namespace nvtt
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
bool compressMipmap(const Mipmap & mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:
bool cudaSupported;
bool cudaEnabled;
int cudaDevice;
nv::AutoPtr<nv::CudaCompressor> cuda;

View File

@ -0,0 +1,456 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "FastCompressDXT.h"
#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#if defined(__SSE__)
#include <xmmintrin.h>
#endif
#if defined(__MMX__)
#include <mmintrin.h>
#endif
#undef __VEC__
#if defined(__VEC__)
#include <altivec.h>
#undef bool
#endif
// Online Resources:
// - http://www.jasondorie.com/ImageLib.zip
// - http://homepage.hispeed.ch/rscheidegger/dri_experimental/s3tc_index.html
// - http://www.sjbrown.co.uk/?article=dxt
using namespace nv;
#if defined(__SSE2__) && 0
// @@ TODO
typedef __m128i VectorColor;
inline static __m128i loadColor(Color32 c)
{
return ...;
}
inline static __m128i absoluteDifference(__m128i a, __m128i b)
{
return ...;
}
inline uint colorDistance(__m128i a, __m128i b)
{
return 0;
}
#elif defined(__MMX__) && 0
typedef __m64 VectorColor;
inline static __m64 loadColor(Color32 c)
{
return _mm_unpacklo_pi8(_mm_cvtsi32_si64(c), _mm_setzero_si64());
}
inline static __m64 absoluteDifference(__m64 a, __m64 b)
{
// = |a-b| or |b-a|
return _mm_or_si64(_mm_subs_pu16(a, b), _mm_subs_pu16(b, a));
}
inline uint colorDistance(__m64 a, __m64 b)
{
union {
__m64 v;
uint16 part[4];
} s;
s.v = absoluteDifference(a, b);
// @@ This is very slow!
return s.part[0] + s.part[1] + s.part[2] + s.part[3];
}
#define vectorEnd _mm_empty
#elif defined(__VEC__)
typedef vector signed int VectorColor;
inline static vector signed int loadColor(Color32 c)
{
return (vector signed int) (c.r, c.g, c.b, c.a);
}
// Get the absolute distance between the given colors.
inline static uint colorDistance(vector signed int c0, vector signed int c1)
{
int result;
vector signed int v = vec_sums(vec_abs(vec_sub(c0, c1)), (vector signed int)0);
vec_ste(vec_splat(v, 3), 0, &result);
return result;
}
inline void vectorEnd()
{
}
#else
typedef Color32 VectorColor;
inline static Color32 loadColor(Color32 c)
{
return c;
}
inline static Color32 premultiplyAlpha(Color32 c)
{
Color32 pm;
pm.r = (c.r * c.a) >> 8;
pm.g = (c.g * c.a) >> 8;
pm.b = (c.b * c.a) >> 8;
pm.a = c.a;
return pm;
}
inline static uint sqr(uint s)
{
return s*s;
}
// Get the absolute distance between the given colors.
inline static uint colorDistance(Color32 c0, Color32 c1)
{
return sqr(c0.r - c1.r) + sqr(c0.g - c1.g) + sqr(c0.b - c1.b);
//return abs(c0.r - c1.r) + abs(c0.g - c1.g) + abs(c0.b - c1.b);
}
inline void vectorEnd()
{
}
#endif
inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const VectorColor vcolor0 = loadColor(palette[0]);
const VectorColor vcolor1 = loadColor(palette[1]);
const VectorColor vcolor2 = loadColor(palette[2]);
const VectorColor vcolor3 = loadColor(palette[3]);
uint indices = 0;
for(int i = 0; i < 16; i++) {
const VectorColor vcolor = loadColor(rgba.color(i));
uint d0 = colorDistance(vcolor0, vcolor);
uint d1 = colorDistance(vcolor1, vcolor);
uint d2 = colorDistance(vcolor2, vcolor);
uint d3 = colorDistance(vcolor3, vcolor);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
vectorEnd();
return indices;
}
// Compressor that uses bounding box.
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 c0, c1;
rgba.boundsRange(&c1, &c0);
block->col0 = toColor16(c0);
block->col1 = toColor16(c1);
nvDebugCheck(block->col0.u > block->col1.u);
// Use 4 color mode only.
//if (block->col0.u < block->col1.u) {
// swap(block->col0.u, block->col1.u);
//}
Color32 palette[4];
block->evaluatePalette4(palette);
block->indices = computeIndices(rgba, palette);
}
// Encode DXT3 block.
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block)
{
compressBlock_BoundsRange(rgba, &block->color);
compressBlock(rgba, &block->alpha);
}
// Encode DXT3 alpha block.
void nv::compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block)
{
block->alpha0 = rgba.color(0).a >> 4;
block->alpha1 = rgba.color(1).a >> 4;
block->alpha2 = rgba.color(2).a >> 4;
block->alpha3 = rgba.color(3).a >> 4;
block->alpha4 = rgba.color(4).a >> 4;
block->alpha5 = rgba.color(5).a >> 4;
block->alpha6 = rgba.color(6).a >> 4;
block->alpha7 = rgba.color(7).a >> 4;
block->alpha8 = rgba.color(8).a >> 4;
block->alpha9 = rgba.color(9).a >> 4;
block->alphaA = rgba.color(10).a >> 4;
block->alphaB = rgba.color(11).a >> 4;
block->alphaC = rgba.color(12).a >> 4;
block->alphaD = rgba.color(13).a >> 4;
block->alphaE = rgba.color(14).a >> 4;
block->alphaF = rgba.color(15).a >> 4;
}
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
totalError += besterror;
block->setIndex(i, best);
}
return totalError;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += besterror;
}
return totalError;
}
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block)
{
Color32 c0, c1;
rgba.boundsRangeAlpha(&c1, &c0);
block->color.col0 = toColor16(c0);
block->color.col1 = toColor16(c1);
nvDebugCheck(block->color.col0.u > block->color.col1.u);
Color32 palette[4];
block->color.evaluatePalette4(palette);
block->color.indices = computeIndices(rgba, palette);
nvDebugCheck(c0.a <= c1.a);
block->alpha.alpha0 = c0.a;
block->alpha.alpha1 = c1.a;
computeAlphaIndices(rgba, &block->alpha);
}
uint nv::compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alpha0 = 0;
uint8 alpha1 = 255;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
alpha0 = max(alpha0, alpha);
alpha1 = min(alpha1, alpha);
}
alpha0 = alpha0 - (alpha0 - alpha1) / 32;
alpha1 = alpha1 + (alpha0 - alpha1) / 32;
AlphaBlockDXT5 block0;
block0.alpha0 = alpha0;
block0.alpha1 = alpha1;
uint error0 = computeAlphaIndices(rgba, &block0);
AlphaBlockDXT5 block1;
block1.alpha0 = alpha1;
block1.alpha1 = alpha0;
uint error1 = computeAlphaIndices(rgba, &block1);
if (error0 < error1)
{
*block = block0;
return error0;
}
else
{
*block = block1;
return error1;
}
}
uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 mina = 255;
uint8 maxa = 0;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
mina = min(mina, alpha);
maxa = max(maxa, alpha);
}
block->alpha0 = maxa;
block->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, block);
int besta0 = maxa;
int besta1 = mina;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
block->alpha0 = a0;
block->alpha1 = a1;
int error = computeAlphaError(rgba, block);
if (error < besterror)
{
besterror = error;
besta0 = a0;
besta1 = a1;
}
}
}
block->alpha0 = besta0;
block->alpha1 = besta1;
}
return computeAlphaIndices(rgba, block);
}

View File

@ -0,0 +1,84 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_FASTCOMPRESSDXT_H
#define NV_TT_FASTCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
// Color compression:
// Compressor that uses the extremes of the luminance axis.
// void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the extremes of the luminance axis.
// void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box and takes alpha into account.
// void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Simple, but slow compressor that tests all color pairs.
// void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force 6d search along the best fit axis.
// void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Spatial greedy search.
// void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force compressor for DXT5n
// void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// Minimize error of the endpoints.
// void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
// uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
// uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// Alpha compression:
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block);
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
// uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
} // nv namespace
#endif // NV_TT_FASTCOMPRESSDXT_H

View File

@ -94,7 +94,7 @@ void InputOptions::reset()
m.textureType = TextureType_2D;
m.inputFormat = InputFormat_BGRA_8UB;
m.alphaMode = AlphaMode_None;
m.alphaMode = AlphaMode_Transparency;
m.inputGamma = 2.2f;
m.outputGamma = 2.2f;

View File

@ -1,368 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
using namespace nv;
using namespace OptimalCompress;
namespace
{
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
// Choose quantized color that produces less error. Used by DXT3 compressor.
inline static uint quantize4(uint8 a)
{
int q0 = (a >> 4) - 1;
int q1 = (a >> 4);
int q2 = (a >> 4) + 1;
q0 = (q0 << 4) | q0;
q1 = (q1 << 4) | q1;
q2 = (q2 << 4) | q2;
int d0 = abs(q0 - a);
int d1 = abs(q1 - a);
int d2 = abs(q2 - a);
if (d0 < d1 && d0 < d2) return q0 >> 4;
if (d1 < d2) return q1 >> 4;
return q2 >> 4;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best;
for (uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += besterror;
}
return totalError;
}
static void computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
block->setIndex(i, best);
}
}
} // namespace
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
if (rgba.a < 128)
{
dxtBlock->col0.u = 0;
dxtBlock->col1.u = 0;
dxtBlock->indices = 0xFFFFFFFF;
}
else
{
compressDXT1(rgba, dxtBlock);
}
}
// Brute force green channel compressor
void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
void OptimalCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
dxtBlock->alpha0 = quantize4(rgba.color(0).a);
dxtBlock->alpha1 = quantize4(rgba.color(1).a);
dxtBlock->alpha2 = quantize4(rgba.color(2).a);
dxtBlock->alpha3 = quantize4(rgba.color(3).a);
dxtBlock->alpha4 = quantize4(rgba.color(4).a);
dxtBlock->alpha5 = quantize4(rgba.color(5).a);
dxtBlock->alpha6 = quantize4(rgba.color(6).a);
dxtBlock->alpha7 = quantize4(rgba.color(7).a);
dxtBlock->alpha8 = quantize4(rgba.color(8).a);
dxtBlock->alpha9 = quantize4(rgba.color(9).a);
dxtBlock->alphaA = quantize4(rgba.color(10).a);
dxtBlock->alphaB = quantize4(rgba.color(11).a);
dxtBlock->alphaC = quantize4(rgba.color(12).a);
dxtBlock->alphaD = quantize4(rgba.color(13).a);
dxtBlock->alphaE = quantize4(rgba.color(14).a);
dxtBlock->alphaF = quantize4(rgba.color(15).a);
}
void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
{
uint8 mina = 255;
uint8 maxa = 0;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
mina = min(mina, alpha);
maxa = max(maxa, alpha);
}
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
int besta0 = maxa;
int besta1 = mina;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock);
if (error < besterror)
{
besterror = error;
besta0 = a0;
besta1 = a1;
}
}
}
dxtBlock->alpha0 = besta0;
dxtBlock->alpha1 = besta1;
}
computeAlphaIndices(rgba, dxtBlock);
}

View File

@ -1,49 +0,0 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_OPTIMALCOMPRESSDXT_H
#define NV_TT_OPTIMALCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
namespace OptimalCompress
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
}
} // nv namespace
#endif // NV_TT_OPTIMALCOMPRESSDXT_H

View File

@ -27,7 +27,7 @@
#include <nvimage/BlockDXT.h>
#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
using namespace nv;
@ -288,6 +288,70 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
dxtBlock->indices = computeIndices3(block, a, b);
}*/
namespace
{
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
} // namespace
namespace
{
@ -439,13 +503,28 @@ namespace
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
if (rgba.isSingleColor())
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
OptimalCompress::compressDXT1(rgba.color(0), dxtBlock);
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
else
}
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
// read block
Vector3 block[16];
@ -474,27 +553,28 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
optimizeEndPoints4(block, dxtBlock);
}
}
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
void QuickCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
bool hasAlpha = false;
for (uint i = 0; i < 16; i++)
if (rgba.a == 0)
{
if (rgba.color(i).a < 128) {
hasAlpha = true;
break;
dxtBlock->col0.u = 0;
dxtBlock->col1.u = 0;
dxtBlock->indices = 0xFFFFFFFF;
}
}
if (!hasAlpha)
else
{
compressDXT1(rgba, dxtBlock);
}
}
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
if (!rgba.hasAlpha())
{
compressDXT1(rgba, dxtBlock);
}
// @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode.
//else if (rgba.isSingleColorNoAlpha()) { ... }
else
{
// read block
@ -527,14 +607,95 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
}
void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
// Brute force green channel compressor
void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
compressDXT1(rgba, &dxtBlock->color);
OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha);
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/)
void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
// @@ Round instead of truncate. When rounding take into account bit expansion.
dxtBlock->alpha0 = rgba.color(0).a >> 4;
dxtBlock->alpha1 = rgba.color(1).a >> 4;
dxtBlock->alpha2 = rgba.color(2).a >> 4;
dxtBlock->alpha3 = rgba.color(3).a >> 4;
dxtBlock->alpha4 = rgba.color(4).a >> 4;
dxtBlock->alpha5 = rgba.color(5).a >> 4;
dxtBlock->alpha6 = rgba.color(6).a >> 4;
dxtBlock->alpha7 = rgba.color(7).a >> 4;
dxtBlock->alpha8 = rgba.color(8).a >> 4;
dxtBlock->alpha9 = rgba.color(9).a >> 4;
dxtBlock->alphaA = rgba.color(10).a >> 4;
dxtBlock->alphaB = rgba.color(11).a >> 4;
dxtBlock->alphaC = rgba.color(12).a >> 4;
dxtBlock->alphaD = rgba.color(13).a >> 4;
dxtBlock->alphaE = rgba.color(14).a >> 4;
dxtBlock->alphaF = rgba.color(15).a >> 4;
}
void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
{
compressDXT1(rgba, &dxtBlock->color);
compressDXT3A(rgba, &dxtBlock->alpha);
}
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
{
uint8 alpha0 = 0;
uint8 alpha1 = 255;
@ -554,7 +715,7 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
AlphaBlockDXT5 bestblock = block;
for (int i = 0; i < iterationCount; i++)
while(true)
{
optimizeAlpha8(rgba, &block);
uint error = computeAlphaIndices(rgba, &block);
@ -578,8 +739,9 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
*dxtBlock = bestblock;
}
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock)
{
compressDXT1(rgba, &dxtBlock->color);
compressDXT5A(rgba, &dxtBlock->alpha, iterationCount);
compressDXT5A(rgba, &dxtBlock->alpha);
}

View File

@ -37,13 +37,17 @@ namespace nv
namespace QuickCompress
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8);
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock);
}
} // nv namespace

File diff suppressed because it is too large Load Diff

View File

@ -159,7 +159,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
}
}
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16])
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
@ -189,11 +189,6 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
*sameColor = (axis == make_float3(0, 0, 0));
// Single color compressor needs unweighted colors.
if (*sameColor) colors[idx] = rawColors[idx];
dps[idx] = dot(rawColors[idx], axis);
#if __DEVICE_EMULATION__
@ -597,40 +592,6 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
}
*/
__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
errors[idx] = bestError;
}
__device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
@ -666,6 +627,7 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
}
////////////////////////////////////////////////////////////////////////////////
// Find index with minimum error
////////////////////////////////////////////////////////////////////////////////
@ -836,39 +798,6 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
}
}
__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float3 colors[16];
__shared__ float3 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs, &sameColor);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalLevel4Permutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result)
{
@ -876,18 +805,11 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
__shared__ float3 sums[16];
__shared__ float weights[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, weights, xrefs, &sameColor);
loadColorBlock(image, colors, sums, weights, xrefs);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
@ -1111,11 +1033,6 @@ extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result
compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressLevel4DXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);

View File

@ -30,14 +30,13 @@
#include <nvtt/CompressionOptions.h>
#include <nvtt/OutputOptions.h>
#include <nvtt/QuickCompressDXT.h>
#include <nvtt/OptimalCompressDXT.h>
#include "CudaCompressDXT.h"
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda_runtime_api.h>
#include <cuda_runtime.h>
#endif
#include <time.h>
@ -53,7 +52,6 @@ using namespace nvtt;
extern "C" void setupCompressKernel(const float weights[3]);
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
#include "Bitmaps.h" // @@ Rename to BitmapTable.h
@ -120,25 +118,20 @@ bool CudaCompressor::isValid() const
// @@ This code is very repetitive and needs to be cleaned up.
void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
/// Compress image using CUDA.
void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void CudaCompressor::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
@ -183,7 +176,7 @@ void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressio
}
clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
@ -197,18 +190,18 @@ void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressio
/// Compress image using CUDA.
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage);
convertToBlockLinear(image, blockLinearImage);
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
@ -228,20 +221,13 @@ void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressio
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++)
{
ColorBlock rgba(blockLinearImage + (bn + i) * 16);
OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
QuickCompress::compressDXT3A(rgba, alphaBlocks + i);
}
// Check for errors.
@ -273,7 +259,7 @@ void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressio
}
clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks);
free(blockLinearImage);
@ -288,18 +274,18 @@ void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressio
/// Compress image using CUDA.
void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage);
convertToBlockLinear(image, blockLinearImage);
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
@ -319,14 +305,7 @@ void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressio
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++)
@ -364,7 +343,7 @@ void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressio
}
clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks);
free(blockLinearImage);
@ -378,3 +357,185 @@ void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressio
}
#if 0
class Task
{
public:
explicit Task(uint numBlocks) : blockMaxCount(numBlocks), blockCount(0)
{
// System memory allocations.
blockLinearImage = new uint[blockMaxCount * 16];
xrefs = new uint[blockMaxCount * 16];
// Device memory allocations.
cudaMalloc((void**) &d_blockLinearImage, blockMaxCount * 16 * sizeof(uint));
cudaMalloc((void**) &d_compressedImage, blockMaxCount * 8U);
// @@ Check for allocation errors.
}
~Task()
{
delete [] blockLinearImage;
delete [] xrefs;
cudaFree(d_blockLinearImage);
cudaFree(d_compressedImage);
}
void addColorBlock(const ColorBlock & rgba)
{
nvDebugCheck(!isFull());
// @@ Count unique colors?
/*
// Convert colors to vectors.
Array<Vector3> pointArray(16);
for(int i = 0; i < 16; i++) {
const Color32 color = rgba.color(i);
pointArray.append(Vector3(color.r, color.g, color.b));
}
// Find best fit line.
const Vector3 axis = Fit::bestLine(pointArray).direction();
// Project points to axis.
float dps[16];
uint * order = &xrefs[blockCount * 16];
for (uint i = 0; i < 16; ++i)
{
dps[i] = dot(pointArray[i], axis);
order[i] = i;
}
// Sort them.
for (uint i = 0; i < 16; ++i)
{
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
{
swap(dps[j], dps[j - 1]);
swap(order[j], order[j - 1]);
}
}
*/
// Write sorted colors to blockLinearImage.
for(uint i = 0; i < 16; ++i)
{
// blockLinearImage[blockCount * 16 + i] = rgba.color(order[i]);
blockLinearImage[blockCount * 16 + i] = rgba.color(i);
}
++blockCount;
}
bool isFull()
{
nvDebugCheck(blockCount <= blockMaxCount);
return blockCount == blockMaxCount;
}
void flush(const OutputOptions::Private & outputOptions)
{
if (blockCount == 0)
{
// Nothing to do.
return;
}
// Copy input color blocks.
cudaMemcpy(d_blockLinearImage, blockLinearImage, blockCount * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressKernelDXT1(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
uint * compressedImage = blockLinearImage;
cudaMemcpy(compressedImage, d_compressedImage, blockCount * 8, cudaMemcpyDeviceToHost);
// @@ Sort block indices.
// Output result.
if (outputOptions.outputHandler != NULL)
{
// outputOptions.outputHandler->writeData(compressedImage, blockCount * 8);
}
blockCount = 0;
}
private:
const uint blockMaxCount;
uint blockCount;
uint * blockLinearImage;
uint * xrefs;
uint * d_blockLinearImage;
uint * d_compressedImage;
};
void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
#if defined HAVE_CUDA
const uint w = image->width();
const uint h = image->height();
const uint blockNum = ((w + 3) / 4) * ((h + 3) / 4);
const uint blockMax = 32768; // 49152, 65535
setupCompressKernelDXT1(compressionOptions.colorWeight.ptr());
ColorBlock rgba;
Task task(min(blockNum, blockMax));
clock_t start = clock();
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
task.addColorBlock(rgba);
if (task.isFull())
{
task.flush(outputOptions);
}
}
}
task.flush(outputOptions);
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
#endif // 0

View File

@ -39,20 +39,15 @@ namespace nv
bool isValid() const;
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
private:
uint * m_bitmapTable;
uint * m_data;
uint * m_result;
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
} // nv namespace

View File

@ -148,7 +148,7 @@ inline __device__ bool singleColor(const float3 * colors)
bool sameColor = false;
for (int i = 0; i < 16; i++)
{
sameColor &= (colors[i] == colors[0]);
sameColor &= (colors[idx] == colors[0]);
}
return sameColor;
#else

View File

@ -22,18 +22,15 @@
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_runtime.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
@ -70,78 +67,16 @@ return !bIsWow64;
}
#endif
*/
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
Library nvcuda("nvcuda.dll");
#else
Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif
if (!nvcuda.isValid())
{
nvDebug("*** CUDA driver not found.\n");
return false;
}
if (version >= 2000)
{
void * address = nvcuda.bindSymbol("cuStreamCreate");
if (address == NULL) {
nvDebug("*** CUDA driver version < 2.0.\n");
return false;
}
}
if (version >= 2010)
{
void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
if (address == NULL) {
nvDebug("*** CUDA driver version < 2.1.\n");
return false;
}
}
if (version >= 2020)
{
typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
if (driverGetVersion == NULL) {
nvDebug("*** CUDA driver version < 2.2.\n");
return false;
}
int driverVersion;
CUresult err = driverGetVersion(&driverVersion);
if (err != CUDA_SUCCESS) {
nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err));
return false;
}
return driverVersion >= version;
}
#endif // HAVE_CUDA
return true;
}
/// Determine if CUDA is available.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
// Make sure that CUDA driver matches CUDA runtime.
if (!isCudaDriverAvailable(CUDART_VERSION))
{
nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
return false;
}
#if NV_OS_WIN32
if (isWindowsVista()) return false;
//if (isWindowsVista() || !isWow32()) return false;
#endif
int count = deviceCount();
if (count == 1)
{
@ -154,11 +89,9 @@ bool nv::cuda::isHardwarePresent()
{
return false;
}
}
// @@ Make sure that warp size == 32
// @@ Make sure available GPU is faster than the CPU.
}
return count > 0;
#else
@ -182,119 +115,14 @@ int nv::cuda::deviceCount()
return 0;
}
// Make sure device meets requirements:
// - Not an emulation device.
// - Not an integrated device?
// - Faster than CPU.
bool nv::cuda::isValidDevice(int i)
{
#if defined HAVE_CUDA
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, i);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (device_properties.major == -1 || device_properties.minor == -1) {
// Emulation device.
return false;
}
#if CUDART_VERSION >= 2030 // 2.3
/*if (device_properties.integrated)
{
// Integrated devices.
return false;
}*/
#endif
return true;
#else
return false;
#endif
}
int nv::cuda::getFastestDevice()
{
int max_gflops_device = -1;
#if defined HAVE_CUDA
int max_gflops = 0;
const int device_count = deviceCount();
for (int i = 0; i < device_count; i++)
{
if (isValidDevice(i))
{
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, i);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (gflops > max_gflops)
{
max_gflops = gflops;
max_gflops_device = i;
}
}
}
#endif
return max_gflops_device;
}
/// Activate the given devices.
bool nv::cuda::initDevice(int * device_ptr)
bool nv::cuda::setDevice(int i)
{
nvDebugCheck(device_ptr != NULL);
nvCheck(i < deviceCount());
#if defined HAVE_CUDA
#if CUDART_VERSION >= 2030 // 2.3
// Set device flags to yield in order to play nice with other threads and to find out if CUDA was already active.
cudaError_t resul = cudaSetDeviceFlags(cudaDeviceScheduleYield);
#endif
int device = getFastestDevice();
if (device == -1)
{
// No device is fast enough.
*device_ptr = -1;
return false;
}
// Select CUDA device.
cudaError_t result = cudaSetDevice(device);
if (result == cudaErrorSetOnActiveProcess)
{
int device;
result = cudaGetDevice(&device);
*device_ptr = -1; // No device to cleanup.
return isValidDevice(device); // Return true if device is valid.
}
else if (result != cudaSuccess)
{
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
*device_ptr = -1;
return false;
}
*device_ptr = device;
return true;
cudaError_t result = cudaSetDevice(i);
return result == cudaSuccess;
#else
return false;
#endif
}
void nv::cuda::exitDevice()
{
#if defined HAVE_CUDA
cudaError_t result = cudaThreadExit();
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
}
#endif
}

View File

@ -31,11 +31,7 @@ namespace nv
{
bool isHardwarePresent();
int deviceCount();
int getFastestDevice();
bool isValidDevice(int i);
bool initDevice(int * device_ptr);
void exitDevice();
bool setDevice(int i);
};
} // nv namespace

View File

@ -73,7 +73,7 @@ namespace nvtt
Format_DXT1a, // DXT1 with binary alpha.
Format_DXT3,
Format_DXT5,
Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
// DX10 formats.
Format_BC1 = Format_DXT1,
@ -194,7 +194,7 @@ namespace nvtt
// Describe the format of the input.
NVTT_API void setFormat(InputFormat format);
// Set the way the input alpha channel is interpreted.
// Set the way the input alpha channel is interpreted. @@ Not implemented!
NVTT_API void setAlphaMode(AlphaMode alphaMode);
// Set gamma settings.

View File

@ -1,8 +1,13 @@
PROJECT(squish)
ENABLE_TESTING()
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
SET(SQUISH_SRCS
# alpha.cpp
# alpha.h
# clusterfit.cpp
# clusterfit.h
fastclusterfit.cpp
fastclusterfit.h
weightedclusterfit.cpp
@ -16,13 +21,32 @@ SET(SQUISH_SRCS
config.h
maths.cpp
maths.h
# rangefit.cpp
# rangefit.h
# singlecolourfit.cpp
# singlecolourfit.h
# singlecolourlookup.inl
# squish.cpp
# squish.h
simd.h
simd_sse.h
simd_ve.h)
ADD_LIBRARY(squish STATIC ${SQUISH_SRCS})
IF(CMAKE_COMPILER_IS_GNUCXX)
SET_TARGET_PROPERTIES(squish PROPERTIES COMPILE_FLAGS -fPIC)
ENDIF(CMAKE_COMPILER_IS_GNUCXX)
# libpng
#FIND_PACKAGE(PNG)
#IF(PNG_FOUND)
# INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
# ADD_EXECUTABLE(squishpng extra/squishpng.cpp)
# TARGET_LINK_LIBRARIES(squishpng squish ${PNG_LIBRARY})
#ENDIF(PNG_FOUND)
##ADD_EXECUTABLE(squishgen extra/squishgen.cpp)
#ADD_EXECUTABLE(squishtest extra/squishtest.cpp)
#TARGET_LINK_LIBRARIES(squishtest squish)
#ADD_TEST(SQUISHTEST squishtest)

View File

@ -29,8 +29,6 @@
#include "colourblock.h"
#include <cfloat>
#include "fastclusterlookup.inl"
namespace squish {
FastClusterFit::FastClusterFit()
@ -99,6 +97,91 @@ void FastClusterFit::SetColourSet( ColourSet const* colours, int flags )
}
struct Precomp {
float alpha2_sum;
float beta2_sum;
float alphabeta_sum;
float factor;
};
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
void FastClusterFit::DoPrecomputation()
{
int i = 0;
// Three element clusters:
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
int c2 = 16 - c0 - c1;
/*if (c2 == 16) {
// a = b = x2 / 16
s_threeElement[i].alpha2_sum = 0;
s_threeElement[i].beta2_sum = 16;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_threeElement[i].alpha2_sum = 16;
s_threeElement[i].beta2_sum = 0;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
}
i++;
}
}
//printf("%d three cluster elements\n", i);
// Four element clusters:
i = 0;
for( int c0 = 0; c0 <= 16; c0++)
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
int c3 = 16 - c0 - c1 - c2;
/*if (c3 == 16) {
// a = b = x3 / 16
s_fourElement[i].alpha2_sum = 16.0f;
s_fourElement[i].beta2_sum = 0.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_fourElement[i].alpha2_sum = 0.0f;
s_fourElement[i].beta2_sum = 16.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
}
i++;
}
}
}
//printf("%d four cluster elements\n", i);
}
void FastClusterFit::SetMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD

View File

@ -44,6 +44,8 @@ public:
void SetMetric(float r, float g, float b);
float GetBestError() const;
static void DoPrecomputation();
// Make them public
virtual void Compress3( void* block );
virtual void Compress4( void* block );

File diff suppressed because it is too large Load Diff

View File

@ -24,7 +24,6 @@
-------------------------------------------------------------------------- */
#include "maths.h"
#include "simd.h"
#include <cfloat>
namespace squish {
@ -61,39 +60,12 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
}
#define POWER_ITERATION_COUNT 8
#if SQUISH_USE_SIMD
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
Vec4 v = VEC4_CONST( 1.0f );
for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
{
// matrix multiply
Vec4 w = row0*v.SplatX();
w = MultiplyAdd(row1, v.SplatY(), w);
w = MultiplyAdd(row2, v.SplatZ(), w);
const int NUM = 8;
// get max component from xyz in all channels
Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
// divide through and advance
v = w*Reciprocal(a);
}
return v.GetVec3();
}
#else
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
Vec3 v(1, 1, 1);
for (int i = 0; i < POWER_ITERATION_COUNT; i++)
{
for(int i = 0; i < NUM; i++) {
float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];
@ -110,6 +82,5 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
return v;
}
#endif
} // namespace squish

View File

@ -131,13 +131,10 @@ namespace squish {
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -149,11 +146,11 @@ namespace squish {
int b0 = 0, b1 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec4 x1 = zero;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec4 const x2 = m_xsum - x1 - x0;
@ -176,21 +173,24 @@ namespace squish {
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -221,17 +221,17 @@ namespace squish {
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
// save the block
@ -244,16 +244,12 @@ namespace squish {
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half = VEC4_CONST(0.5f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -264,30 +260,30 @@ namespace squish {
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec4 x1 = zero;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec4 x2 = zero;
for( int c2 = 0; c2 <= count-c0-c1; c2++)
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
Vec4 const x3 = m_xsum - x2 - x1 - x0;
//Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); // alphax_sum, alpha2_sum
Vec4 const alpha2_sum = alphax_sum.SplatW();
//Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
Vec4 const betax_sum = x3 + MultiplyAdd(x2, twothirds, x1 * onethird); // betax_sum, beta2_sum
Vec4 const beta2_sum = betax_sum.SplatW();
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum
//float const alphabeta_sum = w1 * (2.0f/9.0f) + w2 * (2.0f/9.0f);
Vec4 const alphabeta_sum = two * (x1 * onethird + x2 * onethird).SplatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
@ -295,21 +291,24 @@ namespace squish {
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -347,20 +346,18 @@ namespace squish {
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
// save the error
m_besterror = besterror;
@ -371,13 +368,6 @@ namespace squish {
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -389,12 +379,12 @@ namespace squish {
int b0 = 0, b1 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
float w2 = m_wsum - w0 - w1;
@ -410,9 +400,16 @@ namespace squish {
Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -453,20 +450,18 @@ namespace squish {
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock3( beststart, bestend, bestindices, block );
WriteColourBlock3( beststart, bestend, ordered, block );
// save the error
m_besterror = besterror;
@ -475,13 +470,6 @@ namespace squish {
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -492,17 +480,17 @@ namespace squish {
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec3 x2(0.0f);
float w2 = 0.0f;
for( int c2 = 0; c2 <= count-c0-c1; c2++)
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
float w3 = m_wsum - w0 - w1 - w2;
@ -517,9 +505,16 @@ namespace squish {
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -568,20 +563,18 @@ namespace squish {
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart, bestend, bestindices, block );
WriteColourBlock4( beststart, bestend, ordered, block );
// save the error
m_besterror = besterror;

View File

@ -42,11 +42,11 @@ struct MyOutputHandler : public nvtt::OutputHandler
MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), stream(new nv::StdOutputStream(name)) {}
virtual ~MyOutputHandler() { delete stream; }
void setTotal(int64 t)
virtual void setTotal(int64 t)
{
total = t + 128;
}
void setDisplayProgress(bool b)
virtual void setDisplayProgress(bool b)
{
verbose = b;
}
@ -87,10 +87,7 @@ struct MyErrorHandler : public nvtt::ErrorHandler
{
virtual void error(nvtt::Error e)
{
#if _DEBUG
nvDebugBreak();
#endif
printf("Error: '%s'\n", nvtt::errorString(e));
}
};
@ -257,12 +254,7 @@ int main(int argc, char *argv[])
}
}
const uint version = nvtt::version();
const uint major = version / 100;
const uint minor = version % 100;
printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007\n\n", major, minor);
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
if (input.isNull())
{
@ -289,7 +281,7 @@ int main(int argc, char *argv[])
printf(" -bc4 \tBC4 format (ATI1)\n");
printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");
return EXIT_FAILURE;
return 1;
}
// @@ Make sure input file exists.
@ -304,13 +296,13 @@ int main(int argc, char *argv[])
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
return EXIT_FAILURE;
return 1;
}
if (!dds.isSupported() || dds.isTexture3D())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
return EXIT_FAILURE;
return 1;
}
uint faceCount;
@ -347,7 +339,7 @@ int main(int argc, char *argv[])
if (!image.load(input))
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return EXIT_FAILURE;
return 1;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
@ -381,6 +373,7 @@ int main(int argc, char *argv[])
inputOptions.setMipmapGeneration(false);
}
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(format);
if (fast)
@ -410,22 +403,12 @@ int main(int argc, char *argv[])
if (outputHandler.stream->isError())
{
fprintf(stderr, "Error opening '%s' for writting\n", output.str());
return EXIT_FAILURE;
return 1;
}
nvtt::Compressor compressor;
compressor.enableCudaAcceleration(!nocuda);
printf("CUDA acceleration ");
if (compressor.isCudaAccelerationEnabled())
{
printf("ENABLED\n\n");
}
else
{
printf("DISABLED\n\n");
}
outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions));
outputHandler.setDisplayProgress(!silent);
@ -438,16 +421,27 @@ int main(int argc, char *argv[])
// fflush(stdout);
// getchar();
/* LARGE_INTEGER temp;
QueryPerformanceFrequency((LARGE_INTEGER*) &temp);
double freq = ((double) temp.QuadPart) / 1000.0;
LARGE_INTEGER start_time;
QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
*/
clock_t start = clock();
if (!compressor.process(inputOptions, compressionOptions, outputOptions))
{
return EXIT_FAILURE;
}
compressor.process(inputOptions, compressionOptions, outputOptions);
/*
LARGE_INTEGER end_time;
QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
float diff_time = (float) (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
printf("\rtime taken: %.3f seconds\n", diff_time/1000);
*/
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return EXIT_SUCCESS;
return 0;
}

View File

@ -84,7 +84,7 @@ struct Error
{
mabse /= samples;
mse /= samples;
rmse = sqrtf(mse);
rmse = sqrt(mse);
psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
}
@ -134,7 +134,7 @@ struct NormalError
{
ade /= samples;
mse /= samples * 3;
rmse = sqrtf(mse);
rmse = sqrt(mse);
psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
}
}

View File

@ -73,12 +73,10 @@ int main(int argc, char *argv[])
float scale = 0.5f;
float gamma = 2.2f;
nv::AutoPtr<nv::Filter> filter;
nv::Filter * filter = NULL;
nv::Path input;
nv::Path output;
nv::FloatImage::WrapMode wrapMode = nv::FloatImage::WrapMode_Mirror;
// Parse arguments.
for (int i = 1; i < argc; i++)
{
@ -110,18 +108,9 @@ int main(int argc, char *argv[])
else if (strcmp("lanczos", argv[i]) == 0) filter = new nv::LanczosFilter();
else if (strcmp("kaiser", argv[i]) == 0) {
filter = new nv::KaiserFilter(3);
((nv::KaiserFilter *)filter.ptr())->setParameters(4.0f, 1.0f);
((nv::KaiserFilter *)filter)->setParameters(4.0f, 1.0f);
}
}
else if (strcmp("-w", argv[i]) == 0)
{
if (i+1 == argc) break;
i++;
if (strcmp("mirror", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Mirror;
else if (strcmp("repeat", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Repeat;
else if (strcmp("clamp", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Clamp;
}
else if (argv[i][0] != '-')
{
input = argv[i];
@ -151,10 +140,6 @@ int main(int argc, char *argv[])
printf(" * mitchell\n");
printf(" * lanczos\n");
printf(" * kaiser\n");
printf(" -w mode One of the following: (default = 'mirror')\n");
printf(" * mirror\n");
printf(" * repeat\n");
printf(" * clamp\n");
return 1;
}
@ -170,14 +155,15 @@ int main(int argc, char *argv[])
nv::FloatImage fimage(&image);
fimage.toLinear(0, 3, gamma);
nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(*filter, uint(image.width() * scale), uint(image.height() * scale), wrapMode));
nv::AutoPtr<nv::FloatImage> fresult(fimage.downSample(*filter, uint(image.width() * scale), uint(image.height() * scale), nv::FloatImage::WrapMode_Mirror));
nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output);
nv::ImageIO::saveTGA(stream, result.ptr()); // @@ Add generic save function. Add support for png too.
delete filter;
return 0;
}