Tag 2.0.8 for release.

This commit is contained in:
castano
2010-05-14 18:01:41 +00:00
parent f6a39d6eab
commit eb01ca604f
375 changed files with 12760 additions and 28091 deletions

View File

@ -1,12 +1,11 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/poshlib)
SUBDIRS(nvcore)
SUBDIRS(nvmath)
SUBDIRS(nvimage)
SUBDIRS(nvtt)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
# OpenGL
INCLUDE(FindOpenGL)
IF(OPENGL_FOUND)
@ -16,7 +15,8 @@ ELSE(OPENGL_FOUND)
ENDIF(OPENGL_FOUND)
# GLUT
INCLUDE(FindGLUT)
INCLUDE(${NV_CMAKE_DIR}/FindGLUT.cmake)
#INCLUDE(FindGLUT)
IF(GLUT_FOUND)
MESSAGE(STATUS "Looking for GLUT - found")
ELSE(GLUT_FOUND)
@ -48,7 +48,7 @@ ELSE(CG_FOUND)
ENDIF(CG_FOUND)
# CUDA
FIND_PACKAGE(CUDA)
INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake)
IF(CUDA_FOUND)
SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise")
MESSAGE(STATUS "Looking for CUDA - found")
@ -65,15 +65,6 @@ ELSE(MAYA_FOUND)
MESSAGE(STATUS "Looking for Maya - not found")
ENDIF(MAYA_FOUND)
# FreeImage
INCLUDE(${NV_CMAKE_DIR}/FindFreeImage.cmake)
IF(FREEIMAGE_FOUND)
SET(HAVE_FREEIMAGE ${FREEIMAGE_FOUND} CACHE BOOL "Set to TRUE if FreeImage is found, FALSE otherwise")
MESSAGE(STATUS "Looking for FreeImage - found")
ELSE(FREEIMAGE_FOUND)
MESSAGE(STATUS "Looking for FreeImage - not found")
ENDIF(FREEIMAGE_FOUND)
# JPEG
INCLUDE(FindJPEG)
IF(JPEG_FOUND)
@ -93,7 +84,6 @@ ELSE(PNG_FOUND)
ENDIF(PNG_FOUND)
# TIFF
SET(TIFF_NAMES libtiff)
INCLUDE(FindTIFF)
IF(TIFF_FOUND)
SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise")
@ -111,15 +101,6 @@ ELSE(OPENEXR_FOUND)
MESSAGE(STATUS "Looking for OpenEXR - not found")
ENDIF(OPENEXR_FOUND)
# OpenMP
INCLUDE(FindOpenMP)
IF(OPENMP_FOUND)
SET(HAVE_OPENMP ${OPENMP_FOUND} CACHE BOOL "Set to TRUE if OpenMP is found, FALSE otherwise")
MESSAGE(STATUS "Looking for OpenMP - found")
ELSE(OPENMP_FOUND)
MESSAGE(STATUS "Looking for OpenMP - not found")
ENDIF(OPENMP_FOUND)
# Qt
FIND_PACKAGE(Qt4)
@ -138,3 +119,5 @@ CHECK_INCLUDE_FILES(malloc.h HAVE_MALLOC_H)
CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/nvconfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/nvconfig.h)
#INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/nvconfig.h DESTINATION include)

View File

@ -7,13 +7,10 @@
#cmakedefine HAVE_EXECINFO_H
#cmakedefine HAVE_MALLOC_H
#cmakedefine HAVE_OPENMP
#cmakedefine HAVE_PNG
#cmakedefine HAVE_JPEG
#cmakedefine HAVE_TIFF
#cmakedefine HAVE_OPENEXR
#cmakedefine HAVE_FREEIMAGE
#cmakedefine HAVE_MAYA

View File

@ -1,154 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_ALGORITHMS_H
#define NV_CORE_ALGORITHMS_H
#include "nvcore.h"
namespace nv
{
/// Pick the larger of @a a and @a b, using only operator<.
/// When neither value compares less than the other, @a a is returned.
/// The result is a reference to one of the arguments, not a copy.
template <typename T>
inline const T & max(const T & a, const T & b)
{
    // Same contract as std::max(a, b).
    return (a < b) ? b : a;
}
/// Pick the smaller of @a a and @a b, using only operator<.
/// When neither value compares less than the other, @a a is returned.
/// The result is a reference to one of the arguments, not a copy.
template <typename T>
inline const T & min(const T & a, const T & b)
{
    // Same contract as std::min(a, b).
    return (b < a) ? b : a;
}
/// Restrict @a x to the range [@a a, @a b].
/// The value is first raised to at least @a a and then lowered to at most @a b,
/// so when @a b is less than @a a the result is @a b.
template <typename T>
inline const T & clamp(const T & x, const T & a, const T & b)
{
    const T & raised = max(x, a);
    return min(raised, b);
}
/// Call delete on every element of a container of pointers.
/// The container is walked with its PseudoIndex protocol (start / isDone /
/// advance). The container itself is left untouched, so it still holds the
/// now dangling pointers afterwards.
template <typename T>
void deleteAll(T & container)
{
    typename T::PseudoIndex it = container.start();
    while (!container.isDone(it))
    {
        delete container[it];
        container.advance(it);
    }
}
// @@ Should swap be implemented here?
template <typename T, template <typename T> class C>
void sort(C<T> & container)
{
introsortLoop(container, 0, container.count());
insertionSort(container, 0, container.count());
}
/// Sort the elements in the half-open range [@a begin, @a end) in ascending order.
/// Nothing happens when the range is empty or inverted.
/// @param container  Container exposing operator[].
/// @param begin      Index of the first element to sort.
/// @param end        Index one past the last element to sort.
// The template template parameter is left unnamed: re-using the name T there
// re-declares a template parameter, which conforming compilers reject.
template <typename T, template <typename> class C>
void sort(C<T> & container, uint begin, uint end)
{
    if (begin < end)
    {
        introsortLoop(container, begin, end);
        insertionSort(container, begin, end);
    }
}
template <typename T, template <typename T> class C>
void insertionSort(C<T> & container)
{
insertionSort(container, 0, container.count());
}
/// Insertion-sort the half-open range [@a begin, @a end) in ascending order.
/// Only operator< and copy construction/assignment are required of T.
/// Elements that compare equal keep their relative order.
/// @param container  Container exposing operator[].
/// @param begin      Index of the first element to sort.
/// @param end        Index one past the last element to sort.
// The template template parameter is left unnamed: re-using the name T there
// re-declares a template parameter, which conforming compilers reject.
template <typename T, template <typename> class C>
void insertionSort(C<T> & container, uint begin, uint end)
{
    // `i < end` rather than `i != end`: with an empty range (begin == end)
    // the first index, begin + 1, is already past end and an inequality test
    // would walk off the container.
    for (uint i = begin + 1; i < end; ++i)
    {
        T value = container[i];

        // Shift larger elements one slot to the right to open a gap for value.
        uint j = i;
        while (j != begin && value < container[j-1])
        {
            container[j] = container[j-1];
            --j;
        }

        // Skip the self-assignment when the element was already in place.
        if (i != j)
        {
            container[j] = value;
        }
    }
}
template <typename T, template <typename T> class C>
void introsortLoop(C<T> & container, uint begin, uint end)
{
while (end-begin > 16)
{
uint p = partition(container, begin, end, medianof3(container, begin, begin+((end-begin)/2)+1, end-1));
introsortLoop(container, p, end);
end = p;
}
}
/// Partition [@a begin, @a end) around the pivot value @a x.
/// On return every element before the returned index is not greater than the
/// pivot and every element from it onwards is not less than the pivot.
/// The scans are unguarded: the caller must pass a pivot value that occurs in
/// the range (introsortLoop passes the median of three of its elements).
/// @param a      Container exposing operator[].
/// @param begin  Index of the first element.
/// @param end    Index one past the last element.
/// @param x      Pivot value; may refer to an element of @a a.
/// @return Index of the first element of the right-hand part.
// The template template parameter is left unnamed: re-using the name T there
// re-declares a template parameter, which conforming compilers reject.
template <typename T, template <typename> class C>
uint partition(C<T> & a, uint begin, uint end, const T & x)
{
    // Work on a private copy: x may alias an element of a, and the swaps
    // below would otherwise change the pivot while partitioning.
    const T pivot = x;

    // Indices stay unsigned like the positions they are compared with.
    uint i = begin;
    uint j = end;

    while (true)
    {
        while (a[i] < pivot) ++i;
        --j;
        while (pivot < a[j]) --j;

        if (i >= j)
        {
            return i;
        }

        swap(a[i], a[j]);
        ++i;
    }
}
/// Return the median of the three elements a[lo], a[mid] and a[hi].
/// Only operator< is used. The result is a reference to the chosen element
/// inside @a a, so it changes if that element is later overwritten.
// The template template parameter is left unnamed: re-using the name T there
// re-declares a template parameter, which conforming compilers reject.
template <typename T, template <typename> class C>
const T & medianof3(C<T> & a, uint lo, uint mid, uint hi)
{
    if (a[mid] < a[lo])
    {
        // mid < lo
        if (a[hi] < a[mid])
        {
            // hi < mid < lo
            return a[mid];
        }

        // mid is the smallest: the median is the smaller of lo and hi.
        return (a[hi] < a[lo]) ? a[hi] : a[lo];
    }

    // lo <= mid
    if (a[hi] < a[mid])
    {
        // mid is the largest: the median is the larger of lo and hi.
        return (a[hi] < a[lo]) ? a[lo] : a[hi];
    }

    // lo <= mid <= hi
    return a[mid];
}
} // nv namespace
#endif // NV_CORE_ALGORITHMS_H

168
src/nvcore/BitArray.h Normal file
View File

@ -0,0 +1,168 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_BITARRAY_H
#define NV_CORE_BITARRAY_H
#include <nvcore/nvcore.h>
#include <nvcore/Containers.h>
namespace nv
{
/// Count how many bits of the byte @a x are set to one.
inline uint bitsSet(uint8 x) {
    uint total = 0;
    // Examine the lowest bit, then shift it out, until no set bit is left.
    while (x != 0) {
        total += (x & 1);
        x >>= 1;
    }
    return total;
}
/// Count how many of the lowest @a bits bits of @a x are set to one.
/// Passing 0 for @a bits yields 0. A negative @a bits never reaches zero by
/// decrementing in practice, so the scan then stops only once @a x runs out
/// of set bits.
inline uint bitsSet(uint32 x, int bits) {
    uint total = 0;
    while (x != 0 && bits != 0) {
        total += (x & 1);
        x >>= 1;
        --bits;
    }
    return total;
}
/// Simple bit array.
class BitArray
{
public:
/// Default ctor.
BitArray() {}
/// Ctor with initial m_size.
BitArray(uint sz)
{
resize(sz);
}
/// Get array m_size.
uint size() const { return m_size; }
/// Clear array m_size.
void clear() { resize(0); }
/// Set array m_size.
void resize(uint sz)
{
m_size = sz;
m_bitArray.resize( (m_size + 7) >> 3 );
}
/// Get bit.
bool bitAt(uint b) const
{
nvDebugCheck( b < m_size );
return (m_bitArray[b >> 3] & (1 << (b & 7))) != 0;
}
/// Set a bit.
void setBitAt(uint b)
{
nvDebugCheck( b < m_size );
m_bitArray[b >> 3] |= (1 << (b & 7));
}
/// Clear a bit.
void clearBitAt( uint b )
{
nvDebugCheck( b < m_size );
m_bitArray[b >> 3] &= ~(1 << (b & 7));
}
/// Clear all the bits.
void clearAll()
{
memset(m_bitArray.unsecureBuffer(), 0, m_bitArray.size());
}
/// Set all the bits.
void setAll()
{
memset(m_bitArray.unsecureBuffer(), 0xFF, m_bitArray.size());
}
/// Toggle all the bits.
void toggleAll()
{
const uint byte_num = m_bitArray.size();
for(uint b = 0; b < byte_num; b++) {
m_bitArray[b] ^= 0xFF;
}
}
/// Get a byte of the bit array.
const uint8 & byteAt(uint index) const
{
return m_bitArray[index];
}
/// Set the given byte of the byte array.
void setByteAt(uint index, uint8 b)
{
m_bitArray[index] = b;
}
/// Count the number of bits set.
uint countSetBits() const
{
const uint num = m_bitArray.size();
if( num == 0 ) {
return 0;
}
uint count = 0;
for(uint i = 0; i < num - 1; i++) {
count += bitsSet(m_bitArray[i]);
}
count += bitsSet(m_bitArray[num-1], m_size & 0x7);
//piDebugCheck(count + countClearBits() == m_size);
return count;
}
/// Count the number of bits clear.
uint countClearBits() const {
const uint num = m_bitArray.size();
if( num == 0 ) {
return 0;
}
uint count = 0;
for(uint i = 0; i < num - 1; i++) {
count += bitsSet(~m_bitArray[i]);
}
count += bitsSet(~m_bitArray[num-1], m_size & 0x7);
//piDebugCheck(count + countSetBits() == m_size);
return count;
}
friend void swap(BitArray & a, BitArray & b)
{
swap(a.m_size, b.m_size);
swap(a.m_bitArray, b.m_bitArray);
}
private:
/// Number of bits stored.
uint m_size;
/// Array of bits.
Array<uint8> m_bitArray;
};
} // nv namespace
#endif // NV_CORE_BITARRAY_H

View File

@ -1,25 +1,27 @@
PROJECT(nvcore)
ADD_SUBDIRECTORY(poshlib)
SET(CORE_SRCS
nvcore.h
Algorithms.h
Containers.h
Debug.h Debug.cpp
DefsGnucDarwin.h
DefsGnucLinux.h
DefsGnucWin32.h
DefsVcWin32.h
FileSystem.h FileSystem.cpp
Library.h Library.cpp
Memory.h Memory.cpp
Ptr.h
RefCounted.h RefCounted.cpp
StrLib.h StrLib.cpp
Stream.h
StdStream.h
TextReader.h TextReader.cpp
TextWriter.h TextWriter.cpp
Timer.h)
nvcore.h
Ptr.h
BitArray.h
Memory.h
Memory.cpp
Debug.h
Debug.cpp
Containers.h
StrLib.h
StrLib.cpp
Stream.h
StdStream.h
TextReader.h
TextReader.cpp
TextWriter.h
TextWriter.cpp
Radix.h
Radix.cpp
Library.h
Library.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
@ -27,19 +29,19 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVCORE_EXPORTS)
IF(UNIX)
SET(LIBS ${LIBS} ${CMAKE_DL_LIBS})
SET(LIBS ${LIBS} ${CMAKE_DL_LIBS})
ENDIF(UNIX)
IF(NVCORE_SHARED)
ADD_DEFINITIONS(-DNVCORE_SHARED=1)
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ADD_DEFINITIONS(-DNVCORE_SHARED=1)
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ELSE(NVCORE_SHARED)
ADD_LIBRARY(nvcore ${CORE_SRCS})
ADD_LIBRARY(nvcore ${CORE_SRCS})
ENDIF(NVCORE_SHARED)
TARGET_LINK_LIBRARIES(nvcore ${LIBS})
INSTALL(TARGETS nvcore
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib/static)
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib/static)

View File

@ -16,10 +16,9 @@ Do not use memmove in insert & remove, use copy ctors instead.
// nvcore
#include "nvcore.h"
#include "Memory.h"
#include "Debug.h"
//#include "Stream.h"
#include <nvcore/nvcore.h>
#include <nvcore/Memory.h>
#include <nvcore/Debug.h>
#include <string.h> // memmove
#include <new> // for placement new
@ -71,10 +70,40 @@ namespace nv
{
// Templates
/// Return whichever of the two values is greater, compared with operator<.
/// Ties go to @a a. A reference to the chosen argument is returned.
template <typename T>
inline const T & max(const T & a, const T & b)
{
    // Behaves like std::max(a, b).
    const bool secondIsGreater = (a < b);
    return secondIsGreater ? b : a;
}
/// Return whichever of the two values is lesser, compared with operator<.
/// Ties go to @a a. A reference to the chosen argument is returned.
template <typename T>
inline const T & min(const T & a, const T & b)
{
    // Behaves like std::min(a, b).
    const bool secondIsLesser = (b < a);
    return secondIsLesser ? b : a;
}
/// Limit @a x to the range [@a a, @a b].
/// The lower bound is applied first and the upper bound second, so @a b wins
/// when the bounds are given in the wrong order.
template <typename T>
inline const T & clamp(const T & x, const T & a, const T & b)
{
    const T & atLeastA = max(x, a);
    return min(atLeastA, b);
}
/// Swap two values.
template <typename T>
inline void swap(T & a, T & b)
{
//return std::swap(a, b);
T temp = a;
a = b;
b = temp;
@ -105,6 +134,16 @@ namespace nv
uint operator()(uint x) const { return x; }
};
/// Delete every element of a container that stores pointers.
/// Iteration uses the container's PseudoIndex protocol (start / isDone /
/// advance). The container keeps its size and its, now dangling, pointers.
template <typename T>
void deleteAll(T & container)
{
    typename T::PseudoIndex cursor = container.start();
    while (!container.isDone(cursor))
    {
        delete container[cursor];
        container.advance(cursor);
    }
}
/** Return the next power of two.
* @see http://graphics.stanford.edu/~seander/bithacks.html
@ -115,7 +154,7 @@ namespace nv
inline uint nextPowerOfTwo( uint x )
{
nvDebugCheck( x != 0 );
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction.
#if 1 // On modern CPUs this is as fast as using the bsr instruction.
x--;
x |= x >> 1;
x |= x >> 2;
@ -138,6 +177,15 @@ namespace nv
return (n & (n-1)) == 0;
}
/// Simple iterator interface.
///
/// Implementations derive from this struct and override all three methods.
/// The methods are pure virtual: declared as plain virtuals without a
/// definition anywhere, any derived class would fail to link.
template <typename T>
struct Iterator
{
    /// Virtual dtor, so an implementation can be destroyed through an
    /// Iterator pointer or reference.
    virtual ~Iterator() {}

    /// Step to the next element.
    virtual void advance() = 0;

    /// Tell whether the iteration has finished.
    virtual bool isDone() = 0;

    /// Get the element at the current position.
    virtual T current() = 0;
};
/**
* Replacement for std::vector that is easier to debug and provides
@ -179,29 +227,20 @@ namespace nv
}
/// Const element access.
/// Const and safe vector access.
const T & operator[]( uint index ) const
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
const T & at( uint index ) const
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
/// Element access.
/// Safe vector access.
T & operator[] ( uint index )
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
T & at( uint index )
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
/// Get vector size.
uint size() const { return m_size; }
@ -213,7 +252,7 @@ namespace nv
const T * buffer() const { return m_buffer; }
/// Get vector pointer.
T * mutableBuffer() { return m_buffer; }
T * unsecureBuffer() { return m_buffer; }
/// Is vector empty.
bool isEmpty() const { return m_size == 0; }
@ -294,22 +333,15 @@ namespace nv
return m_buffer[0];
}
/// Return index of the
bool find(const T & element, uint * index)
{
for (uint i = 0; i < m_size; i++) {
if (index != NULL) *index = i;
return true;
}
return false;
}
/// Check if the given element is contained in the array.
bool contains(const T & e) const
{
return find(e, NULL);
for (uint i = 0; i < m_size; i++) {
if (m_buffer[i] == e) return true;
}
return false;
}
/// Remove the element at the given index. This is an expensive operation!
void removeAt( uint index )
{
@ -495,10 +527,9 @@ namespace nv
}
/// Assignment operator.
Array<T> & operator=( const Array<T> & a )
void operator=( const Array<T> & a )
{
copy( a.m_buffer, a.m_size );
return *this;
}
/*
@ -595,43 +626,18 @@ namespace nv
template<typename T, typename U, typename hash_functor = hash<T> >
class NVCORE_CLASS HashMap
{
NV_FORBID_COPY(HashMap)
public:
/// Default ctor.
HashMap() : entry_count(0), size_mask(-1), table(NULL) { }
// Copy ctor.
HashMap(const HashMap & map) : entry_count(0), size_mask(-1), table(NULL)
{
operator = (map);
}
/// Ctor with size hint.
explicit HashMap(int size_hint) : entry_count(0), size_mask(-1), table(NULL) { setCapacity(size_hint); }
/// Dtor.
~HashMap() { clear(); }
// Assignment operator.
void operator= (const HashMap & map)
{
clear();
if (entry_count > 0)
{
entry_count = map.entry_count;
size_mask = map.size_mask;
const uint size = uint(size_mask + 1);
table = (Entry *)nv::mem::malloc(sizeof(Entry) * size);
// Copy elements using copy ctor.
for (uint i = 0; i < size; i++)
{
new (table + i) Entry(map.table[i]);
}
}
}
/// Set a new or existing value under the key, to the value.
void set(const T& key, const U& value)

View File

@ -1,7 +1,7 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "Debug.h"
#include "StrLib.h"
#include <nvcore/Debug.h>
#include <nvcore/StrLib.h>
// Extern
#if NV_OS_WIN32 //&& NV_CC_MSVC
@ -34,7 +34,7 @@
# endif
#endif
#if NV_OS_DARWIN || NV_OS_FREEBSD
#if NV_OS_DARWIN
# include <unistd.h> // getpid
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
@ -199,14 +199,6 @@ namespace
return (void *) ucp->uc_mcontext->ss.eip;
# endif
# endif
# elif NV_OS_FREEBSD
# if NV_CPU_X86_64
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.mc_rip;
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.mc_eip;
# endif
# else
# if NV_CPU_X86_64
// #define REG_RIP REG_INDEX(rip) // seems to be 16

View File

@ -3,7 +3,7 @@
#ifndef NV_CORE_DEBUG_H
#define NV_CORE_DEBUG_H
#include "nvcore.h"
#include <nvcore/nvcore.h>
#if defined(HAVE_STDARG_H)
# include <stdarg.h> // va_list

View File

@ -1,56 +0,0 @@
// This code is in the public domain -- castano@gmail.com
#include "FileSystem.h"
#include <nvcore/nvcore.h>
#if NV_OS_WIN32
#define _CRT_NONSTDC_NO_WARNINGS // _chdir is defined deprecated, but that's a bug, chdir is deprecated, _chdir is *not*.
//#include <shlwapi.h> // PathFileExists
#include <windows.h> // GetFileAttributes
#include <direct.h> // _mkdir
#else
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif
using namespace nv;
// Tell whether @a path can be found.
// The check is platform specific:
//  - Unix: access() with F_OK|R_OK, so the path must also be readable.
//  - Win32: GetFileAttributes() must not report the 0xFFFFFFFF failure value.
//    (PathFileExists would do as well, but requires linking to shlwapi.lib.)
//  - Elsewhere: the path must be openable for reading with fopen().
bool FileSystem::exists(const char * path)
{
#if NV_OS_UNIX
    const int status = access(path, F_OK|R_OK);
    return status == 0;
#elif NV_OS_WIN32
    const DWORD attributes = GetFileAttributes(path);
    return attributes != 0xFFFFFFFF;
#else
    FILE * fp = fopen(path, "r");
    if (!fp)
    {
        return false;
    }
    fclose(fp);
    return true;
#endif
}
// Create the directory @a path.
// Uses _mkdir() on Win32 and mkdir() with mode 0777 elsewhere.
// Returns true unless the underlying call reports -1.
bool FileSystem::createDirectory(const char * path)
{
#if NV_OS_WIN32
    const int result = _mkdir(path);
#else
    const int result = mkdir(path, 0777);
#endif
    return result != -1;
}
// Make @a path the current working directory.
// Uses _chdir() on Win32 and chdir() elsewhere.
// Returns true unless the underlying call reports -1.
bool FileSystem::changeDirectory(const char * path)
{
#if NV_OS_WIN32
    const int result = _chdir(path);
#else
    const int result = chdir(path);
#endif
    return result != -1;
}

View File

@ -1,23 +0,0 @@
// This code is in the public domain -- castano@gmail.com
#ifndef NV_CORE_FILESYSTEM_H
#define NV_CORE_FILESYSTEM_H
#include "nvcore.h"
namespace nv
{
// Small set of portable file system helpers. Each one returns true on
// success and false on failure.
namespace FileSystem
{
// Tell whether path can be found. The Unix implementation also requires
// the path to be readable.
NVCORE_API bool exists(const char * path);
// Create the directory path.
NVCORE_API bool createDirectory(const char * path);
// Make path the current working directory.
NVCORE_API bool changeDirectory(const char * path);
} // FileSystem namespace
} // nv namespace
#endif // NV_CORE_FILESYSTEM_H

View File

@ -3,7 +3,7 @@
#ifndef NV_CORE_LIBRARY_H
#define NV_CORE_LIBRARY_H
#include "nvcore.h"
#include <nvcore/nvcore.h>
#if NV_OS_WIN32
#define LIBRARY_NAME(name) #name ".dll"

View File

@ -3,7 +3,7 @@
#ifndef NV_CORE_MEMORY_H
#define NV_CORE_MEMORY_H
#include "nvcore.h"
#include <nvcore/nvcore.h>
#include <stdlib.h> // malloc(), realloc() and free()
#include <stddef.h> // size_t

31
src/nvcore/Prefetch.h Normal file
View File

@ -0,0 +1,31 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_PREFETCH_H
#define NV_CORE_PREFETCH_H
#include <nvcore/nvcore.h>
// nvPrefetch(ptr): hint that the memory at ptr is about to be used.
//  - GCC-compatible compilers: __builtin_prefetch.
//  - MSVC on x86: inline assembly issuing prefetcht0.
//  - Anything else, including MSVC on other CPUs: expands to nothing, so
//    callers always have nvPrefetch available.
#if NV_CC_GNUC

#define nvPrefetch(ptr) __builtin_prefetch(ptr)

#elif NV_CC_MSVC && NV_CPU_X86

__forceinline void nvPrefetch(const void * mem)
{
    __asm mov ecx, mem
    __asm prefetcht0 [ecx];
    // __asm prefetchnta [ecx];
}

#else

// do nothing in other case.
#define nvPrefetch(ptr)

#endif
#endif // NV_CORE_PREFETCH_H

View File

@ -3,12 +3,11 @@
#ifndef NV_CORE_PTR_H
#define NV_CORE_PTR_H
#include "nvcore.h"
#include "Debug.h"
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
#include <stdio.h> // NULL
namespace nv
{
@ -30,11 +29,11 @@ class AutoPtr
NV_FORBID_HEAPALLOC();
public:
/// Default ctor.
AutoPtr() : m_ptr(NULL) { }
/// Ctor.
AutoPtr(T * p = NULL) : m_ptr(p) { }
template <class Q>
AutoPtr(Q * p) : m_ptr(static_cast<T *>(p)) { }
explicit AutoPtr( T * p ) : m_ptr(p) { }
/** Dtor. Deletes owned pointer. */
~AutoPtr() {
@ -51,15 +50,6 @@ public:
}
}
template <class Q>
void operator=( Q * p ) {
if (p != m_ptr)
{
delete m_ptr;
m_ptr = static_cast<T *>(p);
}
}
/** Member access. */
T * operator -> () const {
nvDebugCheck(m_ptr != NULL);
@ -106,23 +96,125 @@ private:
T * m_ptr;
};
#if 0
/** Reference counted base class to be used with Pointer.
*
* The only requirement of the Pointer class is that the RefCounted class implements the
* addRef and release methods.
*
* The object starts with a count of zero and deletes itself when release()
* brings the count back to zero. The counters are plain ints.
*/
class RefCounted
{
    NV_FORBID_COPY(RefCounted);
public:

    /// Ctor. The new object has no references and no weak proxy.
    RefCounted() : m_count(0), m_weak_proxy(NULL)
    {
        s_total_obj_count++;
    }

    /// Virtual dtor. The object must not be referenced any more.
    virtual ~RefCounted()
    {
        nvCheck( m_count == 0 );
        nvCheck( s_total_obj_count > 0 );
        s_total_obj_count--;
    }


    /// Increase reference count.
    /// @return The new reference count.
    uint addRef() const
    {
        s_total_ref_count++;
        m_count++;
        return m_count;
    }


    /// Decrease reference count and remove when 0.
    /// @return The new reference count; 0 means the object has been deleted.
    uint release() const
    {
        nvCheck( m_count > 0 );

        s_total_ref_count--;
        m_count--;
        if( m_count == 0 ) {
            // Tell weak observers first, then destroy the object. Nothing
            // may touch members after `delete this`.
            releaseWeakProxy();
            delete this;
            return 0;
        }
        return m_count;
    }

    /// Get weak proxy. It is created on first use.
    WeakProxy * getWeakProxy() const
    {
        if (m_weak_proxy == NULL) {
            m_weak_proxy = new WeakProxy;
            m_weak_proxy->AddRef();
        }
        return m_weak_proxy;
    }

    /// Release the weak proxy, if there is one.
    void releaseWeakProxy() const
    {
        if (m_weak_proxy != NULL) {
            m_weak_proxy->NotifyObjectDied();
            m_weak_proxy->Release();
            m_weak_proxy = NULL;
        }
    }

    /** @name Debug methods: */
    //@{
    /// Get reference count.
    int refCount() const
    {
        return m_count;
    }

    /// Get total number of objects.
    static int totalObjectCount()
    {
        return s_total_obj_count;
    }

    /// Get total number of references.
    static int totalReferenceCount()
    {
        return s_total_ref_count;
    }
    //@}


private:

    NVCORE_API static int s_total_ref_count;
    NVCORE_API static int s_total_obj_count;

    /// Number of references to this object.
    mutable int m_count;

    /// Lazily created weak proxy. Named m_weak_proxy to match every use
    /// in the methods above.
    mutable WeakProxy * m_weak_proxy;

};
#endif
/// Smart pointer template class.
template <class BaseClass>
class SmartPtr {
class Pointer {
public:
// BaseClass must implement addRef() and release().
typedef SmartPtr<BaseClass> ThisType;
typedef Pointer<BaseClass> ThisType;
/// Default ctor.
SmartPtr() : m_ptr(NULL)
Pointer() : m_ptr(NULL)
{
}
/** Other type assignment. */
template <class OtherBase>
SmartPtr( const SmartPtr<OtherBase> & tc )
Pointer( const Pointer<OtherBase> & tc )
{
m_ptr = static_cast<BaseClass *>( tc.ptr() );
if( m_ptr ) {
@ -131,7 +223,7 @@ public:
}
/** Copy ctor. */
SmartPtr( const ThisType & bc )
Pointer( const ThisType & bc )
{
m_ptr = bc.ptr();
if( m_ptr ) {
@ -139,8 +231,8 @@ public:
}
}
/** Copy cast ctor. SmartPtr(NULL) is valid. */
explicit SmartPtr( BaseClass * bc )
/** Copy cast ctor. Pointer(NULL) is valid. */
explicit Pointer( BaseClass * bc )
{
m_ptr = bc;
if( m_ptr ) {
@ -149,7 +241,7 @@ public:
}
/** Dtor. */
~SmartPtr()
~Pointer()
{
set(NULL);
}
@ -183,7 +275,7 @@ public:
//@{
/** Other type assignment. */
template <class OtherBase>
void operator = ( const SmartPtr<OtherBase> & tc )
void operator = ( const Pointer<OtherBase> & tc )
{
set( static_cast<BaseClass *>(tc.ptr()) );
}
@ -206,7 +298,7 @@ public:
//@{
/** Other type equal comparison. */
template <class OtherBase>
bool operator == ( const SmartPtr<OtherBase> & other ) const
bool operator == ( const Pointer<OtherBase> & other ) const
{
return m_ptr == other.ptr();
}
@ -225,7 +317,7 @@ public:
/** Other type not equal comparison. */
template <class OtherBase>
bool operator != ( const SmartPtr<OtherBase> & other ) const
bool operator != ( const Pointer<OtherBase> & other ) const
{
return m_ptr != other.ptr();
}

429
src/nvcore/Radix.cpp Normal file
View File

@ -0,0 +1,429 @@
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Contains source code from the article "Radix Sort Revisited".
* \file Radix.cpp
* \author Pierre Terdiman
* \date April, 4, 2000
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Revisited Radix Sort.
* This is my new radix routine:
* - it uses indices and doesn't recopy the values anymore, hence wasting less ram
* - it creates all the histograms in one run instead of four
* - it sorts words faster than dwords and bytes faster than words
* - it correctly sorts negative floating-point values by patching the offsets
* - it automatically takes advantage of temporal coherence
* - multiple keys support is a side effect of temporal coherence
* - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway]
*
* History:
* - 08.15.98: very first version
* - 04.04.00: recoded for the radix article
* - 12.xx.00: code lifting
* - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here)
* - 10.11.01: added local ram support
* - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting......
*
* \class RadixSort
* \author Pierre Terdiman
* \version 1.3
* \date August, 15, 1998
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/*
To do:
- add an offset parameter between two input values (avoid some data recopy sometimes)
- unroll ? asm ?
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Header
#include <nvcore/Radix.h>
#include <string.h> // memset
//using namespace IceCore;
#define DELETEARRAY(a) { delete [] a; a = NULL; }
#define CHECKALLOC(a)
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Constructor.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort::RadixSort()
    : mCurrentSize(0)
    , mPreviousSize(0)
    , mIndices(NULL)
    , mIndices2(NULL)
    , mTotalCalls(0)
    , mNbHits(0)
{
#ifndef RADIX_LOCAL_RAM
    // Without RADIX_LOCAL_RAM the counters live on the heap for the lifetime
    // of the sorter: 4 x 256 histogram entries and 256 offsets. Their size
    // does not depend on the input.
    mHistogram = new uint32[256*4];
    mOffset = new uint32[256];
#endif

    // Bring the (still empty) index lists to their initial state.
    resetIndices();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Destructor.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort::~RadixSort()
{
    // Free every buffer the sorter owns and null the pointers.
#ifndef RADIX_LOCAL_RAM
    // The counters are only heap allocated when RADIX_LOCAL_RAM is not set.
    delete [] mOffset;
    mOffset = NULL;
    delete [] mHistogram;
    mHistogram = NULL;
#endif
    delete [] mIndices2;
    mIndices2 = NULL;
    delete [] mIndices;
    mIndices = NULL;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Resizes the inner lists.
* \param nb [in] new size (number of dwords)
* \return true if success
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
bool RadixSort::resize(uint32 nb)
{
    // The old lists are thrown away, not copied: their content is reset
    // at the end of this function anyway.
    DELETEARRAY(mIndices2);
    DELETEARRAY(mIndices);

    // Two lists of nb entries each.
    mIndices = new uint32[nb];
    CHECKALLOC(mIndices);

    mIndices2 = new uint32[nb];
    CHECKALLOC(mIndices2);

    mCurrentSize = nb;

    // Start from the initial index order so the input buffer is read
    // sequentially.
    resetIndices();

    return true;
}
// Prepare the index lists for sorting n values.
// When n differs from the count recorded by the previous call, the lists
// are reallocated with resize(n) if n exceeds mCurrentSize, or just reset
// with resetIndices() otherwise, and n is recorded in mPreviousSize.
// When n equals mPreviousSize nothing is touched, so the index order left
// by the previous sort is kept.
// Expands to a statement; it must be used inside a RadixSort member function.
#define CHECK_RESIZE(n) \
if(n!=mPreviousSize) \
{ \
if(n>mCurrentSize) resize(n); \
else resetIndices(); \
mPreviousSize = n; \
}
// Build the four byte histograms for all passes in a single read of the
// input, and detect input that is already sorted.
//
// type   - type the values are compared as for the "already sorted" test.
// buffer - array the values are read from for that comparison.
//
// Besides its two parameters the macro relies on names from the expanding
// scope: `input` (raw bytes that are counted), `nb` (number of 4-byte
// values), mHistogram, mIndices and mNbHits. It declares the locals PrevVal,
// AlreadySorted, Indices, p, pe and h0..h3, so it can be expanded only once
// per scope.
//
// While counting, the values are visited in the order stored in mIndices.
// If none is smaller than its predecessor the macro increments mNbHits and
// executes `return *this;` in the enclosing function. Otherwise the first
// loop stops at the offending value and the second loop counts the rest.
#define CREATE_HISTOGRAMS(type, buffer) \
/* Clear counters */ \
memset(mHistogram, 0, 256*4*sizeof(uint32)); \
\
/* Prepare for temporal coherence */ \
type PrevVal = (type)buffer[mIndices[0]]; \
bool AlreadySorted = true; /* Optimism... */ \
uint32* Indices = mIndices; \
\
/* Prepare to count */ \
uint8* p = (uint8*)input; \
uint8* pe = &p[nb*4]; \
uint32* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \
uint32* h1= &mHistogram[256]; /* Histogram for second pass */ \
uint32* h2= &mHistogram[512]; /* Histogram for third pass */ \
uint32* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \
\
while(p!=pe) \
{ \
/* Read input buffer in previous sorted order */ \
type Val = (type)buffer[*Indices++]; \
/* Check whether already sorted or not */ \
if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */ \
/* Update for next iteration */ \
PrevVal = Val; \
\
/* Create histograms */ \
h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \
} \
\
/* If all input values are already sorted, we just have to return and leave the */ \
/* previous list unchanged. That way the routine may take advantage of temporal */ \
/* coherence, for example when used to sort transparent faces. */ \
if(AlreadySorted) { mNbHits++; return *this; } \
\
/* Else there has been an early out and we must finish computing the histograms */ \
while(p!=pe) \
{ \
/* Create histograms without the previous overhead */ \
h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \
}
// Decide whether sorting pass number `pass` (0 to 3) has any work to do.
//
// Declares three locals in the expanding scope:
//   CurCount    - the 256 counters of this pass inside mHistogram.
//   PerformPass - false when the pass can be skipped, true otherwise.
//   UniqueVal   - byte number `pass` of the first input value.
// The pass is skipped when the counter of UniqueVal equals nb, that is when
// every value has the same byte at this position.
// Relies on `input`, `nb` and mHistogram from the expanding scope.
#define CHECK_PASS_VALIDITY(pass) \
/* Shortcut to current counters */ \
uint32* CurCount = &mHistogram[pass<<8]; \
\
/* Reset flag. The sorting pass is supposed to be performed. (default) */ \
bool PerformPass = true; \
\
/* Check pass validity */ \
\
/* If all values have the same byte, sorting is useless. */ \
/* It may happen when sorting bytes or words instead of dwords. */ \
/* This routine actually sorts words faster than dwords, and bytes */ \
/* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \
/* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
\
/* Get first byte */ \
uint8 UniqueVal = *(((uint8*)input)+pass); \
\
/* Check that byte's counter */ \
if(CurCount[UniqueVal]==nb) PerformPass=false;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Main sort routine.
* This one is for integer values. After the call, mIndices contains a list of indices in sorted order, i.e. in the order you may process your data.
* \param input [in] a list of integer values to sort
* \param nb [in] number of values to sort
* \param signedvalues [in] true to handle negative values, false if you know your input buffer only contains positive values
* \return Self-Reference
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort& RadixSort::sort(const uint32* input, uint32 nb, bool signedvalues)
{
uint32 i, j;
// Sanity checks: a null buffer or an empty one leaves the sorter untouched
// (the call is not even counted in mTotalCalls).
if(!input || !nb) return *this;
// Stats
mTotalCalls++;
// Resize lists if needed
CHECK_RESIZE(nb);
#ifdef RADIX_LOCAL_RAM
// Allocate histograms & offsets on the stack
// These locals deliberately use the member names, so the macros below work
// unchanged in both configurations.
uint32 mHistogram[256*4];
uint32 mOffset[256];
#endif
// Create histograms (counters). Counters for all passes are created in one run.
// Pros: read input buffer once instead of four times
// Cons: mHistogram is 4Kb instead of 1Kb
// We must take care of signed/unsigned values for temporal coherence.... I just
// have 2 code paths even if just a single opcode changes. Self-modifying code, someone?
// NOTE: CREATE_HISTOGRAMS returns *this from this function when the input is
// already in sorted order with respect to the current indices.
if(!signedvalues) { CREATE_HISTOGRAMS(uint32, input); }
else { CREATE_HISTOGRAMS(int32, input); }
// Compute #negative values involved if needed
uint32 NbNegativeValues = 0;
if(signedvalues)
{
// An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
// last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
// responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
uint32* h3= &mHistogram[768];
for( i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
}
// Radix sort, j is the pass number (0=LSB, 3=MSB)
// NOTE(review): byte 0 being the least significant one assumes a
// little-endian host -- confirm for the supported targets.
for( j=0;j<4;j++)
{
// Declares CurCount (counters of pass j) and PerformPass.
CHECK_PASS_VALIDITY(j);
// Sometimes the fourth (negative) pass is skipped because all numbers are negative and the MSB is 0xFF (for example). This is
// not a problem, numbers are correctly sorted anyway.
if(PerformPass)
{
// Should we care about negative values?
if(j!=3 || !signedvalues)
{
// Here we deal with positive values only
// Create offsets
// mOffset[i] is the running sum of the counters below i: the first output
// slot for values whose current byte is i.
mOffset[0] = 0;
for(i=1;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
}
else
{
// This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place.
// Create biased offsets, in order for negative numbers to be sorted as well
mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones
for(i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
// Fixing the wrong place for negative values
mOffset[128] = 0;
for(i=129;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
}
// Perform Radix Sort
uint8* InputBytes = (uint8*)input;
uint32* Indices = mIndices;
uint32* IndicesEnd = &mIndices[nb];
InputBytes += j;
while(Indices!=IndicesEnd)
{
uint32 id = *Indices++;
// id<<2 is the byte offset of value id; InputBytes was advanced by j, so
// this reads byte j of that value and uses it to pick the output slot.
mIndices2[mOffset[InputBytes[id<<2]]++] = id;
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp;
}
}
return *this;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Main sort routine.
* This one is for floating-point values. After the call, mIndices contains a list of indices in sorted order, i.e. in the order you may process your data.
* \param input [in] a list of floating-point values to sort
* \param nb [in] number of values to sort
* \return Self-Reference
* \warning only sorts IEEE floating-point values
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Four-pass byte-wise radix sort over the bit patterns of `nb` floats. The input itself is never
// reordered: the result is the permutation left in mIndices. Passes 0..2 are plain counting-sort
// passes on one byte each; pass 3 (the byte holding the sign bit) gets special treatment so that
// negative values end up first and in reversed order.
// NOTE(review): CHECK_RESIZE, CREATE_HISTOGRAMS and CHECK_PASS_VALIDITY are macros defined outside
// this excerpt; from their use below they are expected to provide CurCount, PerformPass and UniqueVal.
RadixSort& RadixSort::sort(const float* input2, uint32 nb)
{
uint32 i, j;
// Checkings: nothing to do for a null buffer or an empty list
if(!input2 || !nb) return *this;
// Stats
mTotalCalls++;
// View the float bits as 32-bit integers so that single bytes can be used as radix keys.
// The constness is cast away, but the code visible in this function only reads through this pointer.
uint32* input = (uint32*)input2;
// Resize lists if needed
CHECK_RESIZE(nb);
#ifdef RADIX_LOCAL_RAM
// Allocate histograms & offsets on the stack (4 histograms of 256 counters, one per byte/pass)
uint32 mHistogram[256*4];
uint32 mOffset[256];
#endif
// Create histograms (counters). Counters for all passes are created in one run.
// Pros: read input buffer once instead of four times
// Cons: mHistogram is 4Kb instead of 1Kb
// Floating-point values are always supposed to be signed values, so there's only one code path there.
// Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
// is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
// generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
// wouldn't work with mixed positive/negative values....
{ CREATE_HISTOGRAMS(float, input2); }
// Compute the number of negative values involved
uint32 NbNegativeValues = 0;
// An efficient way to compute the number of negative values we'll have to deal with is simply to sum the 128
// last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
// responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
uint32* h3= &mHistogram[768];
for( i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
// Radix sort, j is the pass number (0=LSB, 3=MSB)
for( j=0;j<4;j++)
{
// Should we care about negative values?
if(j!=3)
{
// Here we deal with positive values only
CHECK_PASS_VALIDITY(j);
if(PerformPass)
{
// Create offsets: mOffset[i] is the running sum of the counts of all smaller radix values
mOffset[0] = 0;
for( i=1;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
// Perform Radix Sort
uint8* InputBytes = (uint8*)input;
uint32* Indices = mIndices;
uint32* IndicesEnd = &mIndices[nb];
// After this, InputBytes[id<<2] (id*4 bytes further) addresses byte j of element id.
// NOTE(review): taking byte 0 as the LSB presumably assumes a little-endian layout - confirm.
InputBytes += j;
while(Indices!=IndicesEnd)
{
uint32 id = *Indices++;
// Scatter the index to the next free slot of its bucket
mIndices2[mOffset[InputBytes[id<<2]]++] = id;
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp;
}
}
else
{
// This is a special case to correctly handle negative values
CHECK_PASS_VALIDITY(j);
if(PerformPass)
{
// Create biased offsets, in order for negative numbers to be sorted as well
mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones
for(i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
// We must reverse the sorting order for negative numbers!
// Buckets 255 down to 128 are laid out from position 0: bucket 255 starts at 0 and each lower
// bucket starts after the counts of all higher ones.
mOffset[255] = 0;
for(i=0;i<127;i++) mOffset[254-i] = mOffset[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
// Turn each negative bucket's start into its end, because those buckets are filled backwards below.
for(i=128;i<256;i++) mOffset[i] += CurCount[i]; // Fixing the wrong place for negative values
// Perform Radix Sort
for(i=0;i<nb;i++)
{
uint32 Radix = input[mIndices[i]]>>24; // Radix byte, same as above. AND is useless here (uint32).
// ### cmp to be killed. Not good. Later.
if(Radix<128) mIndices2[mOffset[Radix]++] = mIndices[i]; // Number is positive, same as above
else mIndices2[--mOffset[Radix]] = mIndices[i]; // Number is negative, flip the sorting order
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp;
}
else
{
// The pass is useless, yet we still have to reverse the order of current list if all values are negative.
// (UniqueVal>=128 means the byte value reported by CHECK_PASS_VALIDITY has the sign bit set.)
if(UniqueVal>=128)
{
for(i=0;i<nb;i++) mIndices2[i] = mIndices[nb-i-1];
// Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp;
}
}
}
}
return *this;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Resets the inner indices. After the call, mIndices is reset.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void RadixSort::resetIndices()
{
for(uint32 i=0;i<mCurrentSize;i++) mIndices[i] = i;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Gets the ram used.
* \return memory used in bytes
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reports the memory attributed to this sorter, in bytes: the object itself, the two index
// lists and, when they are not stack locals (RADIX_LOCAL_RAM undefined), the histogram and
// offset tables.
uint32 RadixSort::usedRam() const
{
#ifndef RADIX_LOCAL_RAM
    // 4 histograms of 256 counters, plus one table of 256 offsets
    const uint32 tableBytes = 256*4*sizeof(uint32) + 256*sizeof(uint32);
#else
    const uint32 tableBytes = 0;
#endif
    // 2 lists of indices
    const uint32 indexListBytes = 2*mCurrentSize*sizeof(uint32);
    return sizeof(RadixSort) + tableBytes + indexListBytes;
}

69
src/nvcore/Radix.h Normal file
View File

@ -0,0 +1,69 @@
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Contains source code from the article "Radix Sort Revisited".
* \file Radix.h
* \author Pierre Terdiman
* \date April, 4, 2000
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Include Guard
#ifndef NV_CORE_RADIXSORT_H
#define NV_CORE_RADIXSORT_H
#include <nvcore/nvcore.h>
#define RADIX_LOCAL_RAM
// Index-producing radix sorter. The sort() overloads leave the input values where they are and
// build a list of indices (see indices()) giving the order in which to visit the input.
class NVCORE_API RadixSort {
NV_FORBID_COPY(RadixSort);
public:
// Constructor/Destructor
RadixSort();
~RadixSort();
// Sorting methods. Both return *this, so the result can be read in the same expression
// (e.g. sort(...).indices()). `nb` is the number of values in `input`.
// The integer overload treats the values as signed unless signedvalues is false.
RadixSort & sort(const uint32* input, uint32 nb, bool signedvalues=true);
RadixSort & sort(const float* input, uint32 nb);
//! Access to results. mIndices is a list of indices in sorted order, i.e. in the order you may further process your data
//! Note that this const accessor hands out a mutable pointer to the internal list.
inline uint32 * indices() const { return mIndices; }
//! mIndices2 gets trashed on calling the sort routine, but otherwise you can recycle it the way you want.
inline uint32 * recyclable() const { return mIndices2; }
// Stats
//! Returns the memory used, in bytes.
uint32 usedRam() const;
//! Returns the total number of calls to the radix sorter.
inline uint32 totalCalls() const { return mTotalCalls; }
//! Returns the number of premature exits due to temporal coherence.
inline uint32 hits() const { return mNbHits; }
private:
// These tables are members only when RADIX_LOCAL_RAM is not defined; with RADIX_LOCAL_RAM the
// float sort routine declares them as stack locals of the same names instead.
#ifndef RADIX_LOCAL_RAM
uint32* mHistogram; //!< Counters for each byte
uint32* mOffset; //!< Offsets (nearly a cumulative distribution function)
#endif
uint32 mCurrentSize; //!< Current size of the indices list
uint32 mPreviousSize; //!< Size involved in previous call
uint32* mIndices; //!< Two lists, swapped each pass
uint32* mIndices2;
// Stats
uint32 mTotalCalls; //!< Incremented once per sort() call that gets past the argument checks
uint32 mNbHits;
// Internal methods
bool resize(uint32 nb);
void resetIndices(); //!< Refills mIndices with 0..mCurrentSize-1
};
#endif // NV_CORE_RADIXSORT_H

View File

@ -1,9 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "RefCounted.h"
using namespace nv;
// Storage for the class-wide debug counters declared in RefCounted.h.
// s_total_ref_count follows every addRef()/release(); s_total_obj_count follows construction/destruction.
int nv::RefCounted::s_total_ref_count = 0;
int nv::RefCounted::s_total_obj_count = 0;

View File

@ -1,114 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_REFCOUNTED_H
#define NV_CORE_REFCOUNTED_H
#include "nvcore.h"
#include "Debug.h"
namespace nv
{
/// Reference counted base class to be used with SmartPtr and WeakPtr.
class RefCounted
{
    NV_FORBID_COPY(RefCounted);
public:

    /// Ctor. A new object starts with zero references and is added to the global object tally.
    RefCounted() : m_count(0)/*, m_weak_proxy(NULL)*/
    {
        ++s_total_obj_count;
    }

    /// Virtual dtor. Checks that no reference is left before removing the object from the tally.
    virtual ~RefCounted()
    {
        nvCheck( m_count == 0 );
        nvCheck( s_total_obj_count > 0 );
        --s_total_obj_count;
    }

    /// Adds one reference; returns the updated count.
    uint addRef() const
    {
        ++s_total_ref_count;
        return ++m_count;
    }

    /// Drops one reference; the object deletes itself when the count reaches 0.
    /// Returns the remaining count (0 when the object has just been deleted).
    uint release() const
    {
        nvCheck( m_count > 0 );

        --s_total_ref_count;
        if( --m_count != 0 ) {
            return m_count;
        }

        // releaseWeakProxy();
        delete this;
        return 0;
    }

    /*
    /// Get weak proxy.
    WeakProxy * getWeakProxy() const
    {
        if (m_weak_proxy == NULL) {
            m_weak_proxy = new WeakProxy;
            m_weak_proxy->AddRef();
        }
        return m_weak_proxy;
    }

    /// Release the weak proxy.
    void releaseWeakProxy() const
    {
        if (m_weak_proxy != NULL) {
            m_weak_proxy->NotifyObjectDied();
            m_weak_proxy->Release();
            m_weak_proxy = NULL;
        }
    }
    */

    /** @name Debug methods: */
    //@{
    /// Number of references currently held on this object.
    int refCount() const
    {
        return m_count;
    }

    /// Number of RefCounted objects currently alive.
    static int totalObjectCount()
    {
        return s_total_obj_count;
    }

    /// Number of references currently held across all objects.
    static int totalReferenceCount()
    {
        return s_total_ref_count;
    }
    //@}

private:

    NVCORE_API static int s_total_ref_count;
    NVCORE_API static int s_total_obj_count;

    // Mutable so that addRef()/release() can be called through const pointers.
    mutable int m_count;
    // mutable WeakProxy * weak_proxy;

};
} // nv namespace
#endif // NV_CORE_REFCOUNTED_H

View File

@ -1,7 +1,5 @@
// This code is in the public domain -- castano@gmail.com
#ifndef NV_CORE_STDSTREAM_H
#define NV_CORE_STDSTREAM_H
#ifndef NV_STDSTREAM_H
#define NV_STDSTREAM_H
#include <nvcore/Stream.h>
@ -368,4 +366,4 @@ private:
} // nv namespace
#endif // NV_CORE_STDSTREAM_H
#endif // NV_STDSTREAM_H

View File

@ -1,6 +1,6 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "StrLib.h"
#include <nvcore/StrLib.h>
#include <math.h> // log
#include <stdio.h> // vsnprintf

View File

@ -3,8 +3,8 @@
#ifndef NV_CORE_STRING_H
#define NV_CORE_STRING_H
#include "nvcore.h"
#include "Containers.h" // swap
#include <nvcore/nvcore.h>
#include <nvcore/Containers.h> // swap
#include <string.h> // strlen, strcmp, etc.

View File

@ -1,160 +1,160 @@
// This code is in the public domain -- castano@gmail.com
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_STREAM_H
#define NV_CORE_STREAM_H
#ifndef NVCORE_STREAM_H
#define NVCORE_STREAM_H
#include "nvcore.h"
#include "Debug.h"
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
namespace nv
{
/// Base stream class.
class NVCORE_CLASS Stream {
public:
enum ByteOrder {
LittleEndian = false,
BigEndian = true,
};
/// Get the byte order of the system.
static ByteOrder getSystemByteOrder() {
#if NV_LITTLE_ENDIAN
return LittleEndian;
#else
return BigEndian;
#endif
}
/// Ctor.
Stream() : m_byteOrder(LittleEndian) { }
/// Virtual destructor.
virtual ~Stream() {}
/// Set byte order.
void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
/// Get byte order.
ByteOrder byteOrder() const { return m_byteOrder; }
/// Serialize the given data.
virtual uint serialize( void * data, uint len ) = 0;
/// Move to the given position in the archive.
virtual void seek( uint pos ) = 0;
/// Return the current position in the archive.
virtual uint tell() const = 0;
/// Return the current size of the archive.
virtual uint size() const = 0;
/// Determine if there has been any error.
virtual bool isError() const = 0;
/// Clear errors.
virtual void clearError() = 0;
/// Return true if the stream is at the end.
virtual bool isAtEnd() const = 0;
/// Return true if the stream is seekable.
virtual bool isSeekable() const = 0;
/// Return true if this is an input stream.
virtual bool isLoading() const = 0;
/// Return true if this is an output stream.
virtual bool isSaving() const = 0;
// friends
friend Stream & operator<<( Stream & s, bool & c ) {
#if NV_OS_DARWIN
nvStaticCheck(sizeof(bool) == 4);
uint8 b = c ? 1 : 0;
s.serialize( &b, 1 );
c = (b == 1);
#else
nvStaticCheck(sizeof(bool) == 1);
s.serialize( &c, 1 );
#endif
return s;
}
friend Stream & operator<<( Stream & s, char & c ) {
nvStaticCheck(sizeof(char) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, uint8 & c ) {
nvStaticCheck(sizeof(uint8) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, int8 & c ) {
nvStaticCheck(sizeof(int8) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, uint16 & c ) {
nvStaticCheck(sizeof(uint16) == 2);
return s.byteOrderSerialize( &c, 2 );
}
friend Stream & operator<<( Stream & s, int16 & c ) {
nvStaticCheck(sizeof(int16) == 2);
return s.byteOrderSerialize( &c, 2 );
}
friend Stream & operator<<( Stream & s, uint32 & c ) {
nvStaticCheck(sizeof(uint32) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, int32 & c ) {
nvStaticCheck(sizeof(int32) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, uint64 & c ) {
nvStaticCheck(sizeof(uint64) == 8);
return s.byteOrderSerialize( &c, 8 );
}
friend Stream & operator<<( Stream & s, int64 & c ) {
nvStaticCheck(sizeof(int64) == 8);
return s.byteOrderSerialize( &c, 8 );
}
friend Stream & operator<<( Stream & s, float & c ) {
nvStaticCheck(sizeof(float) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, double & c ) {
nvStaticCheck(sizeof(double) == 8);
return s.byteOrderSerialize( &c, 8 );
}
protected:
/// Serialize in the stream byte order.
Stream & byteOrderSerialize( void * v, uint len ) {
if( m_byteOrder == getSystemByteOrder() ) {
serialize( v, len );
}
else {
for( uint i = len; i > 0; i-- ) {
serialize( (uint8 *)v + i - 1, 1 );
}
}
return *this;
}
private:
ByteOrder m_byteOrder;
/// Base stream class.
/// Abstract interface for a byte archive. The operator<< overloads below are used in both
/// directions: serialize() receives a mutable buffer, and isLoading()/isSaving() tell which
/// direction a concrete stream works in.
class NVCORE_CLASS Stream {
public:
/// Byte orders a stream can use for multi-byte values.
enum ByteOrder {
LittleEndian = false,
BigEndian = true,
};
/// Get the byte order of the system.
static ByteOrder getSystemByteOrder() {
# if NV_LITTLE_ENDIAN
return LittleEndian;
# else
return BigEndian;
# endif
}
/// Ctor. Streams default to little-endian byte order.
Stream() : m_byteOrder(LittleEndian) { }
/// Virtual destructor.
virtual ~Stream() {}
/// Set byte order.
void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
/// Get byte order.
ByteOrder byteOrder() const { return m_byteOrder; }
/// Serialize the given data.
virtual uint serialize( void * data, uint len ) = 0;
/// Move to the given position in the archive.
virtual void seek( uint pos ) = 0;
/// Return the current position in the archive.
virtual uint tell() const = 0;
/// Return the current size of the archive.
virtual uint size() const = 0;
/// Determine if there has been any error.
virtual bool isError() const = 0;
/// Clear errors.
virtual void clearError() = 0;
/// Return true if the stream is at the end.
virtual bool isAtEnd() const = 0;
/// Return true if the stream is seekable.
virtual bool isSeekable() const = 0;
/// Return true if this is an input stream.
virtual bool isLoading() const = 0;
/// Return true if this is an output stream.
virtual bool isSaving() const = 0;
// friends
/// Serialize a bool as a single byte.
friend Stream & operator<<( Stream & s, bool & c ) {
# if NV_OS_DARWIN
// bool is wider than one byte here, so go through a one-byte temporary and copy the result back.
// NOTE(review): the static check assumes sizeof(bool) == 4 on every Darwin target - confirm this
// holds for all compilers/architectures that are built.
nvStaticCheck(sizeof(bool) == 4);
uint8 b = c ? 1 : 0;
s.serialize( &b, 1 );
c = (b == 1);
# else
nvStaticCheck(sizeof(bool) == 1);
s.serialize( &c, 1 );
# endif
return s;
}
// Single-byte types are passed to serialize() directly; byte order is irrelevant for them.
friend Stream & operator<<( Stream & s, char & c ) {
nvStaticCheck(sizeof(char) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, uint8 & c ) {
nvStaticCheck(sizeof(uint8) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, int8 & c ) {
nvStaticCheck(sizeof(int8) == 1);
s.serialize( &c, 1 );
return s;
}
// Multi-byte types go through byteOrderSerialize() so that the stream's byte order is honoured.
friend Stream & operator<<( Stream & s, uint16 & c ) {
nvStaticCheck(sizeof(uint16) == 2);
return s.byteOrderSerialize( &c, 2 );
}
friend Stream & operator<<( Stream & s, int16 & c ) {
nvStaticCheck(sizeof(int16) == 2);
return s.byteOrderSerialize( &c, 2 );
}
friend Stream & operator<<( Stream & s, uint32 & c ) {
nvStaticCheck(sizeof(uint32) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, int32 & c ) {
nvStaticCheck(sizeof(int32) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, uint64 & c ) {
nvStaticCheck(sizeof(uint64) == 8);
return s.byteOrderSerialize( &c, 8 );
}
friend Stream & operator<<( Stream & s, int64 & c ) {
nvStaticCheck(sizeof(int64) == 8);
return s.byteOrderSerialize( &c, 8 );
}
friend Stream & operator<<( Stream & s, float & c ) {
nvStaticCheck(sizeof(float) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, double & c ) {
nvStaticCheck(sizeof(double) == 8);
return s.byteOrderSerialize( &c, 8 );
}
protected:
/// Serialize in the stream byte order.
/// When the stream and system byte orders agree the value is passed through in one call;
/// otherwise it is serialized one byte at a time, from the last byte of the value to the first.
Stream & byteOrderSerialize( void * v, uint len ) {
if( m_byteOrder == getSystemByteOrder() ) {
serialize( v, len );
}
else {
for( uint i = len; i > 0; i-- ) {
serialize( (uint8 *)v + i - 1, 1 );
}
}
return *this;
}
private:
/// Byte order used by byteOrderSerialize().
ByteOrder m_byteOrder;
};
} // nv namespace
#endif // NV_CORE_STREAM_H
#endif // NV_STREAM_H

View File

@ -1,6 +1,6 @@
// This code is in the public domain -- castano@gmail.com
// This code is in the public domain -- castanyo@yahoo.es
#include "TextReader.h"
#include <nvcore/TextReader.h>
using namespace nv;
@ -48,7 +48,7 @@ const char * TextReader::readToEnd()
m_text.reserve(size + 1);
m_text.resize(size);
m_stream->serialize(m_text.mutableBuffer(), size);
m_stream->serialize(m_text.unsecureBuffer(), size);
m_text.pushBack('\0');
return m_text.buffer();

View File

@ -1,10 +1,11 @@
// This code is in the public domain -- castano@gmail.com
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_TEXTREADER_H
#define NV_CORE_TEXTREADER_H
#ifndef NVCORE_TEXTREADER_H
#define NVCORE_TEXTREADER_H
#include "Containers.h"
#include "Stream.h"
#include <nvcore/nvcore.h>
#include <nvcore/Stream.h>
#include <nvcore/Containers.h>
namespace nv
{
@ -34,4 +35,4 @@ private:
} // nv namespace
#endif // NV_CORE_TEXTREADER_H
#endif // NVCORE_TEXTREADER_H

View File

@ -1,6 +1,6 @@
// This code is in the public domain -- castano@gmail.com
// This code is in the public domain -- castanyo@yahoo.es
#include "TextWriter.h"
#include <nvcore/TextWriter.h>
using namespace nv;

View File

@ -1,10 +1,11 @@
// This code is in the public domain -- castano@gmail.com
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_TEXTWRITER_H
#define NV_CORE_TEXTWRITER_H
#ifndef NVCORE_TEXTWRITER_H
#define NVCORE_TEXTWRITER_H
#include "StrLib.h"
#include "Stream.h"
#include <nvcore/nvcore.h>
#include <nvcore/Stream.h>
#include <nvcore/StrLib.h>
namespace nv
{

View File

@ -1,60 +0,0 @@
// This code is in the public domain -- castano@gmail.com
#ifndef NV_CORE_TIMER_H
#define NV_CORE_TIMER_H
#include "nvcore.h"
#if 1
#include <time.h> //clock
/// Minimal interval timer built on the C library clock() function.
/// Usage: start(), run the work, stop(), then read elapsed().
class NVCORE_CLASS Timer
{
public:
    /// Both time stamps start at zero, so elapsed() is well defined (0) even when it is
    /// queried before start()/stop() have been called. They used to be left uninitialized.
    Timer() : m_start(0), m_stop(0) {}

    /// Records the clock() value at the beginning of the measured interval.
    void start() { m_start = clock(); }

    /// Records the clock() value at the end of the measured interval.
    void stop() { m_stop = clock(); }

    /// Returns the time between the last start() and stop(), in seconds
    /// (clock ticks divided by CLOCKS_PER_SEC).
    float elapsed() const { return float(m_stop - m_start) / CLOCKS_PER_SEC; }

private:
    clock_t m_start;
    clock_t m_stop;
};
#else
#define WINDOWS_LEAN_AND_MEAN
#define VC_EXTRALEAN
#define NOMINMAX
#include <windows.h>
// QueryPerformanceCounter-based variant of Timer. It lives in the #else branch of the "#if 1"
// above, so it is currently never compiled.
class NVCORE_CLASS Timer
{
public:
Timer() {
// get the tick frequency from the OS
QueryPerformanceFrequency((LARGE_INTEGER*) &m_frequency);
}
// Record the performance counter at the start / end of the measured interval.
void start() { QueryPerformanceCounter((LARGE_INTEGER*) &m_start); }
void stop() { QueryPerformanceCounter((LARGE_INTEGER*) &m_stop); }
// Counter difference divided by the counter frequency and scaled by 1000.
// The (int) cast binds to the literal 1000 only: the arithmetic is carried out in double and
// the result is truncated by the int return type.
// NOTE(review): the clock()-based Timer in the other branch returns float seconds from
// elapsed(); the two variants disagree on unit and return type - reconcile before enabling this one.
int elapsed() const {
return (int)1000 * ((double)m_stop.QuadPart - (double)m_start.QuadPart) / (double)m_frequency.QuadPart;
}
private:
LARGE_INTEGER m_frequency;
LARGE_INTEGER m_start;
LARGE_INTEGER m_stop;
};
#endif // 0
#endif // NV_CORE_TIMER_H

229
src/nvcore/Tokenizer.cpp Normal file
View File

@ -0,0 +1,229 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Tokenizer.h>
#include <nvcore/StrLib.h>
#include <stdio.h> // vsscanf
#include <stdarg.h> // va_list
#include <stdlib.h> // atof, atoi
#if NV_CC_MSVC
#if 0 // This doesn't work on MSVC for x64
/* vsscanf for Win32
* Written 5/2003 by <mgix@mgix.com>
* This code is in the Public Domain
*/
#include <malloc.h> // alloca
//#include <string.h>
// Emulates vsscanf() by building a fake argument stack and jumping into sscanf()/sscanf_s()
// with 32-bit x86 inline assembly. Disabled by the surrounding "#if 0" (see the note there
// about x64).
// NOTE(review): memcpy is used below while the <string.h> include above is commented out -
// confirm it is declared through another header before re-enabling this code.
static int vsscanf(const char * buffer, const char * format, va_list argPtr)
{
// Get an upper bound for the # of args: every '%' that is not directly followed by '*' or '%'
// is counted (the second '%' of "%%" can still be counted, hence "upper bound").
size_t count = 0;
const char *p = format;
while(1) {
char c = *(p++);
if(c==0) break;
if(c=='%' && (p[0]!='*' && p[0]!='%')) ++count;
}
// Make a local stack: buffer + format + one pointer-sized slot per counted argument
size_t stackSize = (2+count)*sizeof(void*);
void **newStack = (void**)alloca(stackSize);
// Fill local stack the way sscanf likes it
newStack[0] = (void*)buffer;
newStack[1] = (void*)format;
// Copies `count` pointer-sized words starting at argPtr, i.e. treats va_list as a plain pointer
// to the caller's argument area.
memcpy(newStack+2, argPtr, count*sizeof(void*));
// @@ Use: CALL DWORD PTR [sscanf]
// Warp into system sscanf with new stack
int result;
void *savedESP;
__asm
{
mov savedESP, esp
mov esp, newStack
#if _MSC_VER >= 1400
call DWORD PTR [sscanf_s]
#else
call DWORD PTR [sscanf]
#endif
mov esp, savedESP
mov result, eax
}
return result;
}
#endif
#endif
using namespace nv;
// Default token: zero length, pointing at a static empty string (never a null pointer).
Token::Token()
    : m_str("")
    , m_len(0)
{
}
// Copy constructor: duplicates the pointer/length pair; the characters are shared, not copied.
Token::Token(const Token & token)
    : m_str(token.m_str)
    , m_len(token.m_len)
{
}
// Wraps the `len` characters starting at `str`. Only the pointer is stored.
Token::Token(const char * str, int len)
    : m_str(str)
    , m_len(len)
{
}
// Returns true when the token text is exactly the null-terminated string `str`.
//
// The previous implementation compared only the first m_len characters, which made it a
// prefix test: the token "v" compared equal to "vn", and an empty token compared equal to
// every string. The lengths are now required to match as well.
bool Token::operator==(const char * str) const
{
    return strlen(str) == size_t(m_len) && strncmp(m_str, str, m_len) == 0;
}

// Exact negation of operator==, so the two operators can never both hold (or both fail).
bool Token::operator!=(const char * str) const
{
    return !operator==(str);
}
// Returns true when the token is empty (zero length), which is the state of a
// default-constructed Token.
bool Token::isNull()
{
    // The test used to be inverted (m_len != 0), reporting every non-empty token as null.
    return m_len == 0;
}
// Parses the characters at the start of the token with atof() and narrows the result to float.
float Token::toFloat() const
{
    const double parsed = atof(m_str);
    return float(parsed);
}
// Parses the characters at the start of the token as a decimal integer with atoi().
int Token::toInt() const
{
    const int parsed = atoi(m_str);
    return parsed;
}
// Parses the characters at the start of the token as an unsigned decimal integer.
//
// strtoul() replaces the former atoi() call: atoi() has undefined behaviour for values that
// do not fit in an int, so anything above INT_MAX could not be parsed reliably. Values that
// atoi() handled (including negative input, which still wraps around) give the same result.
uint Token::toUnsignedInt() const
{
    return uint(strtoul(m_str, NULL, 10));
}
// Builds a String from the token's pointer and length.
String Token::toString() const
{
    String text(m_str, m_len);
    return text;
}
// Runs a scanf-style `format` over the token text, storing results through the variadic
// pointer arguments. Succeeds only when exactly `count` conversions were performed.
bool Token::parse(const char * format, int count, ...) const
{
    va_list args;
    va_start(args, count);
    const int converted = vsscanf(m_str, format, args);
    va_end(args);

    return converted == count;
}
// Creates a tokenizer reading from `stream`, with "{}()=" as delimiters and space/tab as
// white space.
//
// m_line now starts as an empty string instead of being left uninitialized, so asking for a
// token before the first line has been read reports "no token" rather than dereferencing an
// indeterminate pointer.
Tokenizer::Tokenizer(Stream * stream) :
    m_reader(stream),
    m_line(""),
    m_lineNumber(0),
    m_columnNumber(0),
    m_delimiters("{}()="),
    m_spaces(" \t")
{
}
// Advances to the next line and reads its first token. With skipEmptyLines set, lines that
// yield no token are skipped. Returns false once no further line can be read.
bool Tokenizer::nextLine(bool skipEmptyLines /*= true*/)
{
    for (;;) {
        if (!readLine()) {
            return false;
        }

        const bool gotToken = readToken();
        if (gotToken || !skipEmptyLines) {
            return true;
        }
    }
}
// Reads the next token of the current line. When the line is exhausted, either gives up
// (returns false) or, with skipEndOfLine set, continues with the next non-empty line.
bool Tokenizer::nextToken(bool skipEndOfLine /*= false*/)
{
    if (readToken()) {
        return true;
    }

    return skipEndOfLine ? nextLine(true) : false;
}
// Extracts the next token from the current line into m_token. A delimiter character is a
// token on its own; any other token runs up to the next delimiter, white space character or
// end of line. Returns false when only the end of the line is left.
bool Tokenizer::readToken()
{
    skipSpaces();

    const char * const start = m_line + m_columnNumber;
    if (start[0] == '\0') {
        return false;
    }

    char current = readChar();
    if (isDelimiter(current)) {
        m_token = Token(start, 1);
        return true;
    }

    // @@ Add support for quoted tokens "", ''

    int length = 0;
    for (; current != '\0' && !isDelimiter(current) && !isSpace(current); ++length) {
        current = readChar();
    }

    // The character that ended the token was consumed by readChar(); give it back.
    --m_columnNumber;

    m_token = Token(start, length);
    return true;
}
// Returns the character at the current column and moves one column forward.
char Tokenizer::readChar()
{
    const char next = m_line[m_columnNumber];
    ++m_columnNumber;
    return next;
}
// Fetches the next line from the reader, bumping the line counter and rewinding the column.
// Returns false when the reader has no line to give.
bool Tokenizer::readLine()
{
    ++m_lineNumber;
    m_columnNumber = 0;

    m_line = m_reader.readLine();
    if (m_line == NULL) {
        return false;
    }
    return true;
}
void Tokenizer::skipSpaces()
{
while (isSpace(readChar())) {}
m_columnNumber--;
}
// Returns true when `c` is one of the characters in the configured white space set.
bool Tokenizer::isSpace(char c)
{
    for (const char * candidate = m_spaces; *candidate != '\0'; ++candidate) {
        if (*candidate == c) {
            return true;
        }
    }
    return false;
}
// Returns true when `c` is one of the characters in the configured delimiter set.
bool Tokenizer::isDelimiter(char c)
{
    for (const char * candidate = m_delimiters; *candidate != '\0'; ++candidate) {
        if (*candidate == c) {
            return true;
        }
    }
    return false;
}

99
src/nvcore/Tokenizer.h Normal file
View File

@ -0,0 +1,99 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_TOKENIZER_H
#define NV_CORE_TOKENIZER_H
#include <nvcore/nvcore.h>
#include <nvcore/Stream.h>
#include <nvcore/TextReader.h>
#include <nvcore/StrLib.h>
namespace nv
{
/// A token produced by the Tokenizer.
/// A token is a pointer/length pair: the constructors store the pointer they are given and do
/// not copy the characters, so the underlying buffer has to stay valid while the token is used.
class NVCORE_CLASS Token
{
public:
Token();
Token(const Token & token);
Token(const char * str, int len);
// Comparison against a null-terminated string.
bool operator==(const char * str) const;
bool operator!=(const char * str) const;
bool isNull();
// Conversions. The numeric ones hand the raw character pointer to the C library parsers,
// which are not told the token length.
float toFloat() const;
int toInt() const;
uint toUnsignedInt() const;
String toString() const;
// scanf-style parsing; `count` is the number of conversions expected to succeed.
// In the format attribute the implicit `this` is argument 1, so the format string is
// argument 2 and the variadic arguments start at 4.
bool parse(const char * format, int count, ...) const __attribute__((format (scanf, 2, 4)));
private:
const char * m_str; // First character of the token (not owned).
int m_len; // Number of characters in the token.
};
/// Exception thrown by the tokenizer.
class TokenizerException
{
public:
    /// Remembers the line/column pair supplied by the thrower.
    TokenizerException(int line, int column)
        : m_line(line)
        , m_column(column)
    {
    }

    /// Line given at construction.
    int line() const
    {
        return m_line;
    }

    /// Column given at construction.
    int column() const
    {
        return m_column;
    }

private:
    int m_line;
    int m_column;
};
// @@ Use enums instead of bools for clarity!
//enum SkipEmptyLines { skipEmptyLines, noSkipEmptyLines };
//enum SkipEndOfLine { skipEndOfLine, noSkipEndOfLine };
/// A simple stream tokenizer.
class NVCORE_CLASS Tokenizer
{
public:
// Tokenizes the text read from `stream`.
Tokenizer(Stream * stream);
// Move to the next line / next token; both return false when nothing more could be read.
bool nextLine(bool skipEmptyLines = true);
bool nextToken(bool skipEndOfLine = false);
// The token found by the last successful nextLine()/nextToken().
const Token & token() const { return m_token; }
// Current position. The line number is incremented on every line read; the column is an
// index into the current line.
int lineNumber() const { return m_lineNumber; }
int columnNumber() const { return m_columnNumber; }
// Character sets used to split the line. Only the pointer is stored, so the string passed in
// must outlive the tokenizer (or the next call to the setter).
void setDelimiters(const char * str) { m_delimiters = str; }
const char * delimiters() const { return m_delimiters; }
void setSpaces(const char * str) { m_spaces = str; }
const char * spaces() const { return m_spaces; }
private:
char readChar();
bool readLine();
bool readToken();
void skipSpaces();
bool isSpace(char c);
bool isDelimiter(char c);
private:
TextReader m_reader; // Source of lines.
const char * m_line; // Line currently being tokenized, as returned by the reader.
Token m_token; // Last token read; points into m_line.
int m_lineNumber;
int m_columnNumber;
const char * m_delimiters; // Null-terminated set of single-character tokens.
const char * m_spaces; // Null-terminated set of characters skipped between tokens.
};
} // nv namespace
#endif // NV_CORE_TOKENIZER_H

View File

@ -22,7 +22,7 @@
// Platform definitions
#include <posh.h>
#include "poshlib/posh.h"
// OS:
// NV_OS_WIN32
@ -38,9 +38,6 @@
#if defined POSH_OS_LINUX
# define NV_OS_LINUX 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_FREEBSD
# define NV_OS_FREEBSD 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_CYGWIN32
# define NV_OS_CYGWIN 1
#elif defined POSH_OS_MINGW
@ -129,13 +126,6 @@
#define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
#define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3)
#define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3
#define NV_STRING2(x) #x
#define NV_STRING(x) NV_STRING2(x)
#if NV_CC_GNUC
#define NV_FILE_LINE __FILE__ ":" NV_STRING(__LINE__) ": "
#else
#define NV_FILE_LINE __FILE__ "(" NV_STRING(__LINE__) ") : "
#endif
// Startup initialization macro.
#define NV_AT_STARTUP(some_code) \
@ -168,7 +158,7 @@
#elif NV_CC_GNUC
# if NV_OS_LINUX
# include "DefsGnucLinux.h"
# elif NV_OS_DARWIN || NV_OS_FREEBSD
# elif NV_OS_DARWIN
# include "DefsGnucDarwin.h"
# elif NV_OS_MINGW
# include "DefsGnucWin32.h"

View File

@ -0,0 +1,7 @@
SET(POSHLIB_SRCS
posh.c
posh.h)
ADD_LIBRARY(posh STATIC ${POSHLIB_SRCS})

1006
src/nvcore/poshlib/posh.c Normal file

File diff suppressed because it is too large Load Diff

1007
src/nvcore/poshlib/posh.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -21,13 +21,10 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "BlockDXT.h"
#include <nvimage/ColorBlock.h>
#include <nvcore/Stream.h>
#include <nvcore/Containers.h> // swap
#include "ColorBlock.h"
#include "BlockDXT.h"
using namespace nv;
@ -39,9 +36,9 @@ using namespace nv;
uint BlockDXT1::evaluatePalette(Color32 color_array[4]) const
{
// Does bit expansion before interpolation.
color_array[0].r = (col0.r << 3) | (col0.r >> 2);
color_array[0].g = (col0.g << 2) | (col0.g >> 4);
color_array[0].b = (col0.b << 3) | (col0.b >> 2);
color_array[0].g = (col0.g << 2) | (col0.g >> 4);
color_array[0].r = (col0.r << 3) | (col0.r >> 2);
color_array[0].a = 0xFF;
// @@ Same as above, but faster?
@ -93,51 +90,6 @@ uint BlockDXT1::evaluatePalette(Color32 color_array[4]) const
}
}
// Builds the 4-entry palette of this block using integer formulas that differ from
// evaluatePalette(): red/blue are scaled with *22/8 (or *33/8) factors and the interpolated
// green values use a fixed-point blend. Returns the number of colors encoded by the block (4 or 3).
// NOTE(review): going by the name these formulas presumably reproduce the decoder of NV5x
// hardware - confirm against the original reference before changing any constant.
uint BlockDXT1::evaluatePaletteNV5x(Color32 color_array[4]) const
{
// Does bit expansion before interpolation.
// Endpoint 0: red/blue scaled by 3*22/8, green expanded by bit replication.
color_array[0].r = (3 * col0.r * 22) / 8;
color_array[0].g = (col0.g << 2) | (col0.g >> 4);
color_array[0].b = (3 * col0.b * 22) / 8;
color_array[0].a = 0xFF;
// Endpoint 1: same expansion.
color_array[1].r = (3 * col1.r * 22) / 8;
color_array[1].g = (col1.g << 2) | (col1.g >> 4);
color_array[1].b = (3 * col1.b * 22) / 8;
color_array[1].a = 0xFF;
// The ordering of the two packed endpoint values selects the block mode.
if( col0.u > col1.u ) {
// Four-color block: derive the other two colors.
// Red/blue: weighted 2:1 sums of the raw endpoint fields. Green: blend of the expanded
// endpoints with weight 80/256 (plus a diff/4 term and +128 rounding), all in integer math.
color_array[2].r = (2 * col0.r + col1.r) * 22 / 8;
color_array[2].g = (256 * color_array[0].g + (color_array[1].g - color_array[0].g)/4 + 128 + (color_array[1].g - color_array[0].g) * 80) / 256;
color_array[2].b = (2 * col0.b + col1.b) * 22 / 8;
color_array[2].a = 0xFF;
color_array[3].r = (2 * col1.r + col0.r) * 22 / 8;
color_array[3].g = (256 * color_array[1].g + (color_array[0].g - color_array[1].g)/4 + 128 + (color_array[0].g - color_array[1].g) * 80) / 256;
color_array[3].b = (2 * col1.b + col0.b) * 22 / 8;
color_array[3].a = 0xFF;
return 4;
}
else {
// Three-color block: derive the other color.
// Midpoint of the endpoints: red/blue use the 33/8 factor, green uses blend weight 128/256.
color_array[2].r = (col0.r + col1.r) * 33 / 8;
color_array[2].g = (256 * color_array[0].g + (color_array[1].g - color_array[0].g)/4 + 128 + (color_array[1].g - color_array[0].g) * 128) / 256;
color_array[2].b = (col0.b + col1.b) * 33 / 8;
color_array[2].a = 0xFF;
// Set all components to 0 to match DXT specs.
color_array[3].r = 0x00; // color_array[2].r;
color_array[3].g = 0x00; // color_array[2].g;
color_array[3].b = 0x00; // color_array[2].b;
color_array[3].a = 0x00;
return 3;
}
}
// Evaluate palette assuming 3 color block.
void BlockDXT1::evaluatePalette3(Color32 color_array[4]) const
{
@ -190,6 +142,95 @@ void BlockDXT1::evaluatePalette4(Color32 color_array[4]) const
}
/* Jason Dorie's code.
// ----------------------------------------------------------------------------
// Build palette for a 3 color + transparent black block
// ----------------------------------------------------------------------------
void DXTCGen::BuildCodes3(cbVector *pVects, cbVector &v1, cbVector &v2)
{
//pVects[0] = v1;
//pVects[2] = v2;
//pVects[1][0] = v1[0];
//pVects[1][1] = (BYTE)( ((long)v1[1] + (long)v2[1]) / 2 );
//pVects[1][2] = (BYTE)( ((long)v1[2] + (long)v2[2]) / 2 );
//pVects[1][3] = (BYTE)( ((long)v1[3] + (long)v2[3]) / 2 );
__asm {
mov ecx, dword ptr pVects
mov eax, dword ptr v1
mov ebx, dword ptr v2
movd mm0, [eax]
movd mm1, [ebx]
pxor mm2, mm2
nop
movd [ecx], mm0
movd [ecx+8], mm1
punpcklbw mm0, mm2
punpcklbw mm1, mm2
paddw mm0, mm1
psrlw mm0, 1
packuswb mm0, mm0
movd [ecx+4], mm0
}
// *(long *)&pVects[1] = r1;
}
__int64 ScaleOneThird = 0x5500550055005500;
// ----------------------------------------------------------------------------
// Build palette for a 4 color block
// ----------------------------------------------------------------------------
void DXTCGen::BuildCodes4(cbVector *pVects, cbVector &v1, cbVector &v2)
{
// pVects[0] = v1;
// pVects[3] = v2;
//
// pVects[1][0] = v1[0];
// pVects[1][1] = (BYTE)( ((long)v1[1] * 2 + (long)v2[1]) / 3 );
// pVects[1][2] = (BYTE)( ((long)v1[2] * 2 + (long)v2[2]) / 3 );
// pVects[1][3] = (BYTE)( ((long)v1[3] * 2 + (long)v2[3]) / 3 );
//
// pVects[2][0] = v1[0];
// pVects[2][1] = (BYTE)( ((long)v2[1] * 2 + (long)v1[1]) / 3 );
// pVects[2][2] = (BYTE)( ((long)v2[2] * 2 + (long)v1[2]) / 3 );
// pVects[2][3] = (BYTE)( ((long)v2[3] * 2 + (long)v1[3]) / 3 );
__asm {
mov ecx, dword ptr pVects
mov eax, dword ptr v1
mov ebx, dword ptr v2
movd mm0, [eax]
movd mm1, [ebx]
pxor mm2, mm2
movd [ecx], mm0
movd [ecx+12], mm1
punpcklbw mm0, mm2
punpcklbw mm1, mm2
movq mm3, mm0 // mm3 = v0
paddw mm0, mm1 // mm0 = v0 + v1
paddw mm3, mm3 // mm3 = v0*2
paddw mm0, mm1 // mm0 = v0 + v1*2
paddw mm1, mm3 // mm1 = v0*2 + v1
pmulhw mm0, ScaleOneThird
pmulhw mm1, ScaleOneThird
packuswb mm1, mm0
movq [ecx+4], mm1
}
}
*/
void BlockDXT1::decodeBlock(ColorBlock * block) const
{
nvDebugCheck(block != NULL);
@ -207,24 +248,6 @@ void BlockDXT1::decodeBlock(ColorBlock * block) const
}
}
// Decode this DXT1 block into a 4x4 color block, using the palette produced
// by evaluatePaletteNV5x().
void BlockDXT1::decodeBlockNV5x(ColorBlock * block) const
{
	nvDebugCheck(block != NULL);

	// Build the four-entry palette.
	Color32 palette[4];
	evaluatePaletteNV5x(palette);

	// Every row entry holds four 2-bit palette selectors; the pixel in
	// column x uses bits 2x and 2x+1.
	for (uint y = 0; y < 4; y++) {
		const uint selectors = row[y];
		for (uint x = 0; x < 4; x++) {
			block->color(x, y) = palette[(selectors >> (2 * x)) & 3];
		}
	}
}
void BlockDXT1::setIndices(int * idx)
{
indices = 0;
@ -263,14 +286,6 @@ void BlockDXT3::decodeBlock(ColorBlock * block) const
alpha.decodeBlock(block);
}
// Decode a DXT3 block: the color part goes through the NV5x decoder, the
// alpha part through the regular alpha decoder.
void BlockDXT3::decodeBlockNV5x(ColorBlock * block) const
{
nvDebugCheck(block != NULL);
// Color is decoded first; alpha is then decoded into the same block.
color.decodeBlockNV5x(block);
alpha.decodeBlock(block);
}
void AlphaBlockDXT3::decodeBlock(ColorBlock * block) const
{
nvDebugCheck(block != NULL);
@ -451,17 +466,7 @@ void BlockDXT5::decodeBlock(ColorBlock * block) const
// Decode alpha.
alpha.decodeBlock(block);
}
// Decode a DXT5 block: the color part goes through the NV5x decoder, the
// alpha part through the regular alpha decoder.
void BlockDXT5::decodeBlockNV5x(ColorBlock * block) const
{
nvDebugCheck(block != NULL);
// Decode color.
color.decodeBlockNV5x(block);
// Decode alpha (into the same block, after the color pass).
alpha.decodeBlock(block);
}
/// Flip DXT5 block vertically.

View File

@ -47,13 +47,11 @@ namespace nv
bool isFourColorMode() const;
uint evaluatePalette(Color32 color_array[4]) const;
uint evaluatePaletteNV5x(Color32 color_array[4]) const;
uint evaluatePaletteFast(Color32 color_array[4]) const;
void evaluatePalette3(Color32 color_array[4]) const;
void evaluatePalette4(Color32 color_array[4]) const;
void decodeBlock(ColorBlock * block) const;
void decodeBlockNV5x(ColorBlock * block) const;
void setIndices(int * idx);
@ -107,7 +105,6 @@ namespace nv
BlockDXT1 color;
void decodeBlock(ColorBlock * block) const;
void decodeBlockNV5x(ColorBlock * block) const;
void flip4();
void flip2();
@ -163,7 +160,6 @@ namespace nv
BlockDXT1 color;
void decodeBlock(ColorBlock * block) const;
void decodeBlockNV5x(ColorBlock * block) const;
void flip4();
void flip2();

View File

@ -1,71 +1,68 @@
PROJECT(nvimage)
SET(IMAGE_SRCS
nvimage.h
FloatImage.h
FloatImage.cpp
Filter.h
Filter.cpp
Image.h
Image.cpp
ImageIO.h
ImageIO.cpp
ColorBlock.h
ColorBlock.cpp
BlockDXT.h
BlockDXT.cpp
DirectDrawSurface.h
DirectDrawSurface.cpp
Quantize.h
Quantize.cpp
NormalMap.h
NormalMap.cpp
PsdFile.h
TgaFile.h
ColorSpace.h
ColorSpace.cpp)
nvimage.h
FloatImage.h
FloatImage.cpp
Filter.h
Filter.cpp
Image.h
Image.cpp
ImageIO.h
ImageIO.cpp
ColorBlock.h
ColorBlock.cpp
BlockDXT.h
BlockDXT.cpp
HoleFilling.h
HoleFilling.cpp
DirectDrawSurface.h
DirectDrawSurface.cpp
Quantize.h
Quantize.cpp
NormalMap.h
NormalMap.cpp
NormalMipmap.h
NormalMipmap.cpp
PsdFile.h
TgaFile.h)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
IF(PNG_FOUND)
SET(LIBS ${LIBS} ${PNG_LIBRARIES})
INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
SET(LIBS ${LIBS} ${PNG_LIBRARIES})
INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
ENDIF(PNG_FOUND)
IF(JPEG_FOUND)
SET(LIBS ${LIBS} ${JPEG_LIBRARIES})
INCLUDE_DIRECTORIES(${JPEG_INCLUDE_DIR})
SET(LIBS ${LIBS} ${JPEG_LIBRARIES})
INCLUDE_DIRECTORIES(${JPEG_INCLUDE_DIR})
ENDIF(JPEG_FOUND)
IF(TIFF_FOUND)
SET(LIBS ${LIBS} ${TIFF_LIBRARIES})
INCLUDE_DIRECTORIES(${TIFF_INCLUDE_DIR})
SET(LIBS ${LIBS} ${TIFF_LIBRARIES})
INCLUDE_DIRECTORIES(${TIFF_INCLUDE_DIR})
ENDIF(TIFF_FOUND)
IF(OPENEXR_FOUND)
SET(LIBS ${LIBS} ${OPENEXR_LIBRARIES})
INCLUDE_DIRECTORIES(${OPENEXR_INCLUDE_PATHS})
SET(LIBS ${LIBS} ${OPENEXR_LIBRARIES})
INCLUDE_DIRECTORIES(${OPENEXR_INCLUDE_PATHS})
ENDIF(OPENEXR_FOUND)
IF(FREEIMAGE_FOUND)
SET(LIBS ${LIBS} ${FREEIMAGE_LIBRARIES})
INCLUDE_DIRECTORIES(${FREEIMAGE_INCLUDE_PATH})
ENDIF(FREEIMAGE_FOUND)
# targets
ADD_DEFINITIONS(-DNVIMAGE_EXPORTS)
IF(NVIMAGE_SHARED)
ADD_DEFINITIONS(-DNVIMAGE_SHARED=1)
ADD_LIBRARY(nvimage SHARED ${IMAGE_SRCS})
IF(NVIMAGE_SHARED)
ADD_DEFINITIONS(-DNVIMAGE_SHARED=1)
ADD_LIBRARY(nvimage SHARED ${IMAGE_SRCS})
ELSE(NVIMAGE_SHARED)
ADD_LIBRARY(nvimage ${IMAGE_SRCS})
ADD_LIBRARY(nvimage ${IMAGE_SRCS})
ENDIF(NVIMAGE_SHARED)
TARGET_LINK_LIBRARIES(nvimage ${LIBS} nvcore nvmath posh)
INSTALL(TARGETS nvimage
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib/static)
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib/static)

View File

@ -1,6 +1,5 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Containers.h> // swap
#include <nvmath/Box.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/Image.h>
@ -58,9 +57,11 @@ void ColorBlock::init(const Image * img, uint x, uint y)
const uint bw = min(img->width() - x, 4U);
const uint bh = min(img->height() - y, 4U);
nvDebugCheck(bw != 0 && bh != 0);
static const int remainder[] = {
nvDebugCheck(bw != 0);
nvDebugCheck(bh != 0);
static int remainder[] = {
0, 0, 0, 0,
0, 1, 0, 1,
0, 1, 2, 0,
@ -81,129 +82,51 @@ void ColorBlock::init(const Image * img, uint x, uint y)
}
}
void ColorBlock::init(uint w, uint h, uint * data, uint x, uint y)
void ColorBlock::swizzleDXT5n()
{
nvDebugCheck(data != NULL);
const uint bw = min(w - x, 4U);
const uint bh = min(h - y, 4U);
nvDebugCheck(bw != 0 && bh != 0);
// Blocks that are smaller than 4x4 are handled by repeating the pixels.
// @@ Thats only correct when block size is 1, 2 or 4, but not with 3. :(
for (uint i = 0; i < 4; i++)
{
const int by = i % bh;
for (uint e = 0; e < 4; e++)
{
const int bx = e % bw;
const uint idx = (y + by) * w + x + bx;
color(e, i).u = data[idx];
}
}
}
// Fill the 4x4 block from a w x h float image, starting at pixel (x, y).
// The source is read as four consecutive floats per pixel; each one is
// clamped to [0, 1] and scaled to an 8-bit value.
void ColorBlock::init(uint w, uint h, float * data, uint x, uint y)
{
	nvDebugCheck(data != NULL);

	// Extent of the image region covered by this block (at most 4x4).
	const uint blockW = min(w - x, 4U);
	const uint blockH = min(h - y, 4U);
	nvDebugCheck(blockW != 0 && blockH != 0);

	// Blocks smaller than 4x4 are filled by repeating the available pixels.
	// @@ That's only correct when the block size is 1, 2 or 4, but not with 3. :(
	for (uint row = 0; row < 4; row++)
	{
		const uint srcY = y + row % blockH;

		for (uint col = 0; col < 4; col++)
		{
			const uint srcX = x + col % blockW;
			const float * texel = data + (srcY * w + srcX) * 4;

			Color32 & dst = color(col, row);
			dst.r = uint8(255 * clamp(texel[0], 0.0f, 1.0f));
			dst.g = uint8(255 * clamp(texel[1], 0.0f, 1.0f));
			dst.b = uint8(255 * clamp(texel[2], 0.0f, 1.0f));
			dst.a = uint8(255 * clamp(texel[3], 0.0f, 1.0f));
		}
	}
}
// Select a swizzle source from a color: 0=r, 1=g, 2=b, 3=a, 4=constant 0xFF.
// Any other selector yields 0.
static inline uint8 component(Color32 c, uint i)
{
	switch (i)
	{
		case 0: return c.r;
		case 1: return c.g;
		case 2: return c.b;
		case 3: return c.a;
		case 4: return 0xFF;
		default: return 0;
	}
}
void ColorBlock::swizzle(uint x, uint y, uint z, uint w)
{
for (int i = 0; i < 16; i++)
for(int i = 0; i < 16; i++)
{
Color32 c = m_color[i];
m_color[i].r = component(c, x);
m_color[i].g = component(c, y);
m_color[i].b = component(c, z);
m_color[i].a = component(c, w);
m_color[i] = Color32(0xFF, c.g, 0, c.r);
}
}
void ColorBlock::splatX()
{
for(int i = 0; i < 16; i++)
{
uint8 x = m_color[i].r;
m_color[i] = Color32(x, x, x, x);
}
}
void ColorBlock::splatY()
{
for(int i = 0; i < 16; i++)
{
uint8 y = m_color[i].g;
m_color[i] = Color32(y, y, y, y);
}
}
/// Returns true if the block has a single color.
bool ColorBlock::isSingleColor() const
{
Color32 mask(0xFF, 0xFF, 0xFF, 0x00);
uint u = m_color[0].u & mask.u;
for (int i = 1; i < 16; i++)
{
if (u != (m_color[i].u & mask.u))
{
return false;
}
}
return true;
Color32 mask(0xFF, 0xFF, 0xFF, 0x00);
uint u = m_color[0].u & mask.u;
for (int i = 1; i < 16; i++)
{
if (u != (m_color[i].u & mask.u))
{
return false;
}
}
return true;
}
/*
/// Returns true if the block has a single color, ignoring transparent pixels.
bool ColorBlock::isSingleColorNoAlpha() const
{
Color32 c;
int i;
for(i = 0; i < 16; i++)
{
if (m_color[i].a != 0) {
c = m_color[i];
break;
}
}
Color32 mask(0xFF, 0xFF, 0xFF, 0x00);
uint u = c.u & mask.u;
for(; i < 16; i++)
{
if (u != (m_color[i].u & mask.u))
{
return false;
}
}
return true;
}
*/
/// Count number of unique colors in this color block.
uint ColorBlock::countUniqueColors() const
{

View File

@ -18,13 +18,12 @@ namespace nv
ColorBlock(const Image * img, uint x, uint y);
void init(const Image * img, uint x, uint y);
void init(uint w, uint h, uint * data, uint x, uint y);
void init(uint w, uint h, float * data, uint x, uint y);
void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0
void swizzleDXT5n();
void splatX();
void splatY();
bool isSingleColor() const;
//bool isSingleColorNoAlpha() const;
uint countUniqueColors() const;
Color32 averageColor() const;
bool hasAlpha() const;

View File

@ -1,70 +0,0 @@
// This code is in the public domain -- jim@tilander.org
#include <nvcore/nvcore.h>
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include "ColorSpace.h"
namespace nv
{
// Convert every pixel of img from RGB to YCoCg-R, in place.
// Output layout: r = Co + 128, g = Cg + 128, b = 0, a = Y.
void ColorSpace::RGBtoYCoCg_R(Image* img)
{
	const uint w = img->width();
	const uint h = img->height();

	for( uint y=0; y < h; y++ )
	{
		for( uint x=0; x < w; x++ )
		{
			// Bind by reference: a by-value copy would be modified and then
			// discarded, leaving the image untouched.
			Color32 & pixel = img->pixel(x, y);

			const int r = pixel.r;
			const int g = pixel.g;
			const int b = pixel.b;

			const int Co = r - b;
			const int t = b + Co/2;
			const int Cg = g - t;
			const int Y = t + Cg/2;

			// Just saturate the chroma here (we lose one bit in each channel);
			// this just means that we won't have as high a dynamic range. Perhaps a
			// better option is to lose the least significant bit instead?
			pixel.r = clamp(Co + 128, 0, 255);
			pixel.g = clamp(Cg + 128, 0, 255);
			pixel.b = 0;
			pixel.a = Y;
		}
	}
}
// Convert every pixel of img from YCoCg-R back to RGB, in place.
// Input layout: r = Co + 128, g = Cg + 128, a = Y.
void ColorSpace::YCoCg_RtoRGB(Image* img)
{
	const uint w = img->width();
	const uint h = img->height();

	for( uint y=0; y < h; y++ )
	{
		for( uint x=0; x < w; x++ )
		{
			// Bind by reference: a by-value copy would be modified and then
			// discarded, leaving the image untouched.
			Color32 & pixel = img->pixel(x, y);

			const int Co = (int)pixel.r - 128;
			const int Cg = (int)pixel.g - 128;
			const int Y = pixel.a;

			const int t = Y - Cg/2;
			const int g = Cg + t;
			const int b = t - Co/2;
			const int r = b + Co;

			// Chroma may have been saturated by the forward transform, so the
			// reconstructed channels can leave [0, 255]; clamp instead of letting
			// the narrowing conversion wrap around.
			pixel.r = clamp(r, 0, 255);
			pixel.g = clamp(g, 0, 255);
			pixel.b = clamp(b, 0, 255);
			// NOTE(review): alpha is set to 1, not 255 -- confirm whether fully
			// opaque was intended.
			pixel.a = 1;
		}
	}
}
}

View File

@ -1,21 +0,0 @@
// This code is in the public domain -- jim@tilander.org
#ifndef NV_IMAGE_COLORSPACE_H
#define NV_IMAGE_COLORSPACE_H
namespace nv
{
// Forward declaration; only pointers to Image are used in this header.
class Image;
// Defines simple mappings between different color spaces and encodes them in the
// input image.
namespace ColorSpace
{
// RGB -> YCoCg-R. The implementation computes r = Co + 128, g = Cg + 128, b = 0, a = Y.
void RGBtoYCoCg_R(Image* img);
// YCoCg-R -> RGB. The implementation reads Co from r, Cg from g and Y from a.
void YCoCg_RtoRGB(Image* img);
}
}
#endif

122
src/nvimage/ConeMap.cpp Normal file
View File

@ -0,0 +1,122 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Ptr.h>
#include <nvmath/Color.h>
#include <nvimage/NormalMap.h>
#include <nvimage/Filter.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Image.h>
using namespace nv;
// Compute the cone value for pixel (x, y) from channel 0 of img.
// Scans every pixel whose value is below this pixel's value, picks the one
// with the smallest (squared pixel distance / height difference) ratio, and
// returns min(1, sqrt(normalized distance / height difference)) for it.
// Returns 1 when no such pixel exists.
static float processPixel(const FloatImage * img, uint x, uint y)
{
	nvDebugCheck(img != NULL);

	const uint w = img->width();
	const uint h = img->height();

	const float d = img->pixel(x, y, 0);

	float best_ratio = INF;
	uint best_x = w;	// w / h mean "nothing found yet".
	uint best_y = h;

	for (uint yy = 0; yy < h; yy++)
	{
		for (uint xx = 0; xx < w; xx++)
		{
			const float ch = d - img->pixel(xx, yy, 0);

			if (ch > 0)
			{
				// Convert before subtracting: an unsigned difference would wrap
				// for pixels to the left of / above (x, y).
				const float dx = float(xx) - float(x);
				const float dy = float(yy) - float(y);

				const float ratio = (dx * dx + dy * dy) / ch;

				if (ratio < best_ratio)
				{
					// Remember the ratio too, otherwise every candidate would win.
					best_ratio = ratio;
					best_x = xx;
					best_y = yy;
				}
			}
		}
	}

	if (best_x != w)
	{
		nvDebugCheck(best_y != h);

		const float dx = (float(best_x) - float(x)) / w;
		const float dy = (float(best_y) - float(y)) / h;

		const float cw = sqrtf(dx*dx + dy*dy);
		const float ch = d - img->pixel(best_x, best_y, 0);

		return min(1.0f, sqrtf(cw / ch));
	}

	return 1;
}
// Create a cone map image for img. The height of each pixel is the dot
// product of its color with heightWeights and is stored in channel 0 of the
// returned four-channel image. The caller owns the returned image.
FloatImage * createConeMap(const Image * img, Vector4::Arg heightWeights)
{
	nvCheck(img != NULL);

	const uint width = img->width();
	const uint height = img->height();
	const uint pixelCount = width * height;

	AutoPtr<FloatImage> result(new FloatImage());
	//fimage->allocate(2, w, h);
	result->allocate(4, width, height);

	// Compute height and store in red channel:
	float * heights = result->channel(0);
	for (uint i = 0; i < pixelCount; i++)
	{
		heights[i] = dot(toVector4(img->pixel(i)), heightWeights);
	}

	// Compute cones:
	// NOTE(review): the value returned by processPixel is not stored anywhere;
	// confirm which channel is supposed to receive it.
	for (uint y = 0; y < height; y++)
	{
		for (uint x = 0; x < width; x++)
		{
			processPixel(result, x, y);
		}
	}

	return result.release();
}

View File

@ -1,40 +1,39 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSORRGB_H
#define NV_TT_COMPRESSORRGB_H
#include "nvtt.h"
#include "Compressor.h"
namespace nv
{
struct PixelFormatConverter : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
} // nv namespace
#endif // NV_TT_COMPRESSORRGB_H
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_IMAGE_CONEMAP_H
#define NV_IMAGE_CONEMAP_H
#include <nvmath/Vector.h>
#include <nvimage/nvimage.h>
namespace nv
{
class Image;
class FloatImage;
FloatImage * createConeMap(const Image * img, Vector4::Arg heightWeights);
} // nv namespace
#endif // NV_IMAGE_CONEMAP_H

View File

@ -21,16 +21,16 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Containers.h> // max
#include <nvcore/StdStream.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/Image.h>
#include <nvimage/BlockDXT.h>
#include <nvimage/PixelFormat.h>
#include <nvcore/Debug.h>
#include <nvcore/Containers.h> // max
#include <nvcore/StdStream.h>
#include <string.h> // memset
@ -406,14 +406,10 @@ namespace nv
s << pf.flags;
s << pf.fourcc;
s << pf.bitcount;
s.serialize(&pf.rmask, sizeof(pf.rmask));
s.serialize(&pf.gmask, sizeof(pf.gmask));
s.serialize(&pf.bmask, sizeof(pf.bmask));
s.serialize(&pf.amask, sizeof(pf.amask));
// s << pf.rmask;
// s << pf.gmask;
// s << pf.bmask;
// s << pf.amask;
s << pf.rmask;
s << pf.gmask;
s << pf.bmask;
s << pf.amask;
return s;
}
@ -449,9 +445,7 @@ namespace nv
s << header.pitch;
s << header.depth;
s << header.mipmapcount;
for (int i = 0; i < 11; i++) {
s << header.reserved[i];
}
s.serialize(header.reserved, 11 * sizeof(uint));
s << header.pf;
s << header.caps;
s << header.notused;
@ -538,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (2 << 16) | (1 << 8) | (0); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (8); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;
@ -576,7 +570,7 @@ void DDSHeader::setHeight(uint h)
// Set the depth of the surface and mark the depth field as present.
void DDSHeader::setDepth(uint d)
{
	this->flags |= DDSD_DEPTH;
	// Write the depth field only; assigning d to height here would overwrite
	// the height of the surface.
	this->depth = d;
}
void DDSHeader::setMipmapCount(uint count)
@ -605,7 +599,6 @@ void DDSHeader::setMipmapCount(uint count)
// Mark the DX10 part of the header as describing a single 2D texture.
void DDSHeader::setTexture2D()
{
this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D;
// One element: not a texture array.
this->header10.arraySize = 1;
}
void DDSHeader::setTexture3D()
@ -613,7 +606,6 @@ void DDSHeader::setTexture3D()
this->caps.caps2 = DDSCAPS2_VOLUME;
this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE3D;
this->header10.arraySize = 1;
}
void DDSHeader::setTextureCube()
@ -644,33 +636,22 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
// set fourcc pixel format.
this->pf.flags = DDPF_FOURCC;
this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = 0;
this->pf.rmask = 0;
this->pf.gmask = 0;
this->pf.bmask = 0;
this->pf.amask = 0;
}
// Describe the pixel format by a fourcc code and clear the channel masks.
void DDSHeader::setFormatCode(uint32 code)
{
	// set fourcc pixel format.
	this->pf.flags = DDPF_FOURCC;
	this->pf.fourcc = code;

	// For ATI2 the bitcount field carries the A2XY code; every other format
	// leaves it at zero.
	this->pf.bitcount = (code == FOURCC_ATI2) ? FOURCC_A2XY : 0;

	this->pf.rmask = 0;
	this->pf.gmask = 0;
	this->pf.bmask = 0;
	this->pf.amask = 0;
}
// Store a four-character swizzle code, packed with MAKEFOURCC, in the
// bitcount field of the pixel format.
void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{
this->pf.bitcount = MAKEFOURCC(c0, c1, c2, c3);
}
void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask)
{
// Make sure the masks are correct.
@ -681,17 +662,10 @@ void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask
nvCheck((gmask & amask) == 0);
nvCheck((bmask & amask) == 0);
if (rmask != 0 || gmask != 0 || bmask != 0)
{
this->pf.flags = DDPF_RGB;
if (amask != 0) {
this->pf.flags |= DDPF_ALPHAPIXELS;
}
}
else if (amask != 0)
{
this->pf.flags |= DDPF_ALPHA;
this->pf.flags = DDPF_RGB;
if (amask != 0) {
this->pf.flags |= DDPF_ALPHAPIXELS;
}
if (bitcount == 0)
@ -733,12 +707,6 @@ void DDSHeader::setNormalFlag(bool b)
else this->pf.flags &= ~DDPF_NORMAL;
}
// Set or clear the DDPF_ALPHAPIXELS bit of the pixel format flags.
void DDSHeader::setHasAlphaFlag(bool b)
{
	if (b)
	{
		this->pf.flags |= DDPF_ALPHAPIXELS;
	}
	else
	{
		this->pf.flags &= ~DDPF_ALPHAPIXELS;
	}
}
void DDSHeader::swapBytes()
{
this->fourcc = POSH_LittleU32(this->fourcc);
@ -791,15 +759,6 @@ DirectDrawSurface::DirectDrawSurface(const char * name) : stream(new StdInputStr
}
}
// Construct a surface on top of an existing stream. If the stream is not in
// an error state, the header is serialized through it immediately
// (presumably read from it -- confirm the stream direction with callers).
DirectDrawSurface::DirectDrawSurface(Stream * s) : stream(s)
{
if (!stream->isError())
{
(*stream) << header;
}
}
DirectDrawSurface::~DirectDrawSurface()
{
delete stream;
@ -839,16 +798,6 @@ bool DirectDrawSurface::isSupported() const
if (header.hasDX10Header())
{
if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM ||
header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM ||
header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM ||
header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM ||
header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM)
{
return true;
}
return false;
}
else
{
@ -892,41 +841,6 @@ bool DirectDrawSurface::isSupported() const
return true;
}
// Tell whether the surface carries an alpha channel, judging by the header.
bool DirectDrawSurface::hasAlpha() const
{
	if (header.hasDX10Header())
	{
#pragma message(NV_FILE_LINE "TODO: Update hasAlpha to handle all DX10 formats.")
		// Only these three block formats are reported as having alpha.
		switch (header.header10.dxgiFormat)
		{
			case DXGI_FORMAT_BC1_UNORM:
			case DXGI_FORMAT_BC2_UNORM:
			case DXGI_FORMAT_BC3_UNORM:
				return true;
			default:
				return false;
		}
	}

	// Linear formats: alpha exists when an alpha mask is present.
	if (header.pf.flags & DDPF_RGB)
	{
		return header.pf.amask != 0;
	}

	if (header.pf.flags & DDPF_FOURCC)
	{
		// These fourcc formats, and anything flagged as a normal map, are
		// reported as having no alpha.
		const bool withoutAlpha =
			header.pf.fourcc == FOURCC_RXGB ||
			header.pf.fourcc == FOURCC_ATI1 ||
			header.pf.fourcc == FOURCC_ATI2 ||
			(header.pf.flags & DDPF_NORMAL);

		// @@ Here we could check the ALPHA_PIXELS flag, but nobody sets it.
		return !withoutAlpha;
	}

	return false;
}
uint DirectDrawSurface::mipmapCount() const
{
@ -1005,13 +919,6 @@ void DirectDrawSurface::setNormalFlag(bool b)
header.setNormalFlag(b);
}
// Forward the alpha flag to the header; the surface must be valid.
void DirectDrawSurface::setHasAlphaFlag(bool b)
{
nvDebugCheck(isValid());
header.setHasAlphaFlag(b);
}
void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap)
{
nvDebugCheck(isValid());
@ -1029,32 +936,15 @@ void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap)
}
img->allocate(w, h);
if (hasAlpha())
if (header.pf.flags & DDPF_RGB)
{
img->setFormat(Image::Format_ARGB);
readLinearImage(img);
}
else
else if (header.pf.flags & DDPF_FOURCC)
{
img->setFormat(Image::Format_RGB);
}
if (header.hasDX10Header())
{
// So far only block formats supported.
readBlockImage(img);
}
else
{
if (header.pf.flags & DDPF_RGB)
{
readLinearImage(img);
}
else if (header.pf.flags & DDPF_FOURCC)
{
readBlockImage(img);
}
}
}
void DirectDrawSurface::readLinearImage(Image * img)
@ -1079,7 +969,16 @@ void DirectDrawSurface::readLinearImage(Image * img)
uint byteCount = (header.pf.bitcount + 7) / 8;
#pragma message(NV_FILE_LINE "TODO: Support floating point linear images and other FOURCC codes.")
// set image format: RGB or ARGB
// alpha channel exists if and only if the alpha mask is non-zero
if (header.pf.amask == 0)
{
img->setFormat(Image::Format_RGB);
}
else
{
img->setFormat(Image::Format_ARGB);
}
// Read linear RGB images.
for (uint y = 0; y < h; y++)
@ -1105,6 +1004,19 @@ void DirectDrawSurface::readBlockImage(Image * img)
nvDebugCheck(stream != NULL);
nvDebugCheck(img != NULL);
// set image format: RGB or ARGB
if (header.pf.fourcc == FOURCC_RXGB ||
header.pf.fourcc == FOURCC_ATI1 ||
header.pf.fourcc == FOURCC_ATI2 ||
header.pf.flags & DDPF_NORMAL)
{
img->setFormat(Image::Format_RGB);
}
else
{
img->setFormat(Image::Format_ARGB);
}
const uint w = img->width();
const uint h = img->height();
@ -1149,33 +1061,20 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
nvDebugCheck(stream != NULL);
nvDebugCheck(rgba != NULL);
uint fourcc = header.pf.fourcc;
// Map DX10 block formats to fourcc codes.
if (header.hasDX10Header())
{
if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM) fourcc = FOURCC_DXT1;
if (header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM) fourcc = FOURCC_DXT3;
if (header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM) fourcc = FOURCC_DXT5;
if (header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM) fourcc = FOURCC_ATI1;
if (header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) fourcc = FOURCC_ATI2;
}
if (fourcc == FOURCC_DXT1)
if (header.pf.fourcc == FOURCC_DXT1)
{
BlockDXT1 block;
*stream << block;
block.decodeBlock(rgba);
}
else if (fourcc == FOURCC_DXT2 ||
else if (header.pf.fourcc == FOURCC_DXT2 ||
header.pf.fourcc == FOURCC_DXT3)
{
BlockDXT3 block;
*stream << block;
block.decodeBlock(rgba);
}
else if (fourcc == FOURCC_DXT4 ||
else if (header.pf.fourcc == FOURCC_DXT4 ||
header.pf.fourcc == FOURCC_DXT5 ||
header.pf.fourcc == FOURCC_RXGB)
{
@ -1183,7 +1082,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
*stream << block;
block.decodeBlock(rgba);
if (fourcc == FOURCC_RXGB)
if (header.pf.fourcc == FOURCC_RXGB)
{
// Swap R & A.
for (int i = 0; i < 16; i++)
@ -1195,13 +1094,13 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
}
}
}
else if (fourcc == FOURCC_ATI1)
else if (header.pf.fourcc == FOURCC_ATI1)
{
BlockATI1 block;
*stream << block;
block.decodeBlock(rgba);
}
else if (fourcc == FOURCC_ATI2)
else if (header.pf.fourcc == FOURCC_ATI2)
{
BlockATI2 block;
*stream << block;
@ -1211,7 +1110,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
// If normal flag set, convert to normal.
if (header.pf.flags & DDPF_NORMAL)
{
if (fourcc == FOURCC_ATI2)
if (header.pf.fourcc == FOURCC_ATI2)
{
for (int i = 0; i < 16; i++)
{
@ -1219,7 +1118,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
c = buildNormal(c.r, c.g);
}
}
else if (fourcc == FOURCC_DXT5)
else if (header.pf.fourcc == FOURCC_DXT5)
{
for (int i = 0; i < 16; i++)
{
@ -1245,27 +1144,6 @@ uint DirectDrawSurface::blockSize() const
case FOURCC_RXGB:
case FOURCC_ATI2:
return 16;
case FOURCC_DX10:
switch(header.header10.dxgiFormat)
{
case DXGI_FORMAT_BC1_TYPELESS:
case DXGI_FORMAT_BC1_UNORM:
case DXGI_FORMAT_BC1_UNORM_SRGB:
case DXGI_FORMAT_BC4_TYPELESS:
case DXGI_FORMAT_BC4_UNORM:
case DXGI_FORMAT_BC4_SNORM:
return 8;
case DXGI_FORMAT_BC2_TYPELESS:
case DXGI_FORMAT_BC2_UNORM:
case DXGI_FORMAT_BC2_UNORM_SRGB:
case DXGI_FORMAT_BC3_TYPELESS:
case DXGI_FORMAT_BC3_UNORM:
case DXGI_FORMAT_BC3_UNORM_SRGB:
case DXGI_FORMAT_BC5_TYPELESS:
case DXGI_FORMAT_BC5_UNORM:
case DXGI_FORMAT_BC5_SNORM:
return 16;
};
};
// Not a block image.

View File

@ -93,12 +93,9 @@ namespace nv
void setLinearSize(uint size);
void setPitch(uint pitch);
void setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3);
void setFormatCode(uint code);
void setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3);
void setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask);
void setDX10Format(uint format);
void setNormalFlag(bool b);
void setHasAlphaFlag(bool b);
void swapBytes();
@ -113,13 +110,10 @@ namespace nv
{
public:
DirectDrawSurface(const char * file);
DirectDrawSurface(Stream * stream);
~DirectDrawSurface();
bool isValid() const;
bool isSupported() const;
bool hasAlpha() const;
uint mipmapCount() const;
uint width() const;
@ -131,7 +125,6 @@ namespace nv
bool isTextureCube() const;
void setNormalFlag(bool b);
void setHasAlphaFlag(bool b);
void mipmap(Image * img, uint f, uint m);
// void mipmap(FloatImage * img, uint f, uint m);

View File

@ -17,6 +17,21 @@ using namespace nv;
namespace
{
// Convert a float to int by plain conversion, i.e. truncation toward zero.
// NOTE(review): despite the name this does not round to nearest; callers may
// depend on the truncation, so it is kept as is.
static int iround(float f)
{
	const int truncated = static_cast<int>(f);
	return truncated;
}
// Largest integer not greater than f, as an int.
static int ifloor(float f)
{
	const float lowered = floor(f);
	return static_cast<int>(lowered);
}
// Fractional part of f relative to floor(f); the result is in [0, 1) even
// for negative inputs.
static float frac(float f)
{
	const float whole = floor(f);
	return f - whole;
}
static int mirror(int x, int w)
{
x = abs(x);
@ -172,12 +187,12 @@ void FloatImage::normalize(uint base_component)
void FloatImage::packNormals(uint base_component)
{
scaleBias(base_component, 3, 0.5f, 0.5f);
scaleBias(base_component, 3, 0.5f, 1.0f);
}
void FloatImage::expandNormals(uint base_component)
{
scaleBias(base_component, 3, 2.0f, -1.0f);
scaleBias(base_component, 3, 2, -0.5);
}
void FloatImage::scaleBias(uint base_component, uint num, float scale, float bias)
@ -188,7 +203,7 @@ void FloatImage::scaleBias(uint base_component, uint num, float scale, float bia
float * ptr = this->channel(base_component + c);
for(uint i = 0; i < size; i++) {
ptr[i] = scale * ptr[i] + bias;
ptr[i] = scale * (ptr[i] + bias);
}
}
}
@ -227,57 +242,6 @@ void FloatImage::exponentiate(uint base_component, uint num, float power)
}
}
/// Apply linear transform.
/// Treats the four channels starting at base_component as one Vector4 per
/// pixel, transforms it by m, adds offset and stores the result back.
void FloatImage::transform(uint base_component, const Matrix & m, Vector4::Arg offset)
{
	nvCheck(base_component + 4 <= m_componentNum);

	const uint pixelCount = m_width * m_height;

	float * const red = this->channel(base_component + 0);
	float * const green = this->channel(base_component + 1);
	float * const blue = this->channel(base_component + 2);
	float * const alpha = this->channel(base_component + 3);

	for (uint i = 0; i < pixelCount; i++)
	{
		const Vector4 input(red[i], green[i], blue[i], alpha[i]);
		Vector4 output = nv::transform(m, input) + offset;

		red[i] = output.x();
		green[i] = output.y();
		blue[i] = output.z();
		alpha[i] = output.w();
	}
}
// Rearrange the four channels starting at base_component. Each of r, g, b, a
// selects the source of the corresponding output channel: 0-3 pick one of the
// four channels, 4, 5 and 6 pick the constants 1.0f, 0.0f and -1.0f.
void FloatImage::swizzle(uint base_component, uint r, uint g, uint b, uint a)
{
nvCheck(base_component + 4 <= m_componentNum);
nvCheck(r < 7 && g < 7 && b < 7 && a < 7);
const uint size = m_width * m_height;
float consts[] = { 1.0f, 0.0f, -1.0f };
// Source table: entries 0-3 are cursors into the channels, 4-6 point at the constants.
float * c[7];
c[0] = this->channel(base_component + 0);
c[1] = this->channel(base_component + 1);
c[2] = this->channel(base_component + 2);
c[3] = this->channel(base_component + 3);
c[4] = consts;
c[5] = consts + 1;
c[6] = consts + 2;
for (uint i = 0; i < size; i++)
{
// Read all four sources before writing anything: sources and destinations
// are the same channels.
float tmp[4] = { *c[r], *c[g], *c[b], *c[a] };
// Only the channel cursors advance; the constant entries stay where they are.
*c[0]++ = tmp[0];
*c[1]++ = tmp[1];
*c[2]++ = tmp[2];
*c[3]++ = tmp[3];
}
}
float FloatImage::sampleNearest(const float x, const float y, const int c, const WrapMode wm) const
{
if( wm == WrapMode_Clamp ) return sampleNearestClamp(x, y, c);
@ -628,7 +592,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
float * dst_channel = dst_image->channel(c);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer());
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) {
dst_channel[y * w + x] = tmp_column[y];
@ -649,7 +613,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
float * tmp_channel = tmp_image->channel(c);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer());
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) {
tmp_channel[y * w + x] = tmp_column[y];
@ -701,7 +665,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
float * dst_channel = dst_image->channel(c);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, alpha, wm, tmp_column.mutableBuffer());
tmp_image->applyKernelVertical(ykernel, x, c, alpha, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) {
dst_channel[y * w + x] = tmp_column[y];
@ -926,25 +890,6 @@ void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c,
}
}
// Vertical flip in place.
void FloatImage::flip()
{
const uint w = m_width;
const uint h = m_height;
const uint h2 = h / 2;
for (uint c = 0; c < m_componentNum; c++) {
for (uint y = 0; y < h2; y++) {
float * src = scanline(y, c);
float * dst = scanline(h - 1 - y, c);
for (uint x = 0; x < w; x++) {
swap(src[x], dst[x]);
}
}
}
}
FloatImage* FloatImage::clone() const
{
FloatImage* copy = new FloatImage();

View File

@ -8,7 +8,7 @@
#include <nvmath/Vector.h>
#include <nvcore/Debug.h>
#include <nvcore/Algorithms.h> // clamp
#include <nvcore/Containers.h> // clamp
#include <stdlib.h> // abs
@ -68,15 +68,14 @@ public:
NVIMAGE_API void toGamma(uint base_component, uint num, float gamma = 2.2f);
NVIMAGE_API void exponentiate(uint base_component, uint num, float power);
NVIMAGE_API void transform(uint base_component, const Matrix & m, const Vector4 & offset);
NVIMAGE_API void swizzle(uint base_component, uint r, uint g, uint b, uint a);
NVIMAGE_API FloatImage * fastDownSample() const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const;
//@}
NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const;
@ -85,9 +84,7 @@ public:
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void flip();
//@}
uint width() const { return m_width; }
uint height() const { return m_height; }

753
src/nvimage/HoleFilling.cpp Normal file
View File

@ -0,0 +1,753 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Containers.h>
#include <nvcore/Ptr.h>
#include <nvmath/nvmath.h>
#include <nvimage/HoleFilling.h>
#include <nvimage/FloatImage.h>
using namespace nv;
// This is a variation of Sapiro's inpainting method.
void nv::fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap)
{
nvCheck(img != NULL);
nvCheck(bmap != NULL);
const int w = img->width();
const int h = img->height();
const int count = img->componentNum();
nvCheck(bmap->width() == uint(w));
nvCheck(bmap->height() == uint(h));
AutoPtr<BitMap> newbmap(new BitMap(w, h));
for(int p = 0; p < passCount; p++)
{
for(int c = 0; c < count; c++)
{
float * channel = img->channel(c);
for(int y = 0; y < h; y++) {
for(int x = 0; x < w; x++) {
if (bmap->bitAt(x, y)) {
// Not a hole.
newbmap->setBitAt(x, y);
continue;
}
const bool west = bmap->bitAt(img->indexClamp(x-1, y));
const bool east = bmap->bitAt(img->indexClamp(x+1, y));
const bool north = bmap->bitAt(img->indexClamp(x, y-1));
const bool south = bmap->bitAt(img->indexClamp(x, y+1));
const bool northwest = bmap->bitAt(img->indexClamp(x-1, y-1));
const bool northeast = bmap->bitAt(img->indexClamp(x+1, y-1));
const bool southwest = bmap->bitAt(img->indexClamp(x-1, y+1));
const bool southeast = bmap->bitAt(img->indexClamp(x+1, y+1));
int num = west + east + north + south + northwest + northeast + southwest + southeast;
if (num != 0) {
float average = 0.0f;
if (num == 3 && west && northwest && southwest) {
average = channel[img->indexClamp(x-1, y)];
}
else if (num == 3 && east && northeast && southeast) {
average = channel[img->indexClamp(x+1, y)];
}
else if (num == 3 && north && northwest && northeast) {
average = channel[img->indexClamp(x, y-1)];
}
else if (num == 3 && south && southwest && southeast) {
average = channel[img->indexClamp(x, y+1)];
}
else {
float total = 0.0f;
if (west) { average += 1 * channel[img->indexClamp(x-1, y)]; total += 1; }
if (east) { average += 1 * channel[img->indexClamp(x+1, y)]; total += 1; }
if (north) { average += 1 * channel[img->indexClamp(x, y-1)]; total += 1; }
if (south) { average += 1 * channel[img->indexClamp(x, y+1)]; total += 1; }
if (northwest) { average += channel[img->indexClamp(x-1, y-1)]; ++total; }
if (northeast) { average += channel[img->indexClamp(x+1, y-1)]; ++total; }
if (southwest) { average += channel[img->indexClamp(x-1, y+1)]; ++total; }
if (southeast) { average += channel[img->indexClamp(x+1, y+1)]; ++total; }
average /= total;
}
channel[img->indexClamp(x, y)] = average;
newbmap->setBitAt(x, y);
}
}
}
}
// Update the bit mask.
swap(*newbmap, *bmap);
}
}
namespace {
struct Neighbor {
uint16 x;
uint16 y;
uint32 d;
};
// Compute euclidean squared distance.
static uint dist( uint16 ax, uint16 ay, uint16 bx, uint16 by ) {
int dx = bx - ax;
int dy = by - ay;
return uint(dx*dx + dy*dy);
}
// Check neighbour, this is the core of the EDT algorithm.
static void checkNeighbour( int x, int y, Neighbor * e, const Neighbor & n ) {
nvDebugCheck(e != NULL);
uint d = dist( x, y, n.x, n.y );
if( d < e->d ) {
e->x = n.x;
e->y = n.y;
e->d = d;
}
}
} // namespace
// Voronoi filling using EDT-4
//
// Every pixel whose bit is clear in bmap receives, in all components of img,
// the value of the source pixel recorded for it by a two-sweep propagation of
// "closest valid pixel" candidates. Pixels whose bit is set are left untouched.
// If the mask has no set bit at all there is nothing to copy from and the
// image is left unchanged.
void nv::fillVoronoi(FloatImage * img, const BitMap * bmap)
{
    nvCheck(img != NULL);
    nvCheck(bmap != NULL);

    const int w = img->width();
    const int h = img->height();
    const int count = img->componentNum();

    nvCheck(bmap->width() == uint(w));
    nvCheck(bmap->height() == uint(h));

    Array<Neighbor> edm;
    edm.resize(w * h);

    int x, y;
    int x0, x1, y0, y1;

    // Init edm. Valid pixels point at themselves; holes start with the
    // out-of-image sentinel (w, h) and the largest possible distance.
    for( y = 0; y < h; y++ ) {
        for( x = 0; x < w; x++ ) {
            if( bmap->bitAt(x, y) ) {
                edm[y * w + x].x = x;
                edm[y * w + x].y = y;
                edm[y * w + x].d = 0;
            }
            else {
                edm[y * w + x].x = w;
                edm[y * w + x].y = h;
                edm[y * w + x].d = w*w + h*h;
            }
        }
    }

    // First pass: top to bottom.
    for( y = 0; y < h; y++ ) {
        for( x = 0; x < w; x++ ) {
            x0 = clamp(x-1, 0, w-1);	// @@ Wrap?
            x1 = clamp(x+1, 0, w-1);
            y0 = clamp(y-1, 0, h-1);

            Neighbor & e = edm[y * w + x];
            checkNeighbour(x, y, &e, edm[y0 * w + x0]);
            checkNeighbour(x, y, &e, edm[y0 * w + x]);
            checkNeighbour(x, y, &e, edm[y0 * w + x1]);
            checkNeighbour(x, y, &e, edm[y * w + x0]);
        }

        for( x = w-1; x >= 0; x-- ) {
            x1 = clamp(x+1, 0, w-1);

            Neighbor & e = edm[y * w + x];
            checkNeighbour(x, y, &e, edm[y * w + x1]);
        }
    }

    // Second pass: bottom to top.
    for( y = h-1; y >= 0; y-- ) {
        for( x = w-1; x >= 0; x-- ) {
            x0 = clamp(x-1, 0, w-1);
            x1 = clamp(x+1, 0, w-1);
            y1 = clamp(y+1, 0, h-1);

            Neighbor & e = edm[y * w + x];
            checkNeighbour(x, y, &e, edm[y * w + x1]);
            checkNeighbour(x, y, &e, edm[y1 * w + x0]);
            checkNeighbour(x, y, &e, edm[y1 * w + x]);
            checkNeighbour(x, y, &e, edm[y1 * w + x1]);
        }

        for( x = 0; x < w; x++ ) {
            x0 = clamp(x-1, 0, w-1);

            Neighbor & e = edm[y * w + x];
            checkNeighbour(x, y, &e, edm[y * w + x0]);
        }
    }

    // Fill empty holes.
    for( y = 0; y < h; y++ ) {
        for( x = 0; x < w; x++ ) {
            const int sx = edm[y * w + x].x;
            const int sy = edm[y * w + x].y;

            // Still the sentinel: no valid pixel was ever propagated here
            // (the mask is empty). Reading img at (w, h) would be out of bounds.
            if( sx >= w || sy >= h ) {
                continue;
            }

            if( sx != x || sy != y ) {
                for(int c = 0; c < count; c++ ) {
                    img->setPixel(img->pixel(sx, sy, c), x, y, c);
                }
            }
        }
    }
}
// Placeholder for a blur-based hole filler. Not implemented: the function only
// checks that img is non-NULL and leaves the image unchanged; bmap is unused.
void nv::fillBlur(FloatImage * img, const BitMap * bmap)
{
nvCheck(img != NULL);
// @@ Apply a 3x3 kernel.
}
// Build the next (half resolution) level of a masked mipmap chain.
//
// Returns false, leaving *_dst and *_dstMask untouched, when srcMask has no
// holes or when the source is already too small to halve (width or height of
// 2 or less). Otherwise allocates a new image and mask of size (w/2, h/2),
// stores them in *_dst / *_dstMask and returns true; the caller owns both.
//
// Each destination texel covers a 2x2 source block. Its mask bit is set when
// any of the four source bits is set, and its value is the average of the
// valid source texels only. Texels whose bit stays clear are not written.
static bool downsample(const FloatImage * src, const BitMap * srcMask, const FloatImage ** _dst, const BitMap ** _dstMask)
{
    const uint w = src->width();
    const uint h = src->height();
    const uint count = src->componentNum();

    // count holes in srcMask, return false if fully filled.
    uint holes = 0;
    for(uint y = 0; y < h; y++) {
        for(uint x = 0; x < w; x++) {
            holes += srcMask->bitAt(x, y) == 0;
        }
    }

    // Stop when no holes or when the texture is very small. Using <= rather
    // than == also stops at dimensions of 1 (reached from odd sizes such as 3),
    // which would otherwise produce a zero-sized level.
    if (holes == 0 || (w <= 2 || h <= 2)) {
        return false;
    }

    // Apply box filter to image and mask and return true.
    const uint nw = w / 2;
    const uint nh = h / 2;

    FloatImage * dst = new FloatImage();
    dst->allocate(count, nw, nh);

    BitMap * dstMask = new BitMap(nw, nh);

    for(uint c = 0; c < count; c++) {
        for(uint y = 0; y < nh; y++) {
            for(uint x = 0; x < nw; x++) {

                const uint x0 = 2 * x + 0;
                const uint x1 = 2 * x + 1;
                const uint y0 = 2 * y + 0;
                const uint y1 = 2 * y + 1;

                const float f0 = src->pixel(x0, y0, c);
                const float f1 = src->pixel(x1, y0, c);
                const float f2 = src->pixel(x0, y1, c);
                const float f3 = src->pixel(x1, y1, c);

                const bool b0 = srcMask->bitAt(x0, y0);
                const bool b1 = srcMask->bitAt(x1, y0);
                const bool b2 = srcMask->bitAt(x0, y1);
                const bool b3 = srcMask->bitAt(x1, y1);

                if (b0 || b1 || b2 || b3) {
                    // Set bit mask.
                    dstMask->setBitAt(x, y);

                    // Set pixel: average of the valid source texels.
                    float value = 0.0f;
                    int total = 0;
                    if (b0) { value += f0; total++; }
                    if (b1) { value += f1; total++; }
                    if (b2) { value += f2; total++; }
                    if (b3) { value += f3; total++; }
                    dst->setPixel(value / total, x, y, c);
                }
            }
        }
    }

    *_dst = dst;
    *_dstMask = dstMask;

    return true;
}
// This is the filter used in the Lumigraph paper.
//
// Builds a chain of half-resolution images and masks with downsample(), then
// for every pixel walks the chain from the finest level upwards and copies the
// first texel whose mask bit is set. At level 0 that is the pixel itself, so
// pixels that are already valid keep their value. Pixels for which no level
// has a valid texel are left unchanged.
void nv::fillPullPush(FloatImage * img, const BitMap * bmap)
{
    nvCheck(img != NULL);
    nvCheck(bmap != NULL);

    const uint count = img->componentNum();
    const uint w = img->width();
    const uint h = img->height();
    const uint num = log2(max(w,h));

    // The mask is indexed with image coordinates below.
    nvCheck(bmap->width() == w);
    nvCheck(bmap->height() == h);

    // Build mipmap chain.
    Array<const FloatImage *> mipmaps(num);
    Array<const BitMap *> mipmapMasks(num);

    mipmaps.append(img);
    mipmapMasks.append(bmap);

    const FloatImage * current;
    const BitMap * currentMask;

    // Compute mipmap chain.
    while(downsample(mipmaps.back(), mipmapMasks.back(), &current, &currentMask))
    {
        mipmaps.append(current);
        mipmapMasks.append(currentMask);
    }

    // Sample mipmaps until non-hole is found.
    const uint levelCount = mipmaps.count();
    for(uint y = 0; y < h; y++) {
        for(uint x = 0; x < w; x++) {

            int sx = x;
            int sy = y;
            //float sx = x;
            //float sy = y;

            for (uint l = 0; l < levelCount; l++)
            {
                //const float fx = sx / mipmaps[l]->width();
                //const float fy = sy / mipmaps[l]->height();

                const BitMap * levelMask = mipmapMasks[l];
                const int levelWidth = int(levelMask->width());
                const int levelHeight = int(levelMask->height());

                if (levelWidth == 0 || levelHeight == 0)
                {
                    // Degenerate level, nothing to sample.
                    break;
                }

                // Level sizes are rounded down, so for odd dimensions the
                // halved coordinate of the last row/column can land one past
                // the end; clamp it to the nearest texel.
                const int lx = (sx < levelWidth) ? sx : levelWidth - 1;
                const int ly = (sy < levelHeight) ? sy : levelHeight - 1;

                if (levelMask->bitAt(lx, ly))
                {
                    // Sample mipmaps[l](lx, ly) and copy to img(x, y)
                    for(uint c = 0; c < count; c++) {
                        //img->setPixel(mipmaps[l]->linear_clamp(fx, fy, c), x, y, c);
                        img->setPixel(mipmaps[l]->pixel(lx, ly, c), x, y, c);
                    }
                    break;
                }

                sx /= 2;
                sy /= 2;
            }
        }
    }

    // Don't delete the original image and mask.
    mipmaps[0] = NULL;
    mipmapMasks[0] = NULL;

    // Delete the mipmaps.
    deleteAll(mipmaps);
    deleteAll(mipmapMasks);
}
/*
This Code is from Charles Bloom:
DoPixelSeamFix
10-20-02
Looks in the 5x5 local neighborhood (LocalPixels) of the desired pixel to fill.
It tries to build a quadratic model of the neighborhood surface to use in
extrapolating. You need 5 pixels to establish a 2d quadratic curve.
This is really just a nice generic way to extrapolate pixels. It also happens
to work great for seam-fixing.
Note that I'm working on normals, but I treat them just as 3 scalars and normalize
at the end. To be more correct, I would work on the surface of a sphere, but that
just seems like way too much work.
*/
struct LocalPixels
{
    // 5x5 neighborhood, indexed [y][x]. The pixel being reconstructed is the
    // center, [2][2].
    bool fill[5][5];    // true where the neighbor holds a valid value
    float data[5][5];   // neighbor values, only read where fill is true

    // Accumulators written by the try*() methods: sum of the estimates and
    // number of estimates. The caller divides result by weight.
    mutable float result;
    mutable float weight;

    // Second difference (a - 2b + c) of three consecutive valid samples in
    // the given row. The centered window is tried first, then the left one,
    // then the right one. Returns false, leaving *pQ alone, if none is valid.
    bool Quad3SubH(float * pQ, int row) const
    {
        static const int windowStart[3] = { 1, 0, 2 };

        for (int i = 0; i < 3; i++)
        {
            const int s = windowStart[i];
            if (fill[row][s] && fill[row][s + 1] && fill[row][s + 2])
            {
                *pQ = data[row][s] - 2.f * data[row][s + 1] + data[row][s + 2];
                return true;
            }
        }

        return false;
    }

    // Same as Quad3SubH, but along the given column.
    bool Quad3SubV(float * pQ, int col) const
    {
        static const int windowStart[3] = { 1, 0, 2 };

        for (int i = 0; i < 3; i++)
        {
            const int s = windowStart[i];
            if (fill[s][col] && fill[s + 1][col] && fill[s + 2][col])
            {
                *pQ = data[s][col] - 2.f * data[s + 1][col] + data[s + 2][col];
                return true;
            }
        }

        return false;
    }

    // Horizontal second difference taken from rows 1 and 3: the average of
    // both when both are available, otherwise whichever one is.
    bool Quad3H(float * pQ) const
    {
        if (Quad3SubH(pQ, 1))
        {
            float other = 0.0f;
            if (Quad3SubH(&other, 3))
            {
                // got both rows
                *pQ = (*pQ + other) * 0.5f;
            }
            return true;
        }

        return Quad3SubH(pQ, 3);
    }

    // Vertical second difference taken from columns 1 and 3, combined the
    // same way as in Quad3H.
    bool Quad3V(float * pQ) const
    {
        if (Quad3SubV(pQ, 1))
        {
            float other = 0.0f;
            if (Quad3SubV(&other, 3))
            {
                // got both columns
                *pQ = (*pQ + other) * 0.5f;
            }
            return true;
        }

        return Quad3SubV(pQ, 3);
    }

    // Quad returns ([0]+[2] - 2.f*[1])
    // a common want is [1] - ([0]+[2])*0.5f ;
    // so use -0.5f*Quad

    // Quadratic estimates of the center. Each applicable configuration adds
    // one estimate to result and 1 to weight. Returns true if any applied.
    bool tryQuads() const
    {
        bool found = false;
        float q = 0.0f; // only read after Quad3H/Quad3V succeeded and wrote it

        // Pairs that straddle the middle.
        if (fill[2][1] && fill[2][3] && Quad3H(&q))
        {
            // horizontal straddle
            result += (data[2][1] + data[2][3] - q) * 0.5f;
            weight += 1.f;
            found = true;
        }
        if (fill[1][2] && fill[3][2] && Quad3V(&q))
        {
            // vertical straddle
            result += (data[1][2] + data[3][2] - q) * 0.5f;
            weight += 1.f;
            found = true;
        }

        // Pairs that lead into the middle.
        if (fill[2][0] && fill[2][1] && Quad3H(&q))
        {
            // left-side pair
            result += data[2][1]*2.f - data[2][0] + q;
            weight += 1.f;
            found = true;
        }
        if (fill[2][3] && fill[2][4] && Quad3H(&q))
        {
            // right-side pair
            result += data[2][3]*2.f - data[2][4] + q;
            weight += 1.f;
            found = true;
        }
        if (fill[0][2] && fill[1][2] && Quad3V(&q))
        {
            // pair above the center
            result += data[1][2]*2.f - data[0][2] + q;
            weight += 1.f;
            found = true;
        }
        if (fill[3][2] && fill[4][2] && Quad3V(&q))
        {
            // pair below the center
            result += data[3][2]*2.f - data[4][2] + q;
            weight += 1.f;
            found = true;
        }

        return found;
    }

    // Planar estimates: for each corner of the inner 3x3 ring, if the corner
    // and the two edge neighbors next to it are valid, add
    // (edge + edge - corner).
    bool tryPlanar() const
    {
        // { edge y,x,  edge y,x,  corner y,x } for the four corners.
        static const int corners[4][6] =
        {
            { 2,1, 1,2, 1,1 },
            { 2,1, 3,2, 3,1 },
            { 2,3, 1,2, 1,3 },
            { 2,3, 3,2, 3,3 },
        };

        bool found = false;
        for (int k = 0; k < 4; k++)
        {
            const int * c = corners[k];
            if (fill[ c[0] ][ c[1] ] && fill[ c[2] ][ c[3] ] && fill[ c[4] ][ c[5] ])
            {
                result += data[ c[0] ][ c[1] ] + data[ c[2] ][ c[3] ] - data[ c[4] ][ c[5] ];
                weight += 1.0f;
                found = true;
            }
        }

        return found;
    }

    // Linear estimates from pairs of samples: the midpoint of a straddling
    // pair, or a straight-line continuation of two samples in a row/column.
    bool tryTwos() const
    {
        bool found = false;

        if (fill[2][1] && fill[2][3])
        {
            result += (data[2][1] + data[2][3]) * 0.5f;
            weight += 1.0f;
            found = true;
        }
        if (fill[1][2] && fill[3][2])
        {
            result += (data[1][2] + data[3][2]) * 0.5f;
            weight += 1.0f;
            found = true;
        }

        // { near y,x,  far y,x } for the four directions.
        static const int pairs[4][4] =
        {
            { 2,1, 2,0 },
            { 2,3, 2,4 },
            { 1,2, 0,2 },
            { 3,2, 4,2 },
        };

        for (int k = 0; k < 4; k++)
        {
            const int * p = pairs[k];
            if (fill[ p[0] ][ p[1] ] && fill[ p[2] ][ p[3] ])
            {
                result += data[ p[0] ][ p[1] ]*2.0f - data[ p[2] ][ p[3] ];
                weight += 1.0f;
                found = true;
            }
        }

        return found;
    }

    // Reset the accumulators and run the estimators from most to least
    // demanding, stopping at the first family that yields an estimate.
    bool doLocalPixelFill() const
    {
        result = 0.0f;
        weight = 0.0f;

        return tryQuads() || tryPlanar() || tryTwos();
    }

}; // struct LocalPixels
// This is a quadratic extrapolation filter from Charles Bloom (DoPixelSeamFix). Used with his permission.
//
// Runs passCount passes over the image. In each pass, for every channel except
// coverageIndex, every hole pixel (bit clear in bmap) gathers its 5x5
// neighbourhood into a LocalPixels and, if LocalPixels::doLocalPixelFill()
// produces an estimate, is overwritten with result / weight. After each pass
// bmap is swapped with the mask built during that pass.
//
// passCount must be positive, img and bmap must be non-NULL and bmap must have
// the same dimensions as img.
void nv::fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/)
{
nvCheck(passCount > 0);
nvCheck(img != NULL);
nvCheck(bmap != NULL);
const int w = img->width();
const int h = img->height();
const int count = img->componentNum();
nvCheck(bmap->width() == uint(w));
nvCheck(bmap->height() == uint(h));
AutoPtr<BitMap> newbmap( new BitMap(w, h) );
// Fetched but currently unused: its only use below is commented out.
float * coverageChannel = NULL;
if (coverageIndex != -1)
{
coverageChannel = img->channel(coverageIndex);
}
// First channel that is actually processed; the new mask is only updated
// while processing this channel.
int firstChannel = -1;
for (int p = 0; p < passCount; p++)
{
for (int c = 0; c < count; c++)
{
if (c == coverageIndex) continue;
if (firstChannel == -1) firstChannel = c;
float * channel = img->channel(c);
for (int yb = 0; yb < h; yb++) {
for (int xb = 0; xb < w; xb++) {
if (bmap->bitAt(xb, yb)) {
// Not a hole.
newbmap->setBitAt(xb, yb);
continue;
}
// Gather the 5x5 neighbourhood centered on (xb, yb). Positions outside
// the image and positions that are holes are marked as not filled.
int numFill = 0;
LocalPixels lp;
for (int ny = 0; ny < 5; ny++)
{
int y = (yb + ny - 2);
if ( y < 0 || y >= h )
{
// out of range
for(int i = 0; i < 5; i++)
{
lp.fill[ny][i] = false;
}
continue;
}
for (int nx = 0; nx < 5; nx++)
{
int x = (xb + nx - 2);
if (x < 0 || x >= w)
{
lp.fill[ny][nx] = false;
}
else
{
int idx = img->index(x, y);
if (!bmap->bitAt(idx))
{
lp.fill[ny][nx] = false;
}
else
{
lp.fill[ny][nx] = true;
lp.data[ny][nx] = channel[idx];
numFill++;
}
}
}
}
// Pixels with fewer than 2 valid neighbours are skipped in this pass.
if (numFill < 2)
continue;
// The center is a hole (checked above), so it must not be marked as filled.
nvDebugCheck(lp.fill[2][2] == false);
if (lp.doLocalPixelFill())
{
const int idx = img->index(xb, yb);
// result is a sum of estimates and weight their count.
channel[idx] = lp.result / lp.weight;
if (c == firstChannel)
{
//coverageChannel[idx] /= lp.weight; // @@ Not sure what this was for, coverageChannel[idx] is always zero.
newbmap->setBitAt(xb, yb);
}
}
}
}
}
// Update the bit mask.
swap(*newbmap, *bmap);
}
}

96
src/nvimage/HoleFilling.h Normal file
View File

@ -0,0 +1,96 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_HOLEFILLING_H
#define NV_IMAGE_HOLEFILLING_H
#include <nvcore/BitArray.h>
#include <nvimage/nvimage.h>
namespace nv
{
class FloatImage;
/// Bit mask.
/// A fixed-size two dimensional grid of bits backed by a BitArray. Bits are
/// stored row by row: the bit for (x, y) lives at index y * width + x, and the
/// single-index overloads take that linear index directly.
class BitMap
{
public:
    /// Create a mask of w by h bits.
    BitMap(uint w, uint h) :
        m_width(w), m_height(h), m_bitArray(w*h)
    {
    }

    // Returned by value: a const-qualified return type would have no effect.
    uint width() const { return m_width; }
    uint height() const { return m_height; }

    /// Get the bit at (x, y).
    bool bitAt(uint x, uint y) const
    {
        nvDebugCheck(x < m_width && y < m_height);
        return m_bitArray.bitAt(y * m_width + x);
    }

    /// Get the bit at the given linear index.
    bool bitAt(uint idx) const
    {
        return m_bitArray.bitAt(idx);
    }

    /// Set the bit at (x, y).
    void setBitAt(uint x, uint y)
    {
        nvDebugCheck(x < m_width && y < m_height);
        m_bitArray.setBitAt(y * m_width + x);
    }

    /// Set the bit at the given linear index.
    void setBitAt(uint idx)
    {
        m_bitArray.setBitAt(idx);
    }

    /// Clear the bit at (x, y).
    void clearBitAt(uint x, uint y)
    {
        nvDebugCheck(x < m_width && y < m_height);
        m_bitArray.clearBitAt(y * m_width + x);
    }

    /// Clear the bit at the given linear index.
    void clearBitAt(uint idx)
    {
        m_bitArray.clearBitAt(idx);
    }

    /// Clear every bit.
    void clearAll()
    {
        m_bitArray.clearAll();
    }

    /// Set every bit.
    void setAll()
    {
        m_bitArray.setAll();
    }

    /// Invert every bit.
    void toggleAll()
    {
        m_bitArray.toggleAll();
    }

    /// Exchange the contents of two masks. The dimensions are const members
    /// and cannot be exchanged, so both masks must have the same size.
    friend void swap(BitMap & a, BitMap & b)
    {
        nvCheck(a.m_width == b.m_width);
        nvCheck(a.m_height == b.m_height);
        //swap(const_cast<uint &>(a.m_width), const_cast<uint &>(b.m_width));
        //swap(const_cast<uint &>(a.m_height), const_cast<uint &>(b.m_height));
        swap(a.m_bitArray, b.m_bitArray);
    }

private:

    const uint m_width;
    const uint m_height;
    BitArray m_bitArray;

};
NVIMAGE_API void fillVoronoi(FloatImage * img, const BitMap * bmap);
NVIMAGE_API void fillBlur(FloatImage * img, const BitMap * bmap);
NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap * bmap);
NVIMAGE_API void fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap);
NVIMAGE_API void fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1);
} // nv namespace
#endif // NV_IMAGE_HOLEFILLING_H

View File

@ -1,13 +1,12 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvimage/Image.h>
#include <nvimage/ImageIO.h>
#include <nvmath/Color.h>
#include <nvcore/Debug.h>
#include <nvcore/Ptr.h>
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/ImageIO.h>
using namespace nv;
@ -41,7 +40,7 @@ void Image::allocate(uint w, uint h)
{
m_width = w;
m_height = h;
m_data = (Color32 *)nv::mem::realloc(m_data, w * h * sizeof(Color32));
m_data = (Color32 *)realloc(m_data, w * h * sizeof(Color32));
}
bool Image::load(const char * name)

File diff suppressed because it is too large Load Diff

View File

@ -5,9 +5,6 @@
#include <nvimage/nvimage.h>
#include <nvcore/StrLib.h>
namespace nv
{
class Image;
@ -16,22 +13,43 @@ namespace nv
namespace ImageIO
{
struct ImageMetaData
{
HashMap<String, String> tagMap;
};
NVIMAGE_API Image * load(const char * fileName);
NVIMAGE_API Image * load(const char * fileName, Stream & s);
NVIMAGE_API FloatImage * loadFloat(const char * fileName);
NVIMAGE_API FloatImage * loadFloat(const char * fileName, Stream & s);
NVIMAGE_API bool save(const char * fileName, const Image * img, const ImageMetaData * tags=NULL);
NVIMAGE_API bool save(const char * fileName, Stream & s, const Image * img, const ImageMetaData * tags=NULL);
NVIMAGE_API bool save(const char * fileName, Stream & s, Image * img);
NVIMAGE_API bool save(const char * fileName, Image * img);
NVIMAGE_API bool saveFloat(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
NVIMAGE_API bool saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount);
NVIMAGE_API bool saveFloat(const char * fileName, Stream & s, const FloatImage * fimage, uint baseComponent, uint componentCount);
NVIMAGE_API Image * loadTGA(Stream & s);
NVIMAGE_API bool saveTGA(Stream & s, const Image * img);
NVIMAGE_API Image * loadPSD(Stream & s);
#if defined(HAVE_PNG)
NVIMAGE_API Image * loadPNG(Stream & s);
#endif
#if defined(HAVE_JPEG)
NVIMAGE_API Image * loadJPG(Stream & s);
#endif
#if defined(HAVE_TIFF)
NVIMAGE_API FloatImage * loadFloatTIFF(const char * fileName, Stream & s);
NVIMAGE_API bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
#endif
#if defined(HAVE_OPENEXR)
NVIMAGE_API FloatImage * loadFloatEXR(const char * fileName, Stream & s);
NVIMAGE_API bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
#endif
// NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
// NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
} // ImageIO namespace

View File

@ -36,9 +36,9 @@ using namespace nv;
// Create normal map using the given kernels.
static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, const Kernel2 * kdu, const Kernel2 * kdv)
{
nvDebugCheck(kdu != NULL);
nvDebugCheck(kdv != NULL);
nvDebugCheck(img != NULL);
nvCheck(kdu != NULL);
nvCheck(kdv != NULL);
nvCheck(img != NULL);
const uint w = img->width();
const uint h = img->height();
@ -75,54 +75,10 @@ static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm,
}
// Build a 4-component normal map from the height stored in component 3 of img.
// Components 0..2 of the result receive the normalized vector
// (du, dv, heightScale), where du and dv are the responses of the kdu and kdv
// kernels; component 3 is copied from the source. The caller owns the result.
static FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, const Kernel2 * kdu, const Kernel2 * kdv)
{
    nvDebugCheck(kdu != NULL);
    nvDebugCheck(kdv != NULL);
    nvDebugCheck(img != NULL);

#pragma message(NV_FILE_LINE "FIXME: Height scale parameter should go away. It should be a sensible value that produces good results when the heightmap is in the [0, 1] range.")
    const float heightScale = 1.0f / 16.0f;

    const uint width = img->width();
    const uint height = img->height();

    AutoPtr<FloatImage> normalMap(new FloatImage());
    normalMap->allocate(4, width, height);

    for (uint row = 0; row < height; row++)
    {
        for (uint col = 0; col < width; col++)
        {
            const float du = img->applyKernel(kdu, col, row, 3, wm);
            const float dv = img->applyKernel(kdv, col, row, 3, wm);

            const Vector3 n = normalize(Vector3(du, dv, heightScale));

            normalMap->setPixel(n.x(), col, row, 0);
            normalMap->setPixel(n.y(), col, row, 1);
            normalMap->setPixel(n.z(), col, row, 2);

            // Copy alpha channel.
            normalMap->setPixel(img->pixel(col, row, 3), col, row, 3);
        }
    }

    return normalMap.release();
}
/// Create normal map using the given filter.
FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter /*= Sobel3x3*/)
{
nvDebugCheck(img != NULL);
nvCheck(img != NULL);
// Init the kernels.
Kernel2 * kdu = NULL;
@ -159,7 +115,7 @@ FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vec
/// Create normal map combining multiple sobel filters.
FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights)
{
nvDebugCheck(img != NULL);
nvCheck(img != NULL);
Kernel2 * kdu = NULL;
Kernel2 * kdv = NULL;
@ -174,32 +130,10 @@ FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vec
return ::createNormalMap(img, wm, heightWeights, kdu, kdv);
}
/// Create a normal map from a float image using a blend of sobel filters.
/// filterWeights is passed to Kernel2::initBlendedSobel for a kernel of size
/// 9; the transposed copy of that kernel is used for the other axis.
/// The caller owns the returned image.
FloatImage * nv::createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights)
{
    nvDebugCheck(img != NULL);

    Kernel2 * kdu = new Kernel2(9);
    kdu->initBlendedSobel(filterWeights);
    kdu->normalize();

    Kernel2 * kdv = new Kernel2(*kdu);
    kdv->transpose();

    FloatImage * normalMap = ::createNormalMap(img, wm, kdu, kdv);

    // The file-local helper only reads the kernels while filtering and does
    // not keep them, so they must be released here.
    delete kdv;
    delete kdu;

    return normalMap;
}
/// Normalize the given image in place.
void nv::normalizeNormalMap(FloatImage * img)
{
nvDebugCheck(img != NULL);
#pragma message(NV_FILE_LINE "TODO: Pack and expand normals explicitly")
nvCheck(img != NULL);
img->expandNormals(0);
img->normalize(0);
img->packNormals(0);

View File

@ -41,11 +41,9 @@ namespace nv
NormalMapFilter_Sobel9x9, // very large
};
// @@ These two functions should be deprecated:
FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3);
FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights);
FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights);
FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights);
void normalizeNormalMap(FloatImage * img);

View File

@ -0,0 +1,98 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Ptr.h>
#include <nvmath/Montecarlo.h>
#include <nvmath/SphericalHarmonic.h>
#include <nvimage/NormalMipmap.h>
#include <nvimage/FloatImage.h>
using namespace nv;
// Work in progress: builds a half-resolution, 4-component image from a normal
// map stored in components 0..2 of img.
//
// For each of the sampled directions, the dot product between the direction
// and every (safely normalized) normal is computed, downsampled, and projected
// onto 9 spherical harmonic coefficients per half-resolution texel.
//
// NOTE(review): the final loop convolves the coefficients but never writes to
// the returned image, so its contents are whatever allocate() leaves there.
// The caller owns the returned image.
FloatImage * nv::createNormalMipmapMap(const FloatImage * img)
{
    nvDebugCheck(img != NULL);

    uint w = img->width();
    uint h = img->height();

    uint hw = w / 2;
    uint hh = h / 2;

    FloatImage dotImg;
    dotImg.allocate(1, w, h);

    FloatImage shImg;
    shImg.allocate(9, hw, hh);

    // The coefficients are accumulated with += below, so start from zero.
    // (allocate() is not assumed to clear the buffer.)
    for (uint i = 0; i < 9; i++)
    {
        float * coefficients = shImg.channel(i);
        for (uint p = 0; p < hw*hh; p++)
        {
            coefficients[p] = 0.0f;
        }
    }

    SampleDistribution distribution(256);
    const uint sampleCount = distribution.sampleCount();

    // The source channels do not change between samples.
    const float * xChannel = img->channel(0);
    const float * yChannel = img->channel(1);
    const float * zChannel = img->channel(2);

    for (uint d = 0; d < sampleCount; d++)
    {
        Vector3 dir = distribution.sampleDir(d);

        Sh2 basis;
        basis.eval(dir);

        for(uint i = 0; i < w*h; i++)
        {
            Vector3 normal(xChannel[i], yChannel[i], zChannel[i]);
            normal = normalizeSafe(normal, Vector3(zero), 0.0f);

            // Store per pixel (index i). Indexing by the sample number d
            // wrote every pixel's value into the same slot.
            dotImg.setPixel(dot(dir, normal), i);
        }

        // @@ It would be nice to have a fastDownSample that took an existing image as an argument, to avoid allocations.
        AutoPtr<FloatImage> dotMip(dotImg.fastDownSample());

        for(uint p = 0; p < hw*hh; p++)
        {
            float f = dotMip->pixel(p);

            // Project irradiance to sh basis and accumulate.
            for (uint i = 0; i < 9; i++)
            {
                float & sum = shImg.channel(i)[p];
                sum += f * basis.elemAt(i);
            }
        }
    }

    FloatImage * normalMipmap = new FloatImage;
    normalMipmap->allocate(4, hw, hh);

    // Precompute the clamped cosine radiance transfer.
    Sh2 prt;
    prt.cosineTransfer();

    // Allocate outside the loop.
    Sh2 sh;

    for(uint p = 0; p < hw*hh; p++)
    {
        for (uint i = 0; i < 9; i++)
        {
            sh.elemAt(i) = shImg.channel(i)[p];
        }

        // Convolve sh irradiance by radiance transfer.
        sh *= prt;

        // Now sh(0) is the ambient occlusion.
        // and sh(1) is the normal direction.

        // Should we use SVD to fit only the normals to the SH?
    }

    return normalMipmap;
}

View File

@ -0,0 +1,17 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_NORMALMIPMAP_H
#define NV_IMAGE_NORMALMIPMAP_H
#include <nvimage/nvimage.h>
namespace nv
{
class FloatImage;
FloatImage * createNormalMipmapMap(const FloatImage * img);
} // nv namespace
#endif // NV_IMAGE_NORMALMIPMAP_H

View File

@ -1,102 +0,0 @@
// This code is in the public domain -- castano@gmail.com
#include "TiledImage.h"
#include <nvcore/StdStream.h>
using namespace nv;
namespace
{
// MRU helpers:
// ...
}
// Read the tile contents back from the file written by Tile::unload().
// The file holds a 'NVTC' tag, the number of floats, and then the raw floats.
// Returns false if the file cannot be opened, the tag or the element count
// does not match this tile, or the data cannot be read completely.
bool Tile::load(const char * name)
{
    StdInputStream stream(name);

    if (stream.isError()) {
        return false;
    }

    uint header;
    stream << header;

    // Reject anything that does not start with the tag unload() writes.
    // (The test used to be inverted and rejected exactly the valid files.)
    if (header != 'NVTC') {
        return false;
    }

    uint count;
    stream << count;

    if (count != w*h) {
        return false;
    }

    const uint size = count * sizeof(float);

    return stream.serialize(data, size) == size;
}
bool Tile::unload(const char * name)
{
StdOutputStream stream(name);
if (stream.isError()) {
return false;
}
uint header = 'NVTC';
uint count = w * h;
const uint size = w * h * sizeof(float);
stream << header << count;
return stream.serialize(data, size) == size;
}
// Create an empty tiled image. The dimensions start at zero so that the
// accessors return defined values before allocate() is called.
TiledImage::TiledImage() : m_componentCount(0), m_width(0), m_height(0)
{
}
// Set up the image for c components of w by h pixels with room for pageCount
// resident pages. The page map gets one entry per component and per
// TILE_SIZE x TILE_SIZE tile (partial tiles at the borders included); the
// resident list is sized to pageCount entries, filled with ~0.
void TiledImage::allocate(uint c, uint w, uint h, uint pageCount)
{
    // Remember the dimensions: the accessors and the pixel() range checks
    // read these members.
    m_componentCount = c;
    m_width = w;
    m_height = h;

    // Allocate page map:
    const uint pw = ((w + TILE_SIZE - 1) / TILE_SIZE);
    const uint ph = ((h + TILE_SIZE - 1) / TILE_SIZE);
    const uint size = c * pw * ph;
    m_pageMap.resize(size);

    m_residentArray.resize(pageCount, ~0);
}
// Prefetch hint for a single pixel. Not implemented: currently does nothing.
void TiledImage::prefetch(uint c, uint x, uint y)
{
}
// Prefetch hint for a rectangle of pixels. Not implemented: currently does nothing.
void TiledImage::prefetch(uint c, uint x, uint y, uint w, uint h)
{
}
// Load the page at page coordinates (x, y). Unfinished: only validates that
// the coordinates fall inside the page grid.
// NOTE(review): the class declares this member as returning uint, and
// TiledImage::pixel() calls it with three arguments and expects a Tile *;
// the signatures still need to be reconciled.
void TiledImage::loadPage(uint x, uint y)
{
    // Size of the page grid, computed the same way as in allocate(). The
    // image dimensions live in m_width / m_height; there is no w or h here.
    const uint pw = ((m_width + TILE_SIZE - 1) / TILE_SIZE);
    const uint ph = ((m_height + TILE_SIZE - 1) / TILE_SIZE);

    nvDebugCheck(x < pw);
    nvDebugCheck(y < ph);
}

View File

@ -1,152 +0,0 @@
// This code is in the public domain -- castano@gmail.com
#ifndef NV_IMAGE_TILEDIMAGE_H
#define NV_IMAGE_TILEDIMAGE_H
#include <nvcore/Debug.h>
#include <nvcore/StrLib.h>
#include <nvimage/nvimage.h>
// For simplicity the tile size is fixed at compile time.
#define TILE_SIZE 256
// 256 * 256 * 4 = 2^(8+8+2) = 2^18 = 256 KB
// 512 * 512 * 4 = 2^(9+9+2) = 2^20 = 1 MB
namespace nv
{
#if 0
struct ImageConcept
{
float pixel(uint x, uint y) const;
};
enum WrapMode {
WrapMode_Clamp,
WrapMode_Repeat,
WrapMode_Mirror
};
template <class T>
class Sampler
{
// ...
};
#endif
/// A rectangular block of float samples. The tile covers the image region
/// starting at (xoffset, yoffset) with size w by h, and owns its sample
/// buffer, which is stored row by row.
class Tile
{
public:
    // The members below were implicitly private, which made the class
    // unusable from TiledImage.

    /// Allocate a tile of w by h samples located at (x, y) in the image.
    /// The samples are not initialized.
    Tile(uint x, uint y, uint w, uint h) : xoffset(x), yoffset(y), w(w), h(h)
    {
        data = new float[w*h];
    }
    ~Tile()
    {
        delete [] data;
    }

    /// Size of the sample buffer in bytes.
    uint size() const
    {
        return w * h * sizeof(float);
    }

    /// Sample at image coordinates (x, y), which must lie inside the tile.
    float pixel(uint x, uint y) const
    {
        x -= xoffset;
        y -= yoffset;
        nvDebugCheck (x < w);
        nvDebugCheck (y < h);
        return data[y * w + x];
    }

    /// Read the samples from the named file. Returns false on failure.
    bool load(const char * name);

    /// Write the samples to the named file. Returns false on failure.
    /// (Declared bool to match the definition, which reports success.)
    bool unload(const char * name);

    uint xoffset, yoffset;
    uint w, h;
    float * data;

private:
    // Not copyable: a copy would delete the same buffer twice.
    // Declared but intentionally not defined.
    Tile(const Tile &);
    Tile & operator=(const Tile &);
};
/// Float image split into TILE_SIZE x TILE_SIZE tiles that are looked up
/// through a page map, with a separate list of resident pages.
class TiledImage
{
public:
TiledImage();
// Size the page map for c components of w x h pixels and the resident list for pageCount pages.
void allocate(uint c, uint w, uint h, uint pageCount);
uint componentCount() const { return m_componentCount; }
uint width() const { return m_width; }
uint height() const { return m_height; }
// Number of entries in the resident list.
uint pageCount() const { return m_residentArray.count(); }
// Prefetch hints; both are empty stubs in the implementation file.
void prefetch(uint c, uint x, uint y);
void prefetch(uint c, uint x, uint y, uint w, uint h);
// Value of component c at pixel (x, y); loads the containing tile if it is not in the page map.
float pixel(uint c, uint x, uint y);
private:
// Tile stored in the page map for component c at page coordinates (x, y), or for a linear page index.
Tile * tileAt(uint c, uint x, uint y);
Tile * tileAt(uint idx);
// NOTE(review): pixel() calls loadPage(c, px, py) and assigns the result to a Tile *,
// and the implementation file defines it as returning void; this declaration
// matches neither.
uint loadPage(uint x, uint y);
void unloadPage(Tile *);
uint addAndReplace(uint newPage);
private:
uint m_componentCount;
uint m_width;
uint m_height;
// One entry of the page map.
struct Page {
Page() : tile(NULL) {}
String tmpFileName;
Tile * tile; // NULL until a tile is attached to the page
};
mutable Array<Page> m_pageMap;
mutable Array<uint> m_residentArray; // MRU
};
// Return component c of the pixel at (x, y). The coordinates are converted to
// page coordinates by dividing by TILE_SIZE; if the page map has no tile for
// that page, the page is loaded first.
// NOTE(review): loadPage is declared as uint loadPage(uint x, uint y), which
// does not match the three-argument call and the Tile * assignment below.
inline float TiledImage::pixel(uint c, uint x, uint y)
{
nvDebugCheck (c < m_componentCount);
nvDebugCheck (x < m_width);
nvDebugCheck (y < m_height);
uint px = x / TILE_SIZE;
uint py = y / TILE_SIZE;
Tile * tile = tileAt(c, px, py);
if (tile == NULL) {
tile = loadPage(c, px, py);
}
// Tile::pixel takes image coordinates and subtracts the tile offset itself.
return tile->pixel(x, y);
}
// Tile for component c at page coordinates (x, y), or NULL if the page has no
// tile attached. The page map is laid out component by component, each
// component row by row, matching the c * pw * ph entries made by allocate().
inline Tile * TiledImage::tileAt(uint c, uint x, uint y)
{
    // Page grid dimensions. The class has no w or h members, so derive them
    // from the image size the same way allocate() does.
    const uint pw = ((m_width + TILE_SIZE - 1) / TILE_SIZE);
    const uint ph = ((m_height + TILE_SIZE - 1) / TILE_SIZE);

    uint idx = (c * ph + y) * pw + x;
    return tileAt(idx);
}
// Tile stored in the page map entry at linear index idx (NULL if the page has
// no tile attached).
inline Tile * TiledImage::tileAt(uint idx)
{
return m_pageMap[idx].tile;
}
} // nv namespace
#endif // NV_IMAGE_TILEDIMAGE_H

173
src/nvmath/Basis.cpp Normal file
View File

@ -0,0 +1,173 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Basis.h>
using namespace nv;
/// Normalize basis vectors.
/// Each of normal, tangent and bitangent is normalized independently with the
/// given epsilon; the angles between them are not changed.
void Basis::normalize(float epsilon /*= NV_EPSILON*/)
{
normal = ::normalize(normal, epsilon);
tangent = ::normalize(tangent, epsilon);
bitangent = ::normalize(bitangent, epsilon);
}
/// Gram-Schmidt orthogonalization.
/// The normal keeps its direction; the tangent and then the bitangent have
/// their components along the previously processed vectors removed before
/// being normalized.
/// @note Works only if the vectors are close to orthogonal.
void Basis::orthonormalize(float epsilon /*= NV_EPSILON*/)
{
    // N' = |N|
    normal = ::normalize(normal, epsilon);

    // T' = |T - (N' dot T) N'|
    const float tangentAlongNormal = dot(normal, tangent);
    tangent -= normal * tangentAlongNormal;
    tangent = ::normalize(tangent, epsilon);

    // B' = |B - (N' dot B) N' - (T' dot B) T'|
    const float bitangentAlongNormal = dot(normal, bitangent);
    bitangent -= normal * bitangentAlongNormal;

    const float bitangentAlongTangent = dot(tangent, bitangent);
    bitangent -= tangent * bitangentAlongTangent;

    bitangent = ::normalize(bitangent, epsilon);
}
/// Robust orthonormalization.
/// Returns an orthonormal basis even when the original is degenerate.
///
/// A vector whose length is below epsilon is treated as missing and is rebuilt
/// from the others:
///  - a missing normal becomes cross(tangent, bitangent); if that is
///    degenerate too, the canonical basis is returned;
///  - if both tangent and bitangent vanish after removing their normal
///    component, an arbitrary frame around the normal is built;
///  - if only one of them vanishes, it is rebuilt as a cross product of the
///    other one and the normal.
void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
{
    if (length(normal) < epsilon)
    {
        normal = cross(tangent, bitangent);

        if (length(normal) < epsilon)
        {
            tangent   = Vector3(1, 0, 0);
            bitangent = Vector3(0, 1, 0);
            normal    = Vector3(0, 0, 1);
            return;
        }
    }
    normal = ::normalize(normal, epsilon);

    // Remove the component along the normal.
    tangent -= normal * dot(normal, tangent);
    bitangent -= normal * dot(normal, bitangent);

    if (length(tangent) < epsilon)
    {
        if (length(bitangent) < epsilon)
        {
            buildFrameForDirection(normal);
        }
        else
        {
            // The bitangent is already orthogonal to the normal; it has to be
            // unit length as well for the cross product to be unit length.
            bitangent = ::normalize(bitangent, epsilon);
            tangent = cross(bitangent, normal);
            nvCheck(isNormalized(tangent, epsilon));
        }
    }
    else
    {
        tangent = ::normalize(tangent, epsilon);
        bitangent -= tangent * dot(tangent, bitangent);

        if (length(bitangent) < epsilon)
        {
            bitangent = cross(tangent, normal);
            nvCheck(isNormalized(bitangent));
        }
        else
        {
            // The tangent was normalized above; the vector still missing its
            // normalization here is the bitangent.
            bitangent = ::normalize(bitangent, epsilon);
        }
    }

    // Check vector lengths.
    nvCheck(isNormalized(normal, epsilon));
    nvCheck(isNormalized(tangent, epsilon));
    nvCheck(isNormalized(bitangent, epsilon));

    // Check vector angles.
    nvCheck(equal(dot(normal, tangent), 0.0f, epsilon));
    nvCheck(equal(dot(normal, bitangent), 0.0f, epsilon));
    nvCheck(equal(dot(tangent, bitangent), 0.0f, epsilon));

    // Check vector orientation.
    const float det = dot(cross(normal, tangent), bitangent);
    nvCheck(equal(det, 1.0f, epsilon) || equal(det, -1.0f, epsilon));
}
/// Build an arbitrary frame for the given direction.
/// @param d Direction that becomes the normal; it is checked to be normalized.
/// The tangent starts as the coordinate axis along which the normal has the
/// smallest absolute component and is then made perpendicular to the normal;
/// the bitangent is cross(normal, tangent).
void Basis::buildFrameForDirection(Vector3::Arg d)
{
    nvCheck(isNormalized(d));
    normal = d;

    const float absX = fabsf(normal.x());
    const float absY = fabsf(normal.y());
    const float absZ = fabsf(normal.z());

    // Choose minimum axis.
    const bool xIsSmallest = (absX < absY) && (absX < absZ);
    if (xIsSmallest)
    {
        tangent = Vector3(1, 0, 0);
    }
    else
    {
        if (absY < absZ)
        {
            tangent = Vector3(0, 1, 0);
        }
        else
        {
            tangent = Vector3(0, 0, 1);
        }
    }

    // Orthogonalize the tangent against the normal.
    const float alongNormal = dot(normal, tangent);
    tangent -= normal * alongNormal;
    tangent = ::normalize(tangent);

    bitangent = cross(normal, tangent);
}
/*
/// Transform by this basis. (From this basis to object space).
Vector3 Basis::transform(Vector3::Arg v) const
{
Vector3 o = tangent * v.x();
o += bitangent * v.y();
o += normal * v.z();
return o;
}
/// Transform by the transpose. (From object space to this basis).
Vector3 Basis::transformT(Vector3::Arg v)
{
return Vector3(dot(tangent, v), dot(bitangent, v), dot(normal, v));
}
/// Transform by the inverse. (From object space to this basis).
/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
Vector3 Basis::transformI(Vector3::Arg v) const
{
const float det = determinant();
nvCheck(!equalf(det, 0.0f));
const float idet = 1.0f / det;
// Rows of the inverse matrix.
Vector3 r0, r1, r2;
r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet;
r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet;
r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet;
r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet;
r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet;
r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet;
r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet;
r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet;
r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet;
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2));
}
*/

78
src/nvmath/Basis.h Normal file
View File

@ -0,0 +1,78 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_BASIS_H
#define NV_MATH_BASIS_H
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
#include <nvmath/Matrix.h>
namespace nv
{
/// Basis class to compute tangent space bases, orthogonalizations and to
/// transform vectors from one space to another.
/// Holds three vectors: tangent, bitangent and normal.
struct Basis
{
    /// Create a null basis (all three vectors are zero).
    Basis() : tangent(0, 0, 0), bitangent(0, 0, 0), normal(0, 0, 0) {}

    /// Create a basis from three vectors, given in the order normal, tangent, bitangent.
    Basis(Vector3::Arg n, Vector3::Arg t, Vector3::Arg b) : tangent(t), bitangent(b), normal(n) {}

    /// Create a basis from a normal, a tangent and a handedness sign; see build().
    Basis(Vector3::Arg n, Vector3::Arg t, float sign)
    {
        build(n, t, sign);
    }

    NVMATH_API void normalize(float epsilon = NV_EPSILON);
    NVMATH_API void orthonormalize(float epsilon = NV_EPSILON);
    NVMATH_API void robustOrthonormalize(float epsilon = NV_EPSILON);
    NVMATH_API void buildFrameForDirection(Vector3::Arg d);

    /// Handedness of the basis, taken from the sign of determinant().
    /// @return 1.0f when the determinant is strictly positive, -1.0f otherwise
    ///         (a zero determinant also yields -1.0f).
    float handness() const
    {
        if (determinant() > 0.0f)
        {
            return 1.0f;
        }
        return -1.0f;
    }

    /// Build a basis from 2 vectors and a handedness flag.
    /// The bitangent is set to sign * cross(t, n).
    void build(Vector3::Arg n, Vector3::Arg t, float sign)
    {
        normal = n;
        tangent = t;
        bitangent = sign * cross(t, n);
    }

    /// Compute the determinant of this basis, i.e. of the 3x3 matrix formed
    /// by tangent, bitangent and normal.
    float determinant() const
    {
        return
            tangent.x() * bitangent.y() * normal.z() - tangent.z() * bitangent.y() * normal.x() +
            tangent.y() * bitangent.z() * normal.x() - tangent.y() * bitangent.x() * normal.z() +
            tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y();
    }

    /*
    // Get transform matrix for this basis.
    NVMATH_API Matrix matrix() const;
    // Transform by this basis. (From this basis to object space).
    NVMATH_API Vector3 transform(Vector3::Arg v) const;
    // Transform by the transpose. (From object space to this basis).
    NVMATH_API Vector3 transformT(Vector3::Arg v);
    // Transform by the inverse. (From object space to this basis).
    NVMATH_API Vector3 transformI(Vector3::Arg v) const;
    */

    Vector3 tangent;
    Vector3 bitangent;
    Vector3 normal;
};
} // nv namespace
#endif // NV_MATH_BASIS_H

View File

@ -9,7 +9,6 @@
namespace nv
{
class Stream;
/// Axis Aligned Bounding Box.
class Box
@ -28,13 +27,11 @@ public:
// Cast operators.
operator const float * () const { return reinterpret_cast<const float *>(this); }
// Min corner of the box.
Vector3 minCorner() const { return m_mins; }
Vector3 & minCorner() { return m_mins; }
/// Min corner of the box.
Vector3 mins() const { return m_mins; }
// Max corner of the box.
Vector3 maxCorner() const { return m_maxs; }
Vector3 & maxCorner() { return m_maxs; }
/// Max corner of the box.
Vector3 maxs() const { return m_maxs; }
/// Clear the bounds.
void clearBounds()
@ -129,8 +126,6 @@ public:
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
}
friend Stream & operator<< (Stream & s, Box & box);
private:
Vector3 m_mins;

View File

@ -1,14 +1,17 @@
PROJECT(nvmath)
SET(MATH_SRCS
nvmath.h
Vector.h
Matrix.h
Plane.h Plane.cpp
Box.h
Color.h
Half.h Half.cpp
Fitting.h Fitting.cpp)
nvmath.h
Vector.h
Matrix.h
Quaternion.h
Box.h
Color.h
Montecarlo.h Montecarlo.cpp
Random.h Random.cpp
SphericalHarmonic.h SphericalHarmonic.cpp
Basis.h Basis.cpp
Triangle.h Triangle.cpp TriBox.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
@ -16,15 +19,15 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVMATH_EXPORTS)
IF(NVMATH_SHARED)
ADD_DEFINITIONS(-DNVMATH_SHARED=1)
ADD_LIBRARY(nvmath SHARED ${MATH_SRCS})
ADD_DEFINITIONS(-DNVMATH_SHARED=1)
ADD_LIBRARY(nvmath SHARED ${MATH_SRCS})
ELSE(NVMATH_SHARED)
ADD_LIBRARY(nvmath ${MATH_SRCS})
ADD_LIBRARY(nvmath ${MATH_SRCS})
ENDIF(NVMATH_SHARED)
TARGET_LINK_LIBRARIES(nvmath ${LIBS} nvcore)
INSTALL(TARGETS nvmath
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib/static)
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib/static)

View File

@ -1,247 +0,0 @@
// This code is in the public domain -- icastano@gmail.com
#include "Fitting.h"
#include <nvcore/Algorithms.h> // max
#include <nvcore/Containers.h> // swap
#include <float.h> // FLT_MAX
using namespace nv;
// @@ Move to EigenSolver.h

// Estimate the dominant eigenvector of a symmetric 3x3 matrix with a fixed
// number of power-iteration steps.
//
// matrix holds the six unique entries in the order
//   [0]=xx  [1]=xy  [2]=xz  [3]=yy  [4]=yz  [5]=zz
// (the layout written by computeCovariance).
//
// Returns the iterated vector, rescaled each step by its largest component
// (so it is NOT unit length), or the zero vector when no direction can be
// determined.
static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix)
{
    // Bail out only when the whole diagonal is zero. A single zero diagonal
    // entry just means the data is flat along that axis, which still leaves a
    // well-defined principal direction in the remaining axes.
    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
    {
        return Vector3(zero);
    }

    const int NUM = 8;

    Vector3 v(1, 1, 1);
    for (int i = 0; i < NUM; i++)
    {
        // (x, y, z) = matrix * v
        const float x = v.x() * matrix[0] + v.y() * matrix[1] + v.z() * matrix[2];
        const float y = v.x() * matrix[1] + v.y() * matrix[3] + v.z() * matrix[4];
        const float z = v.x() * matrix[2] + v.y() * matrix[4] + v.z() * matrix[5];

        const float norm = max(max(x, y), z);

        // Dividing by a zero scale would fill the result with Inf/NaN.
        if (norm == 0.0f)
        {
            return Vector3(zero);
        }

        v = Vector3(x, y, z) / norm;
    }

    return v;
}
// Unweighted centroid: the sum of the n points divided by n.
Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points)
{
    Vector3 sum(zero);

    int index = 0;
    while (index < n)
    {
        sum += points[index];
        ++index;
    }

    sum /= float(n);
    return sum;
}
// Weighted centroid: sum(weights[i] * points[i]) / sum(weights[i]).
// The metric argument is accepted but not used here.
Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
{
    Vector3 weightedSum(zero);
    float weightTotal = 0.0f;

    int index = 0;
    while (index < n)
    {
        weightTotal += weights[index];
        weightedSum += weights[index]*points[index];
        ++index;
    }

    weightedSum /= weightTotal;
    return weightedSum;
}
// Accumulate the (unnormalized) covariance of the points about their centroid.
// covariance receives the six unique terms of the symmetric matrix in the
// order xx, xy, xz, yy, yz, zz. The sums are not divided by n.
// Returns the centroid.
Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance)
{
    // compute the centroid
    Vector3 centroid = computeCentroid(n, points);

    // start every accumulator at zero
    for (int term = 0; term < 6; term++)
    {
        covariance[term] = 0.0f;
    }

    // compute covariance matrix
    for (int index = 0; index < n; index++)
    {
        const Vector3 offset = points[index] - centroid;
        const float dx = offset.x();
        const float dy = offset.y();
        const float dz = offset.z();

        covariance[0] += dx * dx;
        covariance[1] += dx * dy;
        covariance[2] += dx * dz;
        covariance[3] += dy * dy;
        covariance[4] += dy * dz;
        covariance[5] += dz * dz;
    }

    return centroid;
}
// Weighted variant: each offset from the weighted centroid is scaled
// component-wise by metric, and its outer product is weighted by weights[i].
// covariance receives the six unique terms in the order xx, xy, xz, yy, yz, zz.
// Returns the weighted centroid.
Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance)
{
    // compute the centroid
    Vector3 centroid = computeCentroid(n, points, weights, metric);

    // start every accumulator at zero
    for (int term = 0; term < 6; term++)
    {
        covariance[term] = 0.0f;
    }

    // compute covariance matrix
    for (int index = 0; index < n; index++)
    {
        const Vector3 scaled = (points[index] - centroid) * metric;
        const Vector3 weighted = weights[index]*scaled;

        covariance[0] += scaled.x()*weighted.x();
        covariance[1] += scaled.x()*weighted.y();
        covariance[2] += scaled.x()*weighted.z();
        covariance[3] += scaled.y()*weighted.y();
        covariance[4] += scaled.y()*weighted.z();
        covariance[5] += scaled.z()*weighted.z();
    }

    return centroid;
}
// Principal direction of an unweighted point set: build the covariance terms
// and run the power method on them. The centroid is discarded.
Vector3 nv::Fit::computePrincipalComponent(int n, const Vector3 *__restrict points)
{
    float covariance[6];
    computeCovariance(n, points, covariance);

    const Vector3 principal = firstEigenVector_PowerMethod(covariance);
    return principal;
}
// Principal direction of a weighted point set under the given metric: build
// the weighted covariance terms and run the power method on them.
Vector3 nv::Fit::computePrincipalComponent(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
{
    float covariance[6];
    computeCovariance(n, points, weights, metric, covariance);

    const Vector3 principal = firstEigenVector_PowerMethod(covariance);
    return principal;
}
// Fit a plane to a set of points.
// Only the degenerate case is implemented: when any diagonal covariance term
// is zero, a plane with normal (0, 0, 1) through the centroid is returned.
// Otherwise a default-constructed Plane is returned, because the eigensolver
// this needs has not been written yet.
Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points)
{
    // compute the centroid and covariance
    float covariance[6];
    Vector3 center = computeCovariance(n, points, covariance);

    const bool spreadOnEveryAxis = (covariance[0] != 0) && (covariance[3] != 0) && (covariance[5] != 0);
    if (!spreadOnEveryAxis)
    {
        // If no plane defined, then return a horizontal plane.
        return Plane(Vector3(0, 0, 1), center);
    }

#pragma message(NV_FILE_LINE "TODO: need to write an eigensolver!")

    // - Numerical Recipes in C is a good reference. Householder transforms followed by QL decomposition seems to be the best approach.
    // - The one from magic-tools is now LGPL. For the 3D case it uses a cubic root solver, which is not very accurate.
    // - Charles' Galaxy3 contains an implementation of the tridiagonalization method, but is under BPL.

    //EigenSolver3 solver(matrix);

    return Plane();
}
// Cluster the weighted points into (up to) four clusters.
//
// The initial centers are placed along the principal direction of the data:
// the two extreme projections, plus the two points one third and two thirds
// of the way between them. The centers are then refined by repeatedly
// assigning each point to its nearest center (distance measured after
// component-wise scaling by metric) and replacing each center with the
// weighted mean of its points, until the centers stop changing according to
// equal().
//
// n        Number of points; points[0] is read unconditionally.
// points   Input points.
// weights  Per-point weights.
// metric   Per-component scale applied when measuring distances.
// cluster  Receives the four cluster centers.
// Returns the number of clusters whose total weight is non-zero.
int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster)
{
    // Compute principal component.
    float matrix[6];
    Vector3 centroid = computeCovariance(n, points, weights, metric, matrix);
    Vector3 principal = firstEigenVector_PowerMethod(matrix);

    // Pick initial solution: smallest and largest projection onto the principal direction.
    float mindps, maxdps;
    mindps = maxdps = dot(points[0] - centroid, principal);

    for (int i = 1; i < n; ++i)
    {
        float dps = dot(points[i] - centroid, principal);

        if (dps < mindps) {
            mindps = dps;
        }
        else if (dps > maxdps) {
            // Only a projection that is actually larger may replace the maximum.
            maxdps = dps;
        }
    }

    cluster[0] = centroid + mindps * principal;
    cluster[1] = centroid + maxdps * principal;
    cluster[2] = (2 * cluster[0] + cluster[1]) / 3;
    cluster[3] = (2 * cluster[1] + cluster[0]) / 3;

    // Now we have to iteratively refine the clusters.
    while (true)
    {
        Vector3 newCluster[4] = { Vector3(zero), Vector3(zero), Vector3(zero), Vector3(zero) };
        float total[4] = {0, 0, 0, 0};

        for (int i = 0; i < n; ++i)
        {
            // Find nearest cluster.
            int nearest = 0;
            float mindist = FLT_MAX;
            for (int j = 0; j < 4; j++)
            {
                float dist = length_squared((cluster[j] - points[i]) * metric);
                if (dist < mindist)
                {
                    mindist = dist;
                    nearest = j;
                }
            }

            newCluster[nearest] += weights[i] * points[i];
            total[nearest] += weights[i];
        }

        // Turn the weighted sums into weighted means; empty clusters stay at zero.
        for (int j = 0; j < 4; j++)
        {
            if (total[j] != 0)
                newCluster[j] /= total[j];
        }

        // Converged: report how many clusters actually received points.
        if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) &&
            equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3]))
        {
            return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0);
        }

        cluster[0] = newCluster[0];
        cluster[1] = newCluster[1];
        cluster[2] = newCluster[2];
        cluster[3] = newCluster[3];

        // Sort clusters by weight.
        for (int i = 0; i < 4; i++)
        {
            for (int j = i; j > 0 && total[j] > total[j - 1]; j--)
            {
                swap( total[j], total[j - 1] );
                swap( cluster[j], cluster[j - 1] );
            }
        }
    }
}

View File

@ -1,31 +0,0 @@
// This code is in the public domain -- icastano@gmail.com
#ifndef NV_MATH_FITTING_H
#define NV_MATH_FITTING_H
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
#include <nvmath/Plane.h>
namespace nv
{
// Point-set fitting helpers: centroid, covariance, principal direction,
// plane fitting and 4-means clustering.
// In every function n is the number of elements in points (and in weights,
// where present).
namespace Fit
{
// Unweighted centroid: sum of the points divided by n.
Vector3 computeCentroid(int n, const Vector3 * points);
// Weighted centroid: sum(weights[i] * points[i]) / sum(weights[i]).
// metric is accepted but not used by the implementation.
Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, Vector3::Arg metric);
// Writes the six unique covariance sums about the centroid into covariance,
// in the order xx, xy, xz, yy, yz, zz (not divided by n). Returns the centroid.
Vector3 computeCovariance(int n, const Vector3 * points, float * covariance);
// Weighted variant: offsets from the weighted centroid are scaled
// component-wise by metric and each term is weighted by weights[i].
// Returns the weighted centroid.
Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, Vector3::Arg metric, float * covariance);
// Principal direction of the points, estimated with the power method on the
// covariance terms. The result is not unit length.
Vector3 computePrincipalComponent(int n, const Vector3 * points);
// Weighted / metric-scaled variant of the above.
Vector3 computePrincipalComponent(int n, const Vector3 * points, const float * weights, Vector3::Arg metric);
// Plane fit. NOTE(review): the implementation only handles the degenerate
// case (a zero diagonal covariance term) and otherwise returns a
// default-constructed Plane, pending an eigensolver.
Plane bestPlane(int n, const Vector3 * points);
// Clusters the weighted points into four centers written to cluster
// (which must have room for 4 entries).
// Returns the number of clusters with non-zero total weight.
int compute4Means(int n, const Vector3 * points, const float * weights, Vector3::Arg metric, Vector3 * cluster);
}
} // nv namespace
#endif // NV_MATH_FITTING_H

View File

@ -1,563 +0,0 @@
// Branch-free implementation of half-precision (16 bit) floating point
// Copyright 2006 Mike Acton <macton@gmail.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
//
// Half-precision floating point format
// ------------------------------------
//
// | Field | Last | First | Note
// |----------|------|-------|----------
// | Sign | 15 | 15 |
// | Exponent | 14 | 10 | Bias = 15
// | Mantissa | 9 | 0 |
//
// Compiling
// ---------
//
// Preferred compile flags for GCC:
// -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
//
// This file is a C99 source file, intended to be compiled with a C99
// compliant compiler. However, for the moment it remains compatible
// with C++98. Therefore if you are using a compiler that poorly implements
// C standards (e.g. MSVC), it may be compiled as C++. This is not
// guaranteed for future versions.
//
// Features
// --------
//
// * QNaN + <x> = QNaN
// * <x> + +INF = +INF
// * <x> - -INF = -INF
// * INF - INF = SNaN
// * Denormalized values
// * Difference of ZEROs is always +ZERO
// * Sum round with guard + round + sticky bit (grs)
// * And of course... no branching
//
// Precision of Sum
// ----------------
//
// (SUM) uint16 z = half_add( x, y );
// (DIFFERENCE) uint16 z = half_add( x, -y );
//
// Will have exactly (0 ulps difference) the same result as:
// (For 32 bit IEEE 754 floating point and same rounding mode)
//
// union FLOAT_32
// {
// float f32;
// uint32 u32;
// };
//
// union FLOAT_32 fx = { .u32 = half_to_float( x ) };
// union FLOAT_32 fy = { .u32 = half_to_float( y ) };
// union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32 };
// uint16 z = float_to_half( fz );
//
#include "Half.h"
#include <stdio.h>
// Load immediate: hands the 32-bit constant back unchanged.
static inline uint32 _uint32_li( uint32 a )
{
    const uint32 immediate = a;
    return immediate;
}
// Decrement: a - 1 with unsigned wrap-around (0 becomes 0xffffffff).
static inline uint32 _uint32_dec( uint32 a )
{
    uint32 result = a;
    result -= 1;
    return result;
}
// Complement: flips every bit of a.
static inline uint32 _uint32_not( uint32 a )
{
    const uint32 flipped = ~a;
    return flipped;
}
// Negate: two's complement negation of a (modulo 2^32).
// Written as a subtraction from zero, which gives the same value as unary
// minus or ~a + 1 and does not trigger the MSVC warning about negating an
// unsigned value.
static inline uint32 _uint32_neg( uint32 a )
{
    const uint32 zeroValue = 0;
    return zeroValue - a;
}
// Extend sign: reinterprets a as signed and shifts right by 31, so the
// top bit is replicated into every bit position (relies on an arithmetic
// right shift of signed values).
static inline uint32 _uint32_ext( uint32 a )
{
    const int32 asSigned = (int32)a;
    const int32 replicated = asSigned >> 31;
    return replicated;
}
// And: bitwise a & b.
static inline uint32 _uint32_and( uint32 a, uint32 b )
{
    uint32 result = a;
    result &= b;
    return result;
}
// And with Complement: keeps the bits of a that are clear in b.
static inline uint32 _uint32_andc( uint32 a, uint32 b )
{
    const uint32 keep = ~b;
    return a & keep;
}
// Or: bitwise a | b.
static inline uint32 _uint32_or( uint32 a, uint32 b )
{
    uint32 result = a;
    result |= b;
    return result;
}
// Shift Right Logical: a >> sa on an unsigned value (zero fill).
static inline uint32 _uint32_srl( uint32 a, int sa )
{
    uint32 result = a;
    result >>= sa;
    return result;
}
// Shift Left Logical: a << sa on an unsigned value.
static inline uint32 _uint32_sll( uint32 a, int sa )
{
    uint32 result = a;
    result <<= sa;
    return result;
}
// Add: a + b with unsigned wrap-around.
static inline uint32 _uint32_add( uint32 a, uint32 b )
{
    uint32 result = a;
    result += b;
    return result;
}
// Subtract: a - b with unsigned wrap-around.
static inline uint32 _uint32_sub( uint32 a, uint32 b )
{
    uint32 result = a;
    result -= b;
    return result;
}
// Select on Sign bit: returns a when the top bit of test is set, b otherwise.
// The sign bit of test is spread into a full mask, so no branch is needed.
static inline uint32 _uint32_sels( uint32 test, uint32 a, uint32 b )
{
    const uint32 mask = _uint32_ext( test );
    return (a & mask) | (b & ~mask);
}
// Load Immediate: hands the 16-bit constant back unchanged.
static inline uint16 _uint16_li( uint16 a )
{
    const uint16 immediate = a;
    return immediate;
}
// Extend sign: reinterprets a as a signed 16-bit value and shifts right by
// 15, replicating the top bit into every bit of the 16-bit result (relies on
// an arithmetic right shift of signed values).
static inline uint16 _uint16_ext( uint16 a )
{
    const int16 asSigned = (int16)a;
    const int replicated = asSigned >> 15;
    return (uint16)replicated;
}
// Negate: two's complement negation of a, truncated to 16 bits.
static inline uint16 _uint16_neg( uint16 a )
{
    const int negated = 0 - a;
    return (uint16)negated;
}
// Complement: flips every bit of a; the result is truncated to 16 bits.
static inline uint16 _uint16_not( uint16 a )
{
    const int flipped = ~a;
    return (uint16)flipped;
}
// Decrement: a - 1, truncated to 16 bits (0 becomes 0xffff).
static inline uint16 _uint16_dec( uint16 a )
{
    const int decremented = a - 1;
    return (uint16)decremented;
}
// Shift Left Logical: a << sa, truncated to 16 bits.
static inline uint16 _uint16_sll( uint16 a, int sa )
{
    const int shifted = a << sa;
    return (uint16)shifted;
}
// Shift Right Logical: a >> sa (a is non-negative after promotion, so zero fill).
static inline uint16 _uint16_srl( uint16 a, int sa )
{
    const int shifted = a >> sa;
    return (uint16)shifted;
}
// Add: a + b, truncated to 16 bits.
static inline uint16 _uint16_add( uint16 a, uint16 b )
{
    const int sum = a + b;
    return (uint16)sum;
}
// Subtract: a - b, truncated to 16 bits.
static inline uint16 _uint16_sub( uint16 a, uint16 b )
{
    const int difference = a - b;
    return (uint16)difference;
}
// And: bitwise a & b.
static inline uint16 _uint16_and( uint16 a, uint16 b )
{
    const int bits = a & b;
    return (uint16)bits;
}
// Or: bitwise a | b.
static inline uint16 _uint16_or( uint16 a, uint16 b )
{
    const int bits = a | b;
    return (uint16)bits;
}
// Exclusive Or: bitwise a ^ b.
static inline uint16 _uint16_xor( uint16 a, uint16 b )
{
    const int bits = a ^ b;
    return (uint16)bits;
}
// And with Complement: keeps the bits of a that are clear in b.
static inline uint16 _uint16_andc( uint16 a, uint16 b )
{
    const int keep = ~b;
    const int bits = a & keep;
    return (uint16)bits;
}
// And then Shift Right Logical: (a & b) >> sa.
static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa )
{
    const int masked = a & b;
    const int shifted = masked >> sa;
    return (uint16)shifted;
}
// Shift Right Logical then Mask: (a >> sa) & mask.
static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask )
{
    const int shifted = a >> sa;
    const int masked = shifted & mask;
    return (uint16)masked;
}
// Add then Mask: (a + b) & mask.
static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask )
{
    const int sum = a + b;
    const int masked = sum & mask;
    return (uint16)masked;
}
// Select on Sign bit: returns a when bit 15 of test is set, b otherwise.
// The sign bit of test is spread into a full 16-bit mask, so no branch is needed.
static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b )
{
    const uint16 mask = _uint16_ext( test );
    const int chosen = (a & mask) | (b & ~mask);
    return (uint16)chosen;
}
// Count Leading Zeros
// Returns the number of zero bits above the highest set bit of x,
// and 32 when x is zero (on both code paths).
static inline uint32 _uint32_cntlz( uint32 x )
{
#ifdef __GNUC__
    /* __builtin_clz typically maps to a single instruction                 */
    /* (e.g. cntlzw on PowerPC, bsr/lzcnt on x86).                          */
    /* Its result is undefined for x == 0, so that case is answered         */
    /* explicitly with 32, matching the portable implementation below.      */
    const uint32 nlz = (x != 0) ? (uint32)__builtin_clz( x ) : 32;
    return (nlz);
#else
    /* Smear the highest set bit into every lower position... */
    const uint32 x0 = _uint32_srl( x, 1 );
    const uint32 x1 = _uint32_or( x, x0 );
    const uint32 x2 = _uint32_srl( x1, 2 );
    const uint32 x3 = _uint32_or( x1, x2 );
    const uint32 x4 = _uint32_srl( x3, 4 );
    const uint32 x5 = _uint32_or( x3, x4 );
    const uint32 x6 = _uint32_srl( x5, 8 );
    const uint32 x7 = _uint32_or( x5, x6 );
    const uint32 x8 = _uint32_srl( x7, 16 );
    const uint32 x9 = _uint32_or( x7, x8 );
    /* ...then count the bits that remain clear (the leading zeros) with a */
    /* parallel population count of the complement.                        */
    const uint32 xA = _uint32_not( x9 );
    const uint32 xB = _uint32_srl( xA, 1 );
    const uint32 xC = _uint32_and( xB, 0x55555555 );
    const uint32 xD = _uint32_sub( xA, xC );
    const uint32 xE = _uint32_and( xD, 0x33333333 );
    const uint32 xF = _uint32_srl( xD, 2 );
    const uint32 x10 = _uint32_and( xF, 0x33333333 );
    const uint32 x11 = _uint32_add( xE, x10 );
    const uint32 x12 = _uint32_srl( x11, 4 );
    const uint32 x13 = _uint32_add( x11, x12 );
    const uint32 x14 = _uint32_and( x13, 0x0f0f0f0f );
    const uint32 x15 = _uint32_srl( x14, 8 );
    const uint32 x16 = _uint32_add( x14, x15 );
    const uint32 x17 = _uint32_srl( x16, 16 );
    const uint32 x18 = _uint32_add( x16, x17 );
    const uint32 x19 = _uint32_and( x18, 0x0000003f );
    return ( x19 );
#endif
}
// Count Leading Zeros
// Returns the number of zero bits above the highest set bit of the 16-bit
// value x, and 16 when x is zero (on both code paths).
static inline uint16 _uint16_cntlz( uint16 x )
{
#ifdef __GNUC__
    /* __builtin_clz typically maps to a single instruction                 */
    /* (e.g. cntlzw on PowerPC, bsr/lzcnt on x86).                          */
    /* The value is moved to the top half of a 32-bit word so the 32-bit    */
    /* count equals the 16-bit count. The builtin's result is undefined     */
    /* for 0, so that case is answered explicitly with 16, matching the     */
    /* portable implementation below.                                       */
    const uint32 x32 = _uint32_sll( x, 16 );
    const uint16 nlz = (x32 != 0) ? (uint16)__builtin_clz( x32 ) : (uint16)16;
    return (nlz);
#else
    /* Smear the highest set bit into every lower position... */
    const uint16 x0 = _uint16_srl( x, 1 );
    const uint16 x1 = _uint16_or( x, x0 );
    const uint16 x2 = _uint16_srl( x1, 2 );
    const uint16 x3 = _uint16_or( x1, x2 );
    const uint16 x4 = _uint16_srl( x3, 4 );
    const uint16 x5 = _uint16_or( x3, x4 );
    const uint16 x6 = _uint16_srl( x5, 8 );
    const uint16 x7 = _uint16_or( x5, x6 );
    /* ...then count the bits that remain clear (the leading zeros) with a */
    /* parallel population count of the complement.                        */
    const uint16 x8 = _uint16_not( x7 );
    const uint16 x9 = _uint16_srlm( x8, 1, 0x5555 );
    const uint16 xA = _uint16_sub( x8, x9 );
    const uint16 xB = _uint16_and( xA, 0x3333 );
    const uint16 xC = _uint16_srlm( xA, 2, 0x3333 );
    const uint16 xD = _uint16_add( xB, xC );
    const uint16 xE = _uint16_srl( xD, 4 );
    const uint16 xF = _uint16_addm( xD, xE, 0x0f0f );
    const uint16 x10 = _uint16_srl( xF, 8 );
    const uint16 x11 = _uint16_addm( xF, x10, 0x001f );
    return ( x11 );
#endif
}
// Convert the bit pattern of a 32-bit float (f) to the bit pattern of a
// 16-bit half, without branching.
//
// Every candidate result (normal, denormal, infinity, NaN, overflow) is
// computed unconditionally. Predicates are encoded in the most significant
// bit of the "*_msb" values, and _uint32_sels( test, a, b ) picks a when that
// bit is set and b otherwise.
uint16
half_from_float( uint32 f )
{
// Field masks and positions. f_* refer to the 32-bit input layout
// (sign bit 31, exponent bits 30..23, mantissa bits 22..0); h_* refer to the
// 16-bit output layout (sign 15, exponent 14..10, mantissa 9..0).
const uint32 one = _uint32_li( 0x00000001 );
const uint32 f_e_mask = _uint32_li( 0x7f800000 );
const uint32 f_m_mask = _uint32_li( 0x007fffff );
const uint32 f_s_mask = _uint32_li( 0x80000000 );
const uint32 h_e_mask = _uint32_li( 0x00007c00 );
const uint32 f_e_pos = _uint32_li( 0x00000017 );
// Bit 12: the highest mantissa bit dropped by the 13-bit shift to half width.
const uint32 f_m_round_bit = _uint32_li( 0x00001000 );
const uint32 h_nan_em_min = _uint32_li( 0x00007c01 );
const uint32 f_h_s_pos_offset = _uint32_li( 0x00000010 );
const uint32 f_m_hidden_bit = _uint32_li( 0x00800000 );
const uint32 f_h_m_pos_offset = _uint32_li( 0x0000000d );
// (127 - 15) << 23: difference between the two exponent biases, positioned
// in the float exponent field.
const uint32 f_h_bias_offset = _uint32_li( 0x38000000 );
const uint32 f_m_snan_mask = _uint32_li( 0x003fffff );
const uint16 h_snan_mask = _uint32_li( 0x00007e00 );
// Split the input into exponent, mantissa and sign (still in float position).
const uint32 f_e = _uint32_and( f, f_e_mask );
const uint32 f_m = _uint32_and( f, f_m_mask );
const uint32 f_s = _uint32_and( f, f_s_mask );
// Re-bias the exponent for the half format.
const uint32 f_e_h_bias = _uint32_sub( f_e, f_h_bias_offset );
const uint32 f_e_h_bias_amount = _uint32_srl( f_e_h_bias, f_e_pos );
// Rounding: if bit 12 is set, add one unit at bit 13 (the lowest bit kept).
const uint32 f_m_round_mask = _uint32_and( f_m, f_m_round_bit );
const uint32 f_m_round_offset = _uint32_sll( f_m_round_mask, one );
const uint32 f_m_rounded = _uint32_add( f_m, f_m_round_offset );
// Non-zero when rounding carried out of the mantissa into bit 23.
const uint32 f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit );
// Denormal candidate: restore the hidden bit and shift right by
// (1 - re-biased exponent).
// NOTE(review): this shift count is only meaningful for inputs in the half
// denormal range; for other inputs it is out of range, and the value is only
// selected when is_underflow_msb is set — confirm the target tolerates this.
const uint32 f_m_denorm_sa = _uint32_sub( one, f_e_h_bias_amount );
const uint32 f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit );
const uint32 f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa );
// Normal candidate (exponent | mantissa) and the exponent-plus-one candidate
// used when rounding overflowed the mantissa.
const uint32 f_em_norm_packed = _uint32_or( f_e_h_bias, f_m_rounded );
const uint32 f_e_overflow = _uint32_add( f_e_h_bias, f_m_hidden_bit );
// Move every candidate from float bit positions to half bit positions.
const uint32 h_s = _uint32_srl( f_s, f_h_s_pos_offset );
const uint32 h_m_nan = _uint32_srl( f_m, f_h_m_pos_offset );
const uint32 h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset );
const uint32 h_em_norm = _uint32_srl( f_em_norm_packed, f_h_m_pos_offset );
const uint32 h_em_overflow = _uint32_srl( f_e_overflow, f_h_m_pos_offset );
// Predicates, each carried in the most significant bit of the value:
//   f_e - 1 wraps (msb set) when the exponent field is zero;
//   -f_m has its msb set when the mantissa is non-zero;
//   f_e - f_e_mask has its msb set when the exponent is not all ones.
const uint32 is_e_eqz_msb = _uint32_dec( f_e );
const uint32 is_m_nez_msb = _uint32_neg( f_m );
const uint32 is_h_m_nan_nez_msb = _uint32_neg( h_m_nan );
const uint32 is_e_nflagged_msb = _uint32_sub( f_e, f_e_mask );
const uint32 is_ninf_msb = _uint32_or( is_e_nflagged_msb, is_m_nez_msb );
const uint32 is_underflow_msb = _uint32_sub( is_e_eqz_msb, f_h_bias_offset );
const uint32 is_nan_nunderflow_msb = _uint32_or( is_h_m_nan_nez_msb, is_e_nflagged_msb );
const uint32 is_m_snan_msb = _uint32_sub( f_m_snan_mask, f_m );
const uint32 is_snan_msb = _uint32_andc( is_m_snan_msb, is_e_nflagged_msb );
const uint32 is_overflow_msb = _uint32_neg( f_m_rounded_overflow );
// Selection chain: each step either overrides the previous result or passes
// it through, so later selections take precedence over earlier ones.
const uint32 h_nan_underflow_result = _uint32_sels( is_nan_nunderflow_msb, h_em_norm, h_nan_em_min );
const uint32 h_inf_result = _uint32_sels( is_ninf_msb, h_nan_underflow_result, h_e_mask );
const uint32 h_underflow_result = _uint32_sels( is_underflow_msb, h_m_denorm, h_inf_result );
const uint32 h_overflow_result = _uint32_sels( is_overflow_msb, h_em_overflow, h_underflow_result );
const uint32 h_em_result = _uint32_sels( is_snan_msb, h_snan_mask, h_overflow_result );
// Attach the sign; the return narrows the value to 16 bits.
const uint32 h_result = _uint32_or( h_em_result, h_s );
return (h_result);
}
// Convert the bit pattern of a 16-bit half (h) to the bit pattern of a
// 32-bit float, without branching.
//
// Every candidate result (normal, zero, denormal, infinity, NaN) is computed
// unconditionally. Predicates are encoded in the most significant bit of the
// "*_msb" values, and _uint32_sels( test, a, b ) picks a when that bit is set
// and b otherwise.
uint32
half_to_float( uint16 h )
{
// Field masks and positions. h_* refer to the 16-bit input layout
// (sign 15, exponent 14..10, mantissa 9..0); f_* refer to the 32-bit output
// layout (sign 31, exponent 30..23, mantissa 22..0).
const uint32 h_e_mask = _uint32_li( 0x00007c00 );
const uint32 h_m_mask = _uint32_li( 0x000003ff );
const uint32 h_s_mask = _uint32_li( 0x00008000 );
const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 );
const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d );
// (127 - 15) << 10: difference between the two exponent biases, positioned
// in the half exponent field.
const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 );
const uint32 f_e_mask = _uint32_li( 0x7f800000 );
const uint32 f_m_mask = _uint32_li( 0x007fffff );
const uint32 h_f_e_denorm_bias = _uint32_li( 0x0000007e );
const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 );
const uint32 f_e_pos = _uint32_li( 0x00000017 );
const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff );
// Split the input into exponent, mantissa and sign (still in half position).
const uint32 h_e = _uint32_and( h, h_e_mask );
const uint32 h_m = _uint32_and( h, h_m_mask );
const uint32 h_s = _uint32_and( h, h_s_mask );
// Re-bias the exponent for the float format.
const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset );
// Leading zeros of the mantissa, used to renormalize denormal inputs.
// NOTE(review): evaluated even when h_m is zero; the values derived from it
// are only selected when is_denorm_msb is set (h_m non-zero) — confirm
// _uint32_cntlz is well defined for zero on the target.
const uint32 h_m_nlz = _uint32_cntlz( h_m );
// Normal candidate: move sign, exponent and mantissa to float bit positions.
const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset );
const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset );
const uint32 f_em = _uint32_or( f_e, f_m );
// Denormal candidate: shift the mantissa left until its leading one lands on
// the float hidden-bit position (masked off afterwards) and derive the
// matching exponent from the shift amount.
const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias );
const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa );
const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask );
const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm );
// NaN candidate: all-ones float exponent with the shifted mantissa payload.
const uint32 f_em_nan = _uint32_or( f_e_mask, f_m );
// Predicates, each carried in the most significant bit of the value:
//   h_e - 1 wraps (msb set) when the exponent field is zero;
//   -h_m has its msb set when the mantissa is non-zero;
//   0x7bff - h_e wraps (msb set) when the exponent field is all ones.
const uint32 is_e_eqz_msb = _uint32_dec( h_e );
const uint32 is_m_nez_msb = _uint32_neg( h_m );
const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e );
const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );
const uint32 is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb );
const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb );
const uint32 is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb );
// Full-width mask that clears the normal candidate when the input is zero.
const uint32 is_zero = _uint32_ext( is_zero_msb );
// Selection chain: each step either overrides the previous result or passes
// it through, so later selections take precedence over earlier ones.
const uint32 f_zero_result = _uint32_andc( f_em, is_zero );
const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
const uint32 f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result );
const uint32 f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result );
// Attach the sign.
const uint32 f_result = _uint32_or( f_s, f_nan_result );
return (f_result);
}
uint16
half_add( uint16 x, uint16 y )
{
const uint16 one = _uint16_li( 0x0001 );
const uint16 msb_to_lsb_sa = _uint16_li( 0x000f );
const uint16 h_s_mask = _uint16_li( 0x8000 );
const uint16 h_e_mask = _uint16_li( 0x7c00 );
const uint16 h_m_mask = _uint16_li( 0x03ff );
const uint16 h_m_msb_mask = _uint16_li( 0x2000 );
const uint16 h_m_msb_sa = _uint16_li( 0x000d );
const uint16 h_m_hidden = _uint16_li( 0x0400 );
const uint16 h_e_pos = _uint16_li( 0x000a );
const uint16 h_e_bias_minus_one = _uint16_li( 0x000e );
const uint16 h_m_grs_carry = _uint16_li( 0x4000 );
const uint16 h_m_grs_carry_pos = _uint16_li( 0x000e );
const uint16 h_grs_size = _uint16_li( 0x0003 );
const uint16 h_snan = _uint16_li( 0xfe00 );
const uint16 h_e_mask_minus_one = _uint16_li( 0x7bff );
const uint16 h_grs_round_carry = _uint16_sll( one, h_grs_size );
const uint16 h_grs_round_mask = _uint16_sub( h_grs_round_carry, one );
const uint16 x_e = _uint16_and( x, h_e_mask );
const uint16 y_e = _uint16_and( y, h_e_mask );
const uint16 is_y_e_larger_msb = _uint16_sub( x_e, y_e );
const uint16 a = _uint16_sels( is_y_e_larger_msb, y, x);
const uint16 a_s = _uint16_and( a, h_s_mask );
const uint16 a_e = _uint16_and( a, h_e_mask );
const uint16 a_m_no_hidden_bit = _uint16_and( a, h_m_mask );
const uint16 a_em_no_hidden_bit = _uint16_or( a_e, a_m_no_hidden_bit );
const uint16 b = _uint16_sels( is_y_e_larger_msb, x, y);
const uint16 b_s = _uint16_and( b, h_s_mask );
const uint16 b_e = _uint16_and( b, h_e_mask );
const uint16 b_m_no_hidden_bit = _uint16_and( b, h_m_mask );
const uint16 b_em_no_hidden_bit = _uint16_or( b_e, b_m_no_hidden_bit );
const uint16 is_diff_sign_msb = _uint16_xor( a_s, b_s );
const uint16 is_a_inf_msb = _uint16_sub( h_e_mask_minus_one, a_em_no_hidden_bit );
const uint16 is_b_inf_msb = _uint16_sub( h_e_mask_minus_one, b_em_no_hidden_bit );
const uint16 is_undenorm_msb = _uint16_dec( a_e );
const uint16 is_undenorm = _uint16_ext( is_undenorm_msb );
const uint16 is_both_inf_msb = _uint16_and( is_a_inf_msb, is_b_inf_msb );
const uint16 is_invalid_inf_op_msb = _uint16_and( is_both_inf_msb, b_s );
const uint16 is_a_e_nez_msb = _uint16_neg( a_e );
const uint16 is_b_e_nez_msb = _uint16_neg( b_e );
const uint16 is_a_e_nez = _uint16_ext( is_a_e_nez_msb );
const uint16 is_b_e_nez = _uint16_ext( is_b_e_nez_msb );
const uint16 a_m_hidden_bit = _uint16_and( is_a_e_nez, h_m_hidden );
const uint16 b_m_hidden_bit = _uint16_and( is_b_e_nez, h_m_hidden );
const uint16 a_m_no_grs = _uint16_or( a_m_no_hidden_bit, a_m_hidden_bit );
const uint16 b_m_no_grs = _uint16_or( b_m_no_hidden_bit, b_m_hidden_bit );
const uint16 diff_e = _uint16_sub( a_e, b_e );
const uint16 a_e_unbias = _uint16_sub( a_e, h_e_bias_minus_one );
const uint16 a_m = _uint16_sll( a_m_no_grs, h_grs_size );
const uint16 a_e_biased = _uint16_srl( a_e, h_e_pos );
const uint16 m_sa_unbias = _uint16_srl( a_e_unbias, h_e_pos );
const uint16 m_sa_default = _uint16_srl( diff_e, h_e_pos );
const uint16 m_sa_unbias_mask = _uint16_andc( is_a_e_nez_msb, is_b_e_nez_msb );
const uint16 m_sa = _uint16_sels( m_sa_unbias_mask, m_sa_unbias, m_sa_default );
const uint16 b_m_no_sticky = _uint16_sll( b_m_no_grs, h_grs_size );
const uint16 sh_m = _uint16_srl( b_m_no_sticky, m_sa );
const uint16 sticky_overflow = _uint16_sll( one, m_sa );
const uint16 sticky_mask = _uint16_dec( sticky_overflow );
const uint16 sticky_collect = _uint16_and( b_m_no_sticky, sticky_mask );
const uint16 is_sticky_set_msb = _uint16_neg( sticky_collect );
const uint16 sticky = _uint16_srl( is_sticky_set_msb, msb_to_lsb_sa);
const uint16 b_m = _uint16_or( sh_m, sticky );
const uint16 is_c_m_ab_pos_msb = _uint16_sub( b_m, a_m );
const uint16 c_inf = _uint16_or( a_s, h_e_mask );
const uint16 c_m_sum = _uint16_add( a_m, b_m );
const uint16 c_m_diff_ab = _uint16_sub( a_m, b_m );
const uint16 c_m_diff_ba = _uint16_sub( b_m, a_m );
const uint16 c_m_smag_diff = _uint16_sels( is_c_m_ab_pos_msb, c_m_diff_ab, c_m_diff_ba );
const uint16 c_s_diff = _uint16_sels( is_c_m_ab_pos_msb, a_s, b_s );
const uint16 c_s = _uint16_sels( is_diff_sign_msb, c_s_diff, a_s );
const uint16 c_m_smag_diff_nlz = _uint16_cntlz( c_m_smag_diff );
const uint16 diff_norm_sa = _uint16_sub( c_m_smag_diff_nlz, one );
const uint16 is_diff_denorm_msb = _uint16_sub( a_e_biased, diff_norm_sa );
const uint16 is_diff_denorm = _uint16_ext( is_diff_denorm_msb );
const uint16 is_a_or_b_norm_msb = _uint16_neg( a_e_biased );
const uint16 diff_denorm_sa = _uint16_dec( a_e_biased );
const uint16 c_m_diff_denorm = _uint16_sll( c_m_smag_diff, diff_denorm_sa );
const uint16 c_m_diff_norm = _uint16_sll( c_m_smag_diff, diff_norm_sa );
const uint16 c_e_diff_norm = _uint16_sub( a_e_biased, diff_norm_sa );
const uint16 c_m_diff_ab_norm = _uint16_sels( is_diff_denorm_msb, c_m_diff_denorm, c_m_diff_norm );
const uint16 c_e_diff_ab_norm = _uint16_andc( c_e_diff_norm, is_diff_denorm );
const uint16 c_m_diff = _uint16_sels( is_a_or_b_norm_msb, c_m_diff_ab_norm, c_m_smag_diff );
const uint16 c_e_diff = _uint16_sels( is_a_or_b_norm_msb, c_e_diff_ab_norm, a_e_biased );
const uint16 is_diff_eqz_msb = _uint16_dec( c_m_diff );
const uint16 is_diff_exactly_zero_msb = _uint16_and( is_diff_sign_msb, is_diff_eqz_msb );
const uint16 is_diff_exactly_zero = _uint16_ext( is_diff_exactly_zero_msb );
const uint16 c_m_added = _uint16_sels( is_diff_sign_msb, c_m_diff, c_m_sum );
const uint16 c_e_added = _uint16_sels( is_diff_sign_msb, c_e_diff, a_e_biased );
const uint16 c_m_carry = _uint16_and( c_m_added, h_m_grs_carry );
const uint16 is_c_m_carry_msb = _uint16_neg( c_m_carry );
const uint16 c_e_hidden_offset = _uint16_andsrl( c_m_added, h_m_grs_carry, h_m_grs_carry_pos );
const uint16 c_m_sub_hidden = _uint16_srl( c_m_added, one );
const uint16 c_m_no_hidden = _uint16_sels( is_c_m_carry_msb, c_m_sub_hidden, c_m_added );
const uint16 c_e_no_hidden = _uint16_add( c_e_added, c_e_hidden_offset );
const uint16 c_m_no_hidden_msb = _uint16_and( c_m_no_hidden, h_m_msb_mask );
const uint16 undenorm_m_msb_odd = _uint16_srl( c_m_no_hidden_msb, h_m_msb_sa );
const uint16 undenorm_fix_e = _uint16_and( is_undenorm, undenorm_m_msb_odd );
const uint16 c_e_fixed = _uint16_add( c_e_no_hidden, undenorm_fix_e );
const uint16 c_m_round_amount = _uint16_and( c_m_no_hidden, h_grs_round_mask );
const uint16 c_m_rounded = _uint16_add( c_m_no_hidden, c_m_round_amount );
const uint16 c_m_round_overflow = _uint16_andsrl( c_m_rounded, h_m_grs_carry, h_m_grs_carry_pos );
const uint16 c_e_rounded = _uint16_add( c_e_fixed, c_m_round_overflow );
const uint16 c_m_no_grs = _uint16_srlm( c_m_rounded, h_grs_size, h_m_mask );
const uint16 c_e = _uint16_sll( c_e_rounded, h_e_pos );
const uint16 c_em = _uint16_or( c_e, c_m_no_grs );
const uint16 c_normal = _uint16_or( c_s, c_em );
const uint16 c_inf_result = _uint16_sels( is_a_inf_msb, c_inf, c_normal );
const uint16 c_zero_result = _uint16_andc( c_inf_result, is_diff_exactly_zero );
const uint16 c_result = _uint16_sels( is_invalid_inf_op_msb, h_snan, c_zero_result );
return (c_result);
}

View File

@ -1,9 +0,0 @@
#ifndef NV_MATH_HALF_H
#define NV_MATH_HALF_H
#include <nvmath/nvmath.h>
uint32 half_to_float( uint16 h );
uint16 half_from_float( uint32 f );
#endif /* NV_MATH_HALF_H */

View File

@ -24,8 +24,6 @@ public:
Matrix(zero_t);
Matrix(identity_t);
Matrix(const Matrix & m);
Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
Matrix(const scalar m[]); // m is assumed to contain 16 elements
scalar data(uint idx) const;
scalar & data(uint idx);
@ -77,21 +75,6 @@ inline Matrix::Matrix(const Matrix & m)
}
}
/// Build a matrix from four vectors. Each vector fills four consecutive
/// entries of m_data: v0 -> [0..3], v1 -> [4..7], v2 -> [8..11], v3 -> [12..15].
inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3)
{
    // Entries 0..3 come from v0.
    m_data[ 0] = v0.x();
    m_data[ 1] = v0.y();
    m_data[ 2] = v0.z();
    m_data[ 3] = v0.w();

    // Entries 4..7 come from v1.
    m_data[ 4] = v1.x();
    m_data[ 5] = v1.y();
    m_data[ 6] = v1.z();
    m_data[ 7] = v1.w();

    // Entries 8..11 come from v2.
    m_data[ 8] = v2.x();
    m_data[ 9] = v2.y();
    m_data[10] = v2.z();
    m_data[11] = v2.w();

    // Entries 12..15 come from v3.
    m_data[12] = v3.x();
    m_data[13] = v3.y();
    m_data[14] = v3.z();
    m_data[15] = v3.w();
}
/// Build a matrix by copying the first 16 scalars of @a m into m_data, in order.
/// The caller must supply at least 16 elements.
inline Matrix::Matrix(const scalar m[])
{
    int idx = 0;
    while (idx < 16) {
        m_data[idx] = m[idx];
        ++idx;
    }
}
// Accessors
inline scalar Matrix::data(uint idx) const

156
src/nvmath/Montecarlo.cpp Normal file
View File

@ -0,0 +1,156 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Montecarlo.h>
using namespace nv;
/// Regenerate every sample with the requested sampling method and distribution.
/// A method value other than the three handled here leaves the samples untouched.
void SampleDistribution::redistribute(Method method/*=Method_NRook*/, Distribution dist/*=Distribution_Cosine*/)
{
    if (method == Method_Random) {
        redistributeRandom(dist);
    }
    else if (method == Method_Stratified) {
        redistributeStratified(dist);
    }
    else if (method == Method_NRook) {
        redistributeNRook(dist);
    }
}
/// Place every sample independently at random (no stratification).
/// Two random floats are drawn per sample, x first and then y.
void SampleDistribution::redistributeRandom(const Distribution dist)
{
    const uint total = m_sampleArray.count();

    // This is the worst method possible!
    uint index = 0;
    while (index < total)
    {
        const float x = m_rand.getFloat();
        const float y = m_rand.getFloat();

        // Map uniform distribution in the square to the (hemi)sphere.
        if (dist != Distribution_Uniform) {
            nvDebugCheck(dist == Distribution_Cosine);
            m_sampleArray[index].setUV(acosf(sqrtf(x)), 2 * PI * y);
        }
        else {
            m_sampleArray[index].setUV(acosf(1 - 2 * x), 2 * PI * y);
        }

        ++index;
    }
}
/// Jittered grid sampling: the unit square is split into side x side cells and
/// one random point is drawn inside each cell. The sample count must be a
/// perfect square (checked in debug builds only).
void SampleDistribution::redistributeStratified(const Distribution dist)
{
    const uint total = m_sampleArray.count();
    const uint side = uint(sqrtf(float(total)));
    nvDebugCheck(side*side == total); // Must use exact powers!

    // Create a uniform distribution of points on the hemisphere with low variance.
    uint slot = 0;
    for (uint row = 0; row < side; row++) {
        for (uint col = 0; col < side; col++) {
            // x is drawn before y so the random sequence is consumed in a fixed order.
            const float x = (col + m_rand.getFloat()) / float(side);
            const float y = (row + m_rand.getFloat()) / float(side);

            // Map uniform distribution in the square to the (hemi)sphere.
            if (dist != Distribution_Uniform) {
                nvDebugCheck(dist == Distribution_Cosine);
                m_sampleArray[slot].setUV(acosf(sqrtf(x)), 2 * PI * y);
            }
            else {
                m_sampleArray[slot].setUV(acosf(1 - 2 * x), 2 * PI * y);
            }

            slot++;
        }
    }
}
/** Multi-Stage N-rooks Sampling Method.
* See: http://www.acm.org/jgt/papers/WangSung9/9
*
* Recursively permutes the @a size entries of @a cells in place: consecutive
* pairs are split at random between an "upper" and a "lower" half, each half
* is permuted recursively, and the halves are written back upper-first.
*
* @param size number of entries in @a cells; sizes <= 1 are left untouched.
* @param cells array of at least @a size entries, permuted in place.
*/
void SampleDistribution::multiStageNRooks(const int size, int* cells)
{
    // Nothing to permute. Testing <= 1 (rather than == 1) also terminates the
    // recursion for an empty input, which would otherwise recurse forever.
    if (size <= 1) {
        return;
    }

    int size1 = size >> 1;
    int size2 = size >> 1;

    // For an odd size, a coin flip decides which half receives the extra cell.
    if (size & 1) {
        if (m_rand.getFloat() > 0.5) {
            size1++;
        }
        else {
            size2++;
        }
    }

    int* upper_cells = new int[size1];
    int* lower_cells = new int[size2];

    // Distribute each consecutive pair: one cell goes up, the other down.
    int i, j;
    for (i = 0, j = 0; i < size - 1; i += 2, j++) {
        if (m_rand.get() & 1) {
            upper_cells[j] = cells[i];
            lower_cells[j] = cells[i + 1];
        }
        else {
            upper_cells[j] = cells[i + 1];
            lower_cells[j] = cells[i];
        }
    }

    // Odd size: the unpaired last cell goes to whichever half is larger.
    if (size1 != size2) {
        if (size1 > size2) {
            upper_cells[j] = cells[i];
        }
        else {
            lower_cells[j] = cells[i];
        }
    }

    // The upper half is released before recursing into the lower half.
    multiStageNRooks(size1, upper_cells);
    memcpy(cells, upper_cells, size1 * sizeof(int));
    delete [] upper_cells;

    multiStageNRooks(size2, lower_cells);
    memcpy(cells + size1, lower_cells, size2 * sizeof(int));
    delete [] lower_cells;
}
/// N-rooks sampling: sample i takes column i and a row chosen by a random
/// permutation produced by multiStageNRooks(), with a random jitter inside the cell.
void SampleDistribution::redistributeNRook(const Distribution dist)
{
    const uint sampleCount = m_sampleArray.count();

    // Nothing to distribute; also avoids handing an empty permutation to
    // multiStageNRooks().
    if (sampleCount == 0) {
        return;
    }

    // Generate nrook cells: start from the identity permutation and shuffle it.
    int * cells = new int[sampleCount];
    for (uint32 i = 0; i < sampleCount; i++)
    {
        cells[i] = i;
    }
    multiStageNRooks(sampleCount, cells);

    for (uint i = 0; i < sampleCount; i++)
    {
        // x is drawn before y so the random sequence is consumed in a fixed order.
        float x = (i + m_rand.getFloat()) / sampleCount;
        float y = (cells[i] + m_rand.getFloat()) / sampleCount;

        // Map uniform distribution in the square to the (hemi)sphere.
        if (dist == Distribution_Uniform) {
            m_sampleArray[i].setUV(acosf(1 - 2 * x), 2 * PI * y);
        }
        else {
            nvDebugCheck(dist == Distribution_Cosine);
            m_sampleArray[i].setUV(acosf(sqrtf(x)), 2 * PI * y);
        }
    }

    delete [] cells;
}

84
src/nvmath/Montecarlo.h Normal file
View File

@ -0,0 +1,84 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_MONTECARLO_H
#define NV_MATH_MONTECARLO_H
#include <nvmath/Vector.h>
#include <nvmath/Random.h>
namespace nv
{
/// A set of random sample points, each stored both as a pair of angles (uv)
/// and as the matching 3d direction (dir).
class SampleDistribution
{
public:

    /// How the samples are placed.
    enum Method {
        Method_Random,
        Method_Stratified,
        Method_NRook
    };

    /// Which density the samples follow.
    enum Distribution {
        Distribution_Uniform,
        Distribution_Cosine
    };

    /// Allocate @a num samples. They hold no meaningful values until redistribute() is called.
    SampleDistribution(int num)
    {
        m_sampleArray.resize(num);
    }

    /// Regenerate all samples.
    void redistribute(Method method=Method_NRook, Distribution dist=Distribution_Cosine);

    /// Parametric (angular) coordinates of sample @a i.
    Vector2 sample(int i) { return m_sampleArray[i].uv; }

    /// Direction of sample @a i.
    Vector3 sampleDir(int i) { return m_sampleArray[i].dir; }

    /// Number of samples.
    uint sampleCount() const { return m_sampleArray.count(); }

private:

    void redistributeRandom(const Distribution dist);
    void redistributeStratified(const Distribution dist);
    void multiStageNRooks(const int size, int* cells);
    void redistributeNRook(const Distribution dist);

    /// One sample; uv and dir are always written together by the setters.
    struct Sample
    {
        /// Set the sample from 3d coordinates; the angles are derived as (acos(z), atan2(y, x)).
        void setDir(float x, float y, float z) {
            dir.set(x, y, z);
            uv.set(acosf(z), atan2f(y, x));
        }

        /// Set the sample from the two angles; the direction is
        /// (sin u cos v, sin u sin v, cos u).
        void setUV(float u, float v) {
            uv.set(u, v);
            const float sinU = sinf(u);
            dir.set(sinU * cosf(v), sinU * sinf(v), cosf(u));
        }

        Vector2 uv;
        Vector3 dir;
    };

    /// Random number generator used by the redistribute methods.
    MTRand m_rand;

    /// Samples.
    Array<Sample> m_sampleArray;
};
} // nv namespace
#endif // NV_MATH_MONTECARLO_H

View File

@ -3,8 +3,8 @@
#ifndef NV_MATH_PLANE_H
#define NV_MATH_PLANE_H
#include "nvmath.h"
#include "Vector.h"
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
namespace nv
{
@ -59,7 +59,7 @@ namespace nv
return Plane(plane.asVector() * inv);
}
// Get the signed distance from the given point to this plane.
// Get the distance from the given point to this plane.
inline float distance(Plane::Arg plane, Vector3::Arg point)
{
return dot(plane.vector(), point) - plane.offset();

128
src/nvmath/Quaternion.h Normal file
View File

@ -0,0 +1,128 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_QUATERNION_H
#define NV_MATH_QUATERNION_H
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
namespace nv
{
/// Quaternion value type backed by a single Vector4 holding (x, y, z, w).
class NVMATH_CLASS Quaternion
{
public:
/// Convention used by the free functions: quaternions are passed by const reference.
typedef Quaternion const & Arg;
/// Default constructor; the stored vector is default-constructed, no values are assigned here.
Quaternion();
/// Construct from the `zero` tag (forwards it to the Vector4 constructor).
explicit Quaternion(zero_t);
/// Construct from the four components.
Quaternion(float x, float y, float z, float w);
/// Converting constructor from a Vector4. It is intentionally not explicit:
/// the scale() helpers return a Vector4 expression from a function declared
/// to return Quaternion and rely on this conversion.
Quaternion(Vector4::Arg v);
/// Copy the components of @a v. Returns a const reference to this object.
const Quaternion & operator=(Quaternion::Arg v);
// Component accessors (read-only).
scalar x() const;
scalar y() const;
scalar z() const;
scalar w() const;
/// Access the underlying Vector4 (read-only and mutable overloads).
const Vector4 & asVector() const;
Vector4 & asVector();
private:
// The four components, stored as (x, y, z, w).
Vector4 q;
};
// Inline definitions: each one forwards directly to the Vector4 member.
inline Quaternion::Quaternion() {}
inline Quaternion::Quaternion(zero_t) : q(zero) {}
inline Quaternion::Quaternion(float x, float y, float z, float w) : q(x, y, z, w) {}
inline Quaternion::Quaternion(Vector4::Arg v) : q(v) {}
inline const Quaternion & Quaternion::operator=(Quaternion::Arg v) { q = v.q; return *this; }
inline scalar Quaternion::x() const { return q.x(); }
inline scalar Quaternion::y() const { return q.y(); }
inline scalar Quaternion::z() const { return q.z(); }
inline scalar Quaternion::w() const { return q.w(); }
inline const Vector4 & Quaternion::asVector() const { return q; }
inline Vector4 & Quaternion::asVector() { return q; }
/// Quaternion product a * b, treating (x, y, z) as the vector part and w as
/// the scalar part. The product is not commutative.
inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
{
    // @@ Efficient SIMD implementation?
    const scalar ax = a.x(), ay = a.y(), az = a.z(), aw = a.w();
    const scalar bx = b.x(), by = b.y(), bz = b.z(), bw = b.w();

    // The terms are summed in the same left-to-right order for every component.
    const scalar rx = + ax * bw + ay * bz - az * by + aw * bx;
    const scalar ry = - ax * bz + ay * bw + az * bx + aw * by;
    const scalar rz = + ax * by - ay * bx + az * bw + aw * bz;
    const scalar rw = - ax * bx - ay * by - az * bz + aw * bw;

    return Quaternion(rx, ry, rz, rw);
}
/// Multiply the quaternion by the scalar @a s, by delegating to the vector scale().
inline Quaternion scale(Quaternion::Arg q, float s)
{
    const Vector4 & components = q.asVector();
    // The vector result converts back through the Quaternion(Vector4) constructor.
    return scale(components, s);
}
/// Quaternion times scalar; same as scale(q, s).
inline Quaternion operator *(Quaternion::Arg q, float s)
{
    const Quaternion scaled = scale(q, s);
    return scaled;
}

/// Scalar times quaternion; same as scale(q, s).
inline Quaternion operator *(float s, Quaternion::Arg q)
{
    const Quaternion scaled = scale(q, s);
    return scaled;
}
/// Scale the quaternion by the vector @a s, by delegating to the vector/vector scale().
inline Quaternion scale(Quaternion::Arg q, Vector4::Arg s)
{
    const Vector4 & components = q.asVector();
    // The vector result converts back through the Quaternion(Vector4) constructor.
    return scale(components, s);
}
/*inline Quaternion operator *(Quaternion::Arg q, Vector4::Arg s)
{
return scale(q, s);
}
inline Quaternion operator *(Vector4::Arg s, Quaternion::Arg q)
{
return scale(q, s);
}*/
/// Conjugate of @a q, obtained by scaling with (-1, -1, -1, 1).
inline Quaternion conjugate(Quaternion::Arg q)
{
    const Vector4 flip(-1, -1, -1, 1);
    return scale(q, flip);
}
/// Length of the quaternion, computed as the length of its underlying Vector4.
inline float length(Quaternion::Arg q)
{
    const Vector4 & components = q.asVector();
    return length(components);
}
/// True when the length of @a q equals 1 within @a epsilon.
inline bool isNormalized(Quaternion::Arg q, float epsilon = NV_NORMAL_EPSILON)
{
    const float len = length(q);
    return equal(len, 1, epsilon);
}
/// Return @a q scaled to unit length.
/// A (near) zero-length input is only caught by a debug check; nothing here
/// guards the division in other builds.
inline Quaternion normalize(Quaternion::Arg q, float epsilon = NV_EPSILON)
{
    const float len = length(q);
    nvDebugCheck(!isZero(len, epsilon));

    const float invLen = 1.0f / len;
    Quaternion unit = scale(q, invLen);
    nvDebugCheck(isNormalized(unit));

    return unit;
}
/// Return the conjugate of the normalized quaternion.
/// The result is always unit length, so the input's magnitude is discarded.
inline Quaternion inverse(Quaternion::Arg q)
{
    const Quaternion unit = normalize(q);
    return conjugate(unit);
}
/// Build the quaternion (v * sin(alpha/2), cos(alpha/2)) for a rotation of
/// @a alpha around the axis @a v. The axis is used as given, not normalized.
inline Quaternion axisAngle(Vector3::Arg v, float alpha)
{
    const float halfAngle = alpha * 0.5f;
    const float sinHalf = sinf(halfAngle);
    const float cosHalf = cosf(halfAngle);
    return Quaternion(Vector4(v * sinHalf, cosHalf));
}
} // nv namespace
#endif // NV_MATH_QUATERNION_H

54
src/nvmath/Random.cpp Normal file
View File

@ -0,0 +1,54 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Random.h>
#include <time.h>
using namespace nv;
// Statics
// Out-of-class definitions of the Rand48 constants, expressed as 16-bit limbs
// so they can be combined with 16-bit state words.
// a2:a1:a0 reads 0x0005 DEEC E66D; c0 is the additive constant 0xB.
const uint16 Rand48::a0 = 0xE66D;
const uint16 Rand48::a1 = 0xDEEC;
const uint16 Rand48::a2 = 0x0005;
const uint16 Rand48::c0 = 0x000B;
/// Produce a seed from the current calendar time, truncated to uint.
/// Two calls within the same second return the same seed.
uint Rand::randomSeed()
{
    const time_t now = time(NULL);
    return (uint)now;
}
/// Fill the whole state array from a single 32-bit seed: entry 0 is the seed
/// and every later entry is derived from its predecessor.
void MTRand::initialize( uint32 seed )
{
    // Initialize generator state with seed
    // See Knuth TAOCP Vol 2, 3rd Ed, p.106 for multiplier.
    // In previous versions, most significant bits (MSBs) of the seed affect
    // only MSBs of the state array. Modified 9 Jan 2002 by Makoto Matsumoto.
    state[0] = seed & 0xffffffffUL;

    for( int idx = 1; idx < N; ++idx )
    {
        const uint32 prev = state[idx - 1];
        state[idx] = ( 1812433253UL * ( prev ^ (prev >> 30) ) + idx ) & 0xffffffffUL;
    }
}
/// Regenerate all N state words in place and rewind the read cursor.
void MTRand::reload()
{
    // Generate N new values in state
    int k = 0;

    // Words whose partner, M slots ahead, is still inside the array.
    for( ; k < N - M; ++k )
        state[k] = twist( state[k + M], state[k], state[k + 1] );

    // Remaining words except the last: the partner index wraps to the front
    // of the array, where the values were already regenerated above.
    for( ; k < N - 1; ++k )
        state[k] = twist( state[k + M - N], state[k], state[k + 1] );

    // Last word: its successor wraps around to state[0].
    state[k] = twist( state[k + M - N], state[k], state[0] );

    left = N;
    next = state;
}

368
src/nvmath/Random.h Normal file
View File

@ -0,0 +1,368 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_RANDOM_H
#define NV_MATH_RANDOM_H
#include <nvcore/Containers.h> // nextPowerOfTwo
#include <nvmath/nvmath.h>
namespace nv
{
/// Interface of the random number generators.
/// Derived classes implement get(); the range and float helpers are built on top of it.
class Rand
{
public:

    virtual ~Rand() {}

    /// Tag type selecting the "seed from current time" constructors of the derived classes.
    enum time_e { Time };

    /// Provide a new seed. The base implementation ignores it.
    virtual void seed( uint s ) { /* empty */ };

    /// Get an integer random number.
    virtual uint get() = 0;

    /// Get a random number on [0, max] interval (both ends included).
    uint getRange( uint max )
    {
        // Build the smallest all-ones mask that covers every set bit of max.
        // A mask derived from the next power of two would exclude max itself
        // whenever max is a power of two, and is not defined for max == 0.
        // (The shifts below assume uint has at most 32 bits.)
        uint mask = max;
        mask |= mask >> 1;
        mask |= mask >> 2;
        mask |= mask >> 4;
        mask |= mask >> 8;
        mask |= mask >> 16;

        // Rejection sampling: draw masked values until one falls inside the range.
        uint n;
        do { n = get() & mask; } while( n > max );
        return n;
    }

    /// Random number on [0.0, 1.0) interval.
    float getFloat()
    {
        union
        {
            uint32 i;
            float f;
        } pun;

        // 0x3f800000 is the bit pattern of 1.0f; filling the 23 mantissa bits
        // with random bits gives a float in [1, 2), shifted down by one below.
        pun.i = 0x3f800000UL | (get() & 0x007fffffUL);
        return pun.f - 1.0f;
    }

    /*
    /// Random number on [0.0, 1.0] interval.
    double getReal()
    {
    return double(get()) * (1.0/4294967295.0); // 2^32-1
    }
    /// Random number on [0.0, 1.0) interval.
    double getRealExclusive()
    {
    return double(get()) * (1.0/4294967296.0); // 2^32
    }
    */

    /// Get the max value of the random number.
    uint max() const { return 4294967295U; }

    // Get a random seed.
    static uint randomSeed();
};
/// Minimal linear congruential generator; its whole state is one uint.
class SimpleRand : public Rand
{
public:

    /// Seed from the current time.
    SimpleRand( time_e ) : current(randomSeed())
    {
    }

    /// Seed with the given value.
    SimpleRand( uint s = 0 ) : current(s)
    {
    }

    /// Restart the sequence from @a s.
    virtual void seed( uint s )
    {
        current = s;
    }

    /// Advance the state and return it as the next random number.
    virtual uint get()
    {
        current = current * 1103515245 + 12345;
        return current;
    }

private:

    uint current;
};
/// Mersenne twister random number generator.
/// Keeps N words of state; get() hands them out one at a time and reload()
/// regenerates the whole array once they are used up.
class MTRand : public Rand
{
public:
enum { N = 624 }; // length of state vector
enum { M = 397 }; // offset of the partner word combined with each state word in reload()
/// Constructor that uses the current time as the seed.
MTRand( time_e )
{
seed(randomSeed());
}
/// Constructor that uses the given seed.
MTRand( uint s = 0 )
{
seed(s);
}
/// Constructor that uses the given seeds.
/// NOTE(review): only declared here; the definition is not visible alongside
/// initialize()/reload() - confirm it exists before using this overload.
NVMATH_API MTRand( const uint * seed_array, uint length );
/// Provide a new seed. Rebuilds the state from @a s and regenerates it once,
/// which also sets up `next` and `left`.
virtual void seed( uint s )
{
initialize(s);
reload();
}
/// Get the next random number: one state word passed through the tempering shifts/masks below.
virtual uint get()
{
// Pull a 32-bit integer from the generator state
// Every other access function simply transforms the numbers extracted here
if( left == 0 ) {
reload();
}
left--;
uint s1;
s1 = *next++;
// Tempering: the order of these four steps matters.
s1 ^= (s1 >> 11);
s1 ^= (s1 << 7) & 0x9d2c5680U;
s1 ^= (s1 << 15) & 0xefc60000U;
return ( s1 ^ (s1 >> 18) );
};
private:
NVMATH_API void initialize( uint32 seed );
NVMATH_API void reload();
// Bit helpers used by twist().
uint hiBit( uint u ) const { return u & 0x80000000U; }
uint loBit( uint u ) const { return u & 0x00000001U; }
uint loBits( uint u ) const { return u & 0x7fffffffU; }
// Top bit of u combined with the low 31 bits of v.
uint mixBits( uint u, uint v ) const { return hiBit(u) | loBits(v); }
// (~loBit(s1)+1) is all ones when the low bit of s1 is set and zero otherwise,
// so the constant 0x9908b0df is XORed in only for odd s1.
uint twist( uint m, uint s0, uint s1 ) const { return m ^ (mixBits(s0,s1)>>1) ^ ((~loBit(s1)+1) & 0x9908b0dfU); }
private:
uint state[N]; // internal state
uint * next; // next value to get from state
int left; // number of values left before reload needed
};
/** George Marsaglia's random number generator.
* Code based on Thatcher Ulrich public domain source code:
* http://cvs.sourceforge.net/viewcvs.py/tu-testbed/tu-testbed/base/tu_random.cpp?rev=1.7&view=auto
*
* PRNG code adapted from the complimentary-multiply-with-carry
* code in the article: George Marsaglia, "Seeds for Random Number
* Generators", Communications of the ACM, May 2003, Vol 46 No 5,
* pp90-93.
*
* The article says:
*
* "Any one of the choices for seed table size and multiplier will
* provide a RNG that has passed extensive tests of randomness,
* particularly those in [3], yet is simple and fast --
* approximately 30 million random 32-bit integers per second on a
* 850MHz PC. The period is a*b^n, where a is the multiplier, n
* the size of the seed table and b=2^32-1. (a is chosen so that
* b is a primitive root of the prime a*b^n + 1.)"
*
* [3] Marsaglia, G., Zaman, A., and Tsang, W. Toward a universal
* random number generator. _Statistics and Probability Letters
* 8_ (1990), 35-39.
*/
class GMRand : public Rand
{
public:

    /// Size of the seed table Q; must be a power of two because get() wraps the
    /// index with `& (SEED_COUNT - 1)`.
    enum { SEED_COUNT = 8 };

    // Multipliers for the different seed table sizes:
    // const uint64 a = 123471786; // for SEED_COUNT=1024
    // const uint64 a = 123554632; // for SEED_COUNT=512
    // const uint64 a = 8001634; // for SEED_COUNT=255
    // const uint64 a = 8007626; // for SEED_COUNT=128
    // const uint64 a = 647535442; // for SEED_COUNT=64
    // const uint64 a = 547416522; // for SEED_COUNT=32
    // const uint64 a = 487198574; // for SEED_COUNT=16
    // const uint64 a = 716514398U; // for SEED_COUNT=8
    enum { a = 716514398U };

    /// Seed from the current time.
    GMRand( time_e )
    {
        seed(randomSeed());
    }

    /// Seed with the given value.
    GMRand(uint s = 987654321)
    {
        seed(s);
    }

    /// Provide a new seed. Resets the carry and the table index, then fills the
    /// seed table with successive xorshift steps of @a s.
    virtual void seed( uint s )
    {
        c = 362436;
        i = SEED_COUNT - 1;

        for(int k = 0; k < SEED_COUNT; k++) {
            s = s ^ (s << 13);
            s = s ^ (s >> 17);
            s = s ^ (s << 5);
            Q[k] = s;
        }
    }

    /// Get the next 32-bit random number (multiply-with-carry step on one table entry).
    virtual uint get()
    {
        const uint32 r = 0xFFFFFFFE;

        i = (i + 1) & (SEED_COUNT - 1);

        // The product must be formed in 64 bits: `a` is an enumerator, so
        // without the cast `a * Q[i] + c` is evaluated in 32 bits and the
        // carry extracted from the high word below would always be zero.
        const uint64 t = uint64(a) * Q[i] + c;
        c = uint32(t >> 32);

        uint32 x = uint32(t + c);
        if( x < c ) {
            // The 32-bit addition wrapped; propagate the carry.
            x++;
            c++;
        }

        const uint32 val = r - x;
        Q[i] = val;
        return val;
    };

private:

    uint32 c;               // current carry
    uint32 i;               // index of the table entry used by the last get()
    uint32 Q[SEED_COUNT];   // seed table
};
/** Random number implementation from the GNU Sci. Lib. (GSL).
* Adapted from Nicholas Chapman version:
*
* Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough
* This is the Unix rand48() generator. The generator returns the
* upper 32 bits from each term of the sequence,
*
* x_{n+1} = (a x_n + c) mod m
*
* using 48-bit unsigned arithmetic, with a = 0x5DEECE66D , c = 0xB
* and m = 2^48. The seed specifies the upper 32 bits of the initial
* value, x_1, with the lower 16 bits set to 0x330E.
*
* The theoretical value of x_{10001} is 244131582646046.
*
* The period of this generator is ? FIXME (probably around 2^48).
*/
class Rand48 : public Rand
{
public:
/// Seed from the current time.
Rand48( time_e )
{
seed(randomSeed());
}
/// Seed with the given value.
Rand48( uint s = 0x1234ABCD )
{
seed(s);
}
/** Set the given seed. The low state word is fixed to 0x330E; the seed fills
the middle (low 16 bits of s) and high (high 16 bits of s) words. */
virtual void seed( uint s ) {
vstate.x0 = 0x330E;
vstate.x1 = uint16(s & 0xFFFF);
vstate.x2 = uint16((s >> 16) & 0xFFFF);
}
/** Get a random number: advances the state, then returns its upper two
16-bit words packed as (x2 << 16) + x1. */
virtual uint get() {
advance();
uint x1 = vstate.x1;
uint x2 = vstate.x2;
return (x2 << 16) + x1;
}
private:
/// One step of the generator: multiply the three-word state by the three-word
/// constant (a2:a1:a0), add c0, and keep the low three 16-bit words. Done as
/// schoolbook multiplication, carrying the upper bits of each partial sum.
void advance()
{
/* work with unsigned long ints throughout to get correct integer
promotions of any unsigned short ints */
const uint32 x0 = vstate.x0;
const uint32 x1 = vstate.x1;
const uint32 x2 = vstate.x2;
uint32 a;
// Lowest word.
a = a0 * x0 + c0;
vstate.x0 = uint16(a & 0xFFFF);
a >>= 16;
/* although the next line may overflow we only need the top 16 bits
in the following stage, so it does not matter */
// Middle word: carry plus the two cross terms.
a += a0 * x1 + a1 * x0;
vstate.x1 = uint16(a & 0xFFFF);
a >>= 16;
// Highest word: only the low 16 bits of the sum are kept.
a += a0 * x2 + a1 * x1 + a2 * x0;
vstate.x2 = uint16(a & 0xFFFF);
}
private:
// 16-bit limbs of the multiplier (a0 lowest) and the additive constant.
NVMATH_API static const uint16 a0, a1, a2, c0;
// Generator state as three 16-bit words, x0 lowest.
struct rand48_state_t {
uint16 x0, x1, x2;
} vstate;
};
} // nv namespace
#endif // NV_MATH_RANDOM_H

View File

@ -0,0 +1,241 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/SphericalHarmonic.h>
using namespace nv;
namespace
{
// Integer factorial v! for v >= 0 (0! == 1).
// A negative v is returned unchanged. The result is a plain int, so it is
// only exact while v! fits in an int.
inline static int factorial( int v )
{
    if (v == 0) {
        return 1;
    }

    int product = v;
    for (int k = v - 1; k > 0; k--) {
        product *= k;
    }
    return product;
}
// Double factorial: n!! = n * (n - 2) * (n - 4) * ... down to 1 or 2,
// with 0!! == (-1)!! == 1. Any other negative input is returned unchanged.
inline static int doubleFactorial( int x )
{
    if (x == 0 || x == -1) {
        return 1;
    }

    int product = x;
    for (int k = x - 2; k > 0; k -= 2) {
        product *= k;
    }
    return product;
}
/// Normalization constant for spherical harmonic:
/// sqrt( (2l + 1) * (l - m)! / (4 * PI * (l + m)!) ).
/// @param l is the band.
/// @param m is the argument, in the range [0, l]
inline static float K( int l, int m )
{
    nvDebugCheck( m >= 0 );

    // (l - m)! / (l + m)! == 1 / ((l - m + 1) * ... * (l + m)).
    // Accumulating that product in floating point avoids the integer overflow
    // that int factorials hit once l + m reaches 13. Results match the
    // factorial form up to floating-point rounding.
    float denominator = 1.0f;
    for (int k = l - m + 1; k <= l + m; k++) {
        denominator *= float(k);
    }

    return sqrtf((2 * l + 1) / (4 * PI * denominator));
}
/// Normalization constant for hemispherical harmonic:
/// sqrt( (2l + 1) * (l - m)! / (2 * PI * (l + m)!) ).
/// @param l is the band.
/// @param m is the argument, in the range [0, l]
inline static float HK( int l, int m )
{
    nvDebugCheck( m >= 0 );

    // (l - m)! / (l + m)! == 1 / ((l - m + 1) * ... * (l + m)).
    // Accumulating that product in floating point avoids the integer overflow
    // that int factorials hit once l + m reaches 13. Results match the
    // factorial form up to floating-point rounding.
    float denominator = 1.0f;
    for (int k = l - m + 1; k <= l + m; k++) {
        denominator *= float(k);
    }

    return sqrtf((2 * l + 1) / (2 * PI * denominator));
}
/// Evaluate the associated Legendre polynomial P(l, m) at @a x with the
/// standard recurrences. Lower bands are obtained through
/// legendrePolynomial(), so closed forms are used wherever they exist.
static float legendre( int l, int m, float x )
{
    // piDebugCheck( m >= 0 );
    // piDebugCheck( m <= l );
    // piDebugCheck( fabs(x) <= 1 );

    const int bandsAboveDiagonal = l - m;

    // Rule 2 needs no previous results
    if (bandsAboveDiagonal == 0) {
        const float sign = powf(-1.0f, float(m));
        const float scaled = sign * doubleFactorial(2 * m - 1);
        return scaled * powf(1 - x*x, 0.5f * m);
    }

    // Rule 3 requires the result for the same argument of the previous band
    if (bandsAboveDiagonal == 1) {
        return x * (2 * m + 1) * legendrePolynomial(m, m, x);
    }

    // Main recurrence used by rule 1 that uses result of the same argument from
    // the previous two bands
    const float fromPrevBand = x * (2 * l - 1) * legendrePolynomial(l - 1, m, x);
    const float fromPrevPrevBand = (l + m - 1) * legendrePolynomial(l - 2, m, x);
    return (fromPrevBand - fromPrevPrevBand) / (l - m);
}
// Closed-form associated Legendre polynomials P(l, m) for bands 0..4, selected
// at compile time. Only the specializations below exist; the primary template
// is declared but never defined. x is the point of evaluation.
template <int l, int m> float legendre(float x);
// Band 0.
template <> float legendre<0, 0>(float x) {
return 1;
}
// Band 1.
template <> float legendre<1, 0>(float x) {
return x;
}
template <> float legendre<1, 1>(float x) {
return -sqrtf(1 - x * x);
}
// Band 2.
template <> float legendre<2, 0>(float x) {
return -0.5f + (3 * x * x) / 2;
}
template <> float legendre<2, 1>(float x) {
return -3 * x * sqrtf(1 - x * x);
}
template <> float legendre<2, 2>(float x) {
return -3 * (-1 + x * x);
}
// Band 3.
template <> float legendre<3, 0>(float x) {
return -(3 * x) / 2 + (5 * x * x * x) / 2;
}
template <> float legendre<3, 1>(float x) {
return -3 * sqrtf(1 - x * x) / 2 * (-1 + 5 * x * x);
}
template <> float legendre<3, 2>(float x) {
return -15 * (-x + x * x * x);
}
template <> float legendre<3, 3>(float x) {
return -15 * powf(1 - x * x, 1.5f);
}
// Band 4.
template <> float legendre<4, 0>(float x) {
return 0.125f * (3.0f - 30.0f * x * x + 35.0f * x * x * x * x);
}
template <> float legendre<4, 1>(float x) {
return -2.5f * x * sqrtf(1.0f - x * x) * (7.0f * x * x - 3.0f);
}
template <> float legendre<4, 2>(float x) {
return -7.5f * (1.0f - 8.0f * x * x + 7.0f * x * x * x * x);
}
template <> float legendre<4, 3>(float x) {
return -105.0f * x * powf(1 - x * x, 1.5f);
}
template <> float legendre<4, 4>(float x) {
return 105.0f * (x * x - 1.0f) * (x * x - 1.0f);
}
} // namespace
/// Evaluate the associated Legendre polynomial P(l, m) at @a x.
/// Bands 0..4 use the closed forms; within those bands any m that is not one
/// of the lower values selects the highest specialization of the band. Every
/// other band falls back to the recurrence.
float nv::legendrePolynomial(int l, int m, float x)
{
    if (l == 0) {
        return legendre<0, 0>(x);
    }

    if (l == 1) {
        return (m == 0) ? legendre<1, 0>(x) : legendre<1, 1>(x);
    }

    if (l == 2) {
        switch (m) {
            case 0:  return legendre<2, 0>(x);
            case 1:  return legendre<2, 1>(x);
            default: return legendre<2, 2>(x);
        }
    }

    if (l == 3) {
        switch (m) {
            case 0:  return legendre<3, 0>(x);
            case 1:  return legendre<3, 1>(x);
            case 2:  return legendre<3, 2>(x);
            default: return legendre<3, 3>(x);
        }
    }

    if (l == 4) {
        switch (m) {
            case 0:  return legendre<4, 0>(x);
            case 1:  return legendre<4, 1>(x);
            case 2:  return legendre<4, 2>(x);
            case 3:  return legendre<4, 3>(x);
            default: return legendre<4, 4>(x);
        }
    }

    // Fallback to the expensive version.
    return legendre(l, m, x);
}
/**
* Evaluate the real spherical harmonic basis function for the given angles.
* Positive m uses the cosine of the azimuth, negative m the sine, and m == 0
* has no azimuthal term.
* @param l is the band.
* @param m is the argument, in the range [-l,l]
* @param theta is the altitude, in the range [0, PI]
* @param phi is the azimuth, in the range [0, 2*PI]
*/
float nv::y( int l, int m, float theta, float phi )
{
    if( m > 0 ) {
        const float norm = sqrtf(2.0f) * K(l, m);
        return norm * cosf(m * phi) * legendrePolynomial(l, m, cosf(theta));
    }

    if( m < 0 ) {
        const int absM = -m;
        const float norm = sqrtf(2.0f) * K(l, absM);
        return norm * sinf(absM * phi) * legendrePolynomial(l, absM, cosf(theta));
    }

    // m == 0: K(l, 0) = sqrt((2*l+1)/(4*PI))
    return sqrtf((2 * l + 1) / (4 * PI)) * legendrePolynomial(l, 0, cosf(theta));
}
/**
* Real spherical harmonic function of a unit vector. The vector is converted
* to angles through the equalities
* x = sin(theta)*cos(phi)
* y = sin(theta)*sin(phi)
* z = cos(theta)
* and the angular overload is called. The vector is not normalized here.
*/
float nv::y( int l, int m, Vector3::Arg v )
{
    const float altitude = acosf(v.z());
    const float azimuth = atan2f(v.y(), v.x());
    return y( l, m, altitude, azimuth );
}
/**
* Evaluate the real hemispherical harmonic basis function for the given
* angles. Same structure as the spherical version, but the Legendre
* polynomial is evaluated at 2*cos(theta)-1 and HK() is the normalization.
* @param l is the band.
* @param m is the argument, in the range [-l,l]
* @param theta is the altitude, in the range [0, PI/2]
* @param phi is the azimuth, in the range [0, 2*PI]
*/
float nv::hy( int l, int m, float theta, float phi )
{
    if( m > 0 ) {
        const float norm = sqrtf(2.0f) * HK(l, m);
        return norm * cosf(m * phi) * legendrePolynomial(l, m, 2*cosf(theta)-1);
    }

    if( m < 0 ) {
        const int absM = -m;
        const float norm = sqrtf(2.0f) * HK(l, absM);
        return norm * sinf(absM * phi) * legendrePolynomial(l, absM, 2*cosf(theta)-1);
    }

    // m == 0: HK(l, 0) = sqrt((2*l+1)/(2*PI))
    return sqrtf((2 * l + 1) / (2 * PI)) * legendrePolynomial(l, 0, 2*cosf(theta)-1);
}
/**
* Real hemispherical harmonic function of an unit vector. Uses the following
* equalities to call the angular function:
* x = sin(theta)*cos(phi)
* y = sin(theta)*sin(phi)
* z = cos(theta)
*/
float nv::hy( int l, int m, Vector3::Arg v )
{
float theta = acosf(v.z());
float phi = atan2f(v.y(), v.x());
return y( l, m, theta, phi );
}

View File

@ -0,0 +1,419 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SPHERICALHARMONIC_H
#define NV_MATH_SPHERICALHARMONIC_H
#include <nvmath/Vector.h>
namespace nv
{
NVMATH_API float legendrePolynomial( int l, int m, float x ) NV_CONST;
NVMATH_API float y( int l, int m, float theta, float phi ) NV_CONST;
NVMATH_API float y( int l, int m, Vector3::Arg v ) NV_CONST;
NVMATH_API float hy( int l, int m, float theta, float phi ) NV_CONST;
NVMATH_API float hy( int l, int m, Vector3::Arg v ) NV_CONST;
class Sh;
float dot(const Sh & a, const Sh & b) NV_CONST;
/// Spherical harmonic expansion: one float coefficient per (l, m) basis
/// function, for all bands up to a fixed order.
class Sh
{
    friend class Sh2;
    friend class ShMatrix;

public:

    /// Allocate storage for a spherical harmonic of order @a o.
    /// The coefficients are left uninitialized; use reset() or eval() to fill them.
    Sh(int o) : m_order(o), m_elemArray(new float[basisNum(o)])
    {
    }

    /// Copy constructor: allocates its own storage and copies every coefficient.
    Sh(const Sh & sh) : m_order(sh.order()), m_elemArray(new float[basisNum(sh.order())])
    {
        memcpy(m_elemArray, sh.m_elemArray, sizeof(float) * basisNum());
    }

    /// Destructor.
    ~Sh()
    {
        delete [] m_elemArray;
        m_elemArray = NULL;
    }

    /// Number of bands of an expansion of the given order.
    static int bandNum(int order) {
        return order + 1;
    }

    /// Number of basis functions (coefficients) of an expansion of the given order.
    static int basisNum(int order) {
        return (order + 1) * (order + 1);
    }

    /// Flat array index of the coefficient (l, m).
    static int index( int l, int m ) {
        return l * l + l + m;
    }

    /// Order of this expansion.
    int order() const
    {
        return m_order;
    }

    /// Number of bands of this expansion.
    int bandNum() const
    {
        return bandNum(m_order);
    }

    /// Number of coefficients of this expansion.
    int basisNum() const
    {
        return basisNum(m_order);
    }

    /// Coefficient (l, m), by value.
    float elem( int l, int m ) const
    {
        return m_elemArray[index(l, m)];
    }

    /// Coefficient (l, m), by reference.
    float & elem( int l, int m )
    {
        return m_elemArray[index(l, m)];
    }

    /// Coefficient at flat index @a i, by value.
    float elemAt( int i ) const {
        return m_elemArray[i];
    }

    /// Coefficient at flat index @a i, by reference.
    float & elemAt( int i )
    {
        return m_elemArray[i];
    }

    /// Set every coefficient to zero.
    void reset()
    {
        const int count = basisNum();
        for (int i = 0; i < count; i++) {
            m_elemArray[i] = 0.0f;
        }
    }

    /// Copy the coefficients of @a sh. The source must have at least this
    /// order (debug check); only the first basisNum() coefficients are copied.
    void operator= ( const Sh & sh )
    {
        nvDebugCheck(order() <= sh.order());

        const int count = basisNum();
        for (int i = 0; i < count; i++) {
            m_elemArray[i] = sh.m_elemArray[i];
        }
    }

    /// Add spherical harmonics coefficient by coefficient.
    void operator+= ( const Sh & sh )
    {
        nvDebugCheck(order() == sh.order());

        const int count = basisNum();
        for (int i = 0; i < count; i++) {
            m_elemArray[i] += sh.m_elemArray[i];
        }
    }

    /// Subtract spherical harmonics coefficient by coefficient.
    void operator-= ( const Sh & sh )
    {
        nvDebugCheck(order() == sh.order());

        const int count = basisNum();
        for (int i = 0; i < count; i++) {
            m_elemArray[i] -= sh.m_elemArray[i];
        }
    }

    // Not exactly convolution, nor product: coefficient-wise multiplication.
    void operator*= ( const Sh & sh )
    {
        nvDebugCheck(order() == sh.order());

        const int count = basisNum();
        for (int i = 0; i < count; i++) {
            m_elemArray[i] *= sh.m_elemArray[i];
        }
    }

    /// Scale every coefficient by @a f.
    void operator*= ( float f )
    {
        const int count = basisNum();
        for (int i = 0; i < count; i++) {
            m_elemArray[i] *= f;
        }
    }

    /// Add @a sh scaled by @a f.
    void addScaled( const Sh & sh, float f )
    {
        nvDebugCheck(order() == sh.order());

        const int count = basisNum();
        for (int i = 0; i < count; i++) {
            m_elemArray[i] += sh.m_elemArray[i] * f;
        }
    }

    /*/// Add a weighted sample to the sh coefficients.
    void AddSample( const Vec3 & dir, const Color3f & color, float w=1.0f ) {
    for(int l = 0; l <= order; l++) {
    for(int m = -l; m <= l; m++) {
    Color3f & elem = GetElem(l, m);
    elem.Mad( elem, color, w * y(l, m, dir) );
    }
    }
    }*/

    /// Overwrite every coefficient with the value of its basis function in direction @a dir.
    void eval(Vector3::Arg dir)
    {
        for (int l = 0; l <= m_order; l++) {
            for (int m = -l; m <= l; m++) {
                m_elemArray[index(l, m)] = y(l, m, dir);
            }
        }
    }

    /// Evaluate the function represented by this expansion in direction @a dir:
    /// the dot product of the basis values with the coefficients.
    float sample(Vector3::Arg dir) const
    {
        Sh basis(order());
        basis.eval(dir);
        return dot(basis, *this);
    }

protected:

    const int m_order;
    float * m_elemArray;
};
/// Dot product of two spherical harmonics: the sum of products of matching
/// coefficients. Both operands must have the same order (debug check).
inline float dot(const Sh & a, const Sh & b)
{
    nvDebugCheck(a.order() == b.order());

    const int count = Sh::basisNum(a.order());

    float accum = 0;
    for (int i = 0; i < count; i++) {
        accum += a.elemAt(i) * b.elemAt(i);
    }
    return accum;
}
/// Second order spherical harmonic (9 coefficients).
class Sh2 : public Sh
{
public:

    /// Constructor.
    Sh2() : Sh(2) {}

    /// Copy constructor.
    Sh2(const Sh2 & sh) : Sh(sh) {}

    /// Spherical harmonic resulting from projecting the clamped cosine transfer function to the SH basis.
    void cosineTransfer()
    {
        // Basis normalization constants.
        const float k00 = 0.282095f; // K(0, 0)
        const float k10 = 0.488603f; // K(1, 0)
        const float k2a = 1.092548f; // sqrt(15.0f / PI) / 2.0f = K(2, -2)
        const float k20 = 0.315392f; // sqrt(5.0f / PI) / 4.0f = K(2, 0)
        const float k22 = 0.546274f; // sqrt(15.0f / PI) / 4.0f = K(2, 2)

        const float normalization = PI * 16.0f / 17.0f;

        // Per-band weights of the transfer function.
        const float w0  = k00 * normalization * 1.0f;
        const float w1  = k10 * normalization * (2.0f / 3.0f);
        const float w2a = k2a * normalization * (1.0f / 4.0f);
        const float w20 = k20 * normalization * (1.0f / 4.0f);
        const float w22 = k22 * normalization * (1.0f / 4.0f);

        // Coefficients in flat index order.
        const float coefficients[9] = {
            w0,
            -w1, w1, -w1,
            w2a, -w2a, w20, -w2a, w22
        };

        for (int i = 0; i < 9; i++) {
            m_elemArray[i] = coefficients[i];
        }
    }
};
#if 0
/// Spherical harmonic matrix.
class ShMatrix
{
public:
/// Create an identity matrix of the given order.
ShMatrix(int o = 2) : order(o), identity(true)
{
nvCheck(order > 0);
e = new float[Size()];
band = new float *[GetBandNum()];
setupBands();
}
/// Destroy and free matrix elements.
~ShMatrix()
{
delete e;
delete band;
}
/// Set identity matrix.
void setIdentity()
{
identity = true;
}
/// Return true if this is an identity matrix, false in other case.
bool isIdentity() const {
return identity;
}
/// Get number of bands of this matrix.
int bandNum() const
{
return order+1;
}
/// Get total number of elements in the matrix.
int size() const
{
int size = 0;
for( int i = 0; i < bandNum(); i++ ) {
size += SQ(i * 2 + 1);
}
return size;
}
/// Get element at the given raw index.
float elem(const int idx) const
{
return e[idx];
}
/// Get element at the given with the given indices.
float & elem( const int b, const int x, const int y )
{
nvDebugCheck(b >= 0);
nvDebugCheck(b < bandNum());
return band[b][(b + y) * (b * 2 + 1) + (b + x)];
}
/// Get element at the given with the given indices.
float elem( const int b, const int x, const int y ) const
{
nvDebugCheck(b >= 0);
nvDebugCheck(b < bandNum());
return band[b][(b + y) * (b * 2 + 1) + (b + x)];
}
/** Copy matrix. */
void Copy( const ShMatrix & m )
{
nvDebugCheck(order == m.order);
memcpy(e, m.e, Size() * sizeof(float));
}
/** Rotate the given coefficients. */
void transform( const Sh & restrict source, Sh * restrict dest ) const {
piCheck( &source != dest ); // Make sure there's no aliasing.
piCheck( dest->order <= order );
piCheck( order <= source.order );
if( identity ) {
*dest = source;
return;
}
// Loop through each band.
for( int l = 0; l <= dest->order; l++ ) {
for( int mo = -l; mo <= l; mo++ ) {
Color3f rgb = Color3f::Black;
for( int mi = -l; mi <= l; mi++ ) {
rgb.Mad( rgb, source.elem(l, mi), elem(l, mo, mi) );
}
dest->elem(l, mo) = rgb;
}
}
}
MATHLIB_API void multiply( const ShMatrix &A, const ShMatrix &B );
MATHLIB_API void rotation( const Matrix & m );
MATHLIB_API void rotation( int axis, float angles );
MATHLIB_API void print();
private:
// @@ These could be static indices precomputed only once.
/// Setup the band pointers.
void setupBands()
{
int size = 0;
for( int i = 0; i < bandNum(); i++ ) {
band[i] = &e[size];
size += SQ(i * 2 + 1);
}
}
private:
// Matrix order.
const int m_order;
// Identity flag for quick transform.
bool m_identity;
// Array of elements.
float * m_e;
// Band pointers.
float ** m_band;
};
#endif // 0
} // nv namespace
#endif // NV_MATH_SPHERICALHARMONIC_H

226
src/nvmath/TriBox.cpp Normal file
View File

@ -0,0 +1,226 @@
/********************************************************/
/* AABB-triangle overlap test code */
/* by Tomas Akenine-Möller */
/* Function: int triBoxOverlap(float boxcenter[3], */
/* float boxhalfsize[3],float triverts[3][3]); */
/* History: */
/* 2001-03-05: released the code in its first version */
/* 2001-06-18: changed the order of the tests, faster */
/* */
/* Acknowledgement: Many thanks to Pierre Terdiman for */
/* suggestions and discussions on how to optimize code. */
/* Thanks to David Hunt for finding a ">="-bug! */
/********************************************************/
#include <nvmath/Vector.h>
#include <nvmath/Triangle.h>
using namespace nv;
#define X 0
#define Y 1
#define Z 2
// Store the smallest and the largest of x0, x1, x2 in min and max.
// Expands to several statements, so it must not be used as the unbraced body
// of an if/else/loop.
#define FINDMINMAX(x0,x1,x2,min,max) \
min = max = x0; \
if(x1<min) min=x1;\
if(x1>max) max=x1;\
if(x2<min) min=x2;\
if(x2>max) max=x2;
/// Test a plane, given by its normal and one point on it (vert), against a box
/// with half extents maxbox. The callers in this file pass coordinates that
/// are relative to the box center.
static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg maxbox) // -NJMP-
{
    // Per-axis sign of the normal; a zero component counts as negative.
    const float sx = (normal.x() <= 0.0f) ? -1.0f : 1.0f;
    const float sy = (normal.y() <= 0.0f) ? -1.0f : 1.0f;
    const float sz = (normal.z() <= 0.0f) ? -1.0f : 1.0f;
    Vector3 sign(sx, sy, sz);

    // Box corners that lie furthest against and along the normal, relative to vert.
    Vector3 corner = scale(sign, maxbox);
    Vector3 vmin = -corner - vert;
    Vector3 vmax = corner - vert;

    // The plane cuts the box only if the two extreme corners are on opposite sides.
    if (dot(normal, vmin) > 0.0f) {
        return false;
    }
    return dot(normal, vmax) >= 0.0f;
}
/*======================== X-tests ========================*/
// Separating-axis tests for an axis built from a triangle edge and the X
// direction. a and b are edge components, fa and fb their absolute values.
// The macros rely on the locals p0, p1, p2, min, max, rad, v0, v1, v2 and
// boxhalfsize of the enclosing function, and make that function return false
// when the projected triangle interval [min, max] misses [-rad, rad].
// AXISTEST_X01 projects vertices v0 and v2.
#define AXISTEST_X01(a, b, fa, fb) \
p0 = a*v0.y() - b*v0.z(); \
p2 = a*v2.y() - b*v2.z(); \
if(p0<p2) {min=p0; max=p2;} else {min=p2; max=p0;} \
rad = fa * boxhalfsize.y() + fb * boxhalfsize.z(); \
if(min>rad || max<-rad) return false;
// AXISTEST_X2 projects vertices v0 and v1.
#define AXISTEST_X2(a, b, fa, fb) \
p0 = a*v0.y() - b*v0.z(); \
p1 = a*v1.y() - b*v1.z(); \
if(p0<p1) {min=p0; max=p1;} else {min=p1; max=p0;} \
rad = fa * boxhalfsize.y() + fb * boxhalfsize.z(); \
if(min>rad || max<-rad) return false;
/*======================== Y-tests ========================*/
// Same scheme as the X-tests, for an axis built from a triangle edge and the
// Y direction: the projection uses the x and z components and the box radius
// uses boxhalfsize.x() and boxhalfsize.z().
// AXISTEST_Y02 projects vertices v0 and v2.
#define AXISTEST_Y02(a, b, fa, fb) \
p0 = -a*v0.x() + b*v0.z(); \
p2 = -a*v2.x() + b*v2.z(); \
if(p0<p2) {min=p0; max=p2;} else {min=p2; max=p0;} \
rad = fa * boxhalfsize.x() + fb * boxhalfsize.z(); \
if(min>rad || max<-rad) return false;
// AXISTEST_Y1 projects vertices v0 and v1.
#define AXISTEST_Y1(a, b, fa, fb) \
p0 = -a*v0.x() + b*v0.z(); \
p1 = -a*v1.x() + b*v1.z(); \
if(p0<p1) {min=p0; max=p1;} else {min=p1; max=p0;} \
rad = fa * boxhalfsize.x() + fb * boxhalfsize.z(); \
if(min>rad || max<-rad) return false;
/*======================== Z-tests ========================*/
// Same scheme as the X-tests, for an axis built from a triangle edge and the
// Z direction: the projection uses the x and y components and the box radius
// uses boxhalfsize.x() and boxhalfsize.y().
// AXISTEST_Z12 projects vertices v1 and v2.
#define AXISTEST_Z12(a, b, fa, fb) \
p1 = a*v1.x() - b*v1.y(); \
p2 = a*v2.x() - b*v2.y(); \
if(p2<p1) {min=p2; max=p1;} else {min=p1; max=p2;} \
rad = fa * boxhalfsize.x() + fb * boxhalfsize.y(); \
if(min>rad || max<-rad) return false;
// AXISTEST_Z0 projects vertices v0 and v1.
#define AXISTEST_Z0(a, b, fa, fb) \
p0 = a*v0.x() - b*v0.y(); \
p1 = a*v1.x() - b*v1.y(); \
if(p0<p1) {min=p0; max=p1;} else {min=p1; max=p0;} \
rad = fa * boxhalfsize.x() + fb * boxhalfsize.y(); \
if(min>rad || max<-rad) return false;
/// Run the nine cross(triangle edge, {x,y,z}-direction) separating-axis tests.
/// v0, v1, v2 are the triangle vertices relative to the box center.
/// Returns false as soon as one of those axes separates triangle and box.
static bool triBoxEdgeAxesOverlap(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2, Vector3::Arg boxhalfsize)
{
    // Scratch variables written by the AXISTEST_* macros.
    float min, max, p0, p1, p2, rad, fex, fey, fez;

    // Compute triangle edges.
    const Vector3 e0 = v1 - v0;      // tri edge 0
    const Vector3 e1 = v2 - v1;      // tri edge 1
    const Vector3 e2 = v0 - v2;      // tri edge 2

    fex = fabsf(e0.x());
    fey = fabsf(e0.y());
    fez = fabsf(e0.z());
    AXISTEST_X01(e0.z(), e0.y(), fez, fey);
    AXISTEST_Y02(e0.z(), e0.x(), fez, fex);
    AXISTEST_Z12(e0.y(), e0.x(), fey, fex);

    fex = fabsf(e1.x());
    fey = fabsf(e1.y());
    fez = fabsf(e1.z());
    AXISTEST_X01(e1.z(), e1.y(), fez, fey);
    AXISTEST_Y02(e1.z(), e1.x(), fez, fex);
    AXISTEST_Z0(e1.y(), e1.x(), fey, fex);

    fex = fabsf(e2.x());
    fey = fabsf(e2.y());
    fez = fabsf(e2.z());
    AXISTEST_X2(e2.z(), e2.y(), fez, fey);
    AXISTEST_Y1(e2.z(), e2.x(), fez, fex);
    AXISTEST_Z12(e2.y(), e2.x(), fey, fex);

    return true;
}

/// Triangle / axis-aligned box overlap test based on the separating axis theorem.
///
/// Thirteen candidate axes are tested:
///   1) the {x,y,z}-directions (the triangle's AABB against the box),
///   2) the normal of the triangle,
///   3) crossproduct(edge from tri, {x,y,z}-direction), which gives 3x3=9 tests.
/// The nine tests of 3) run first, then 1), then 2).
bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{
    // Move everything so that the box center is at (0,0,0).
    const Vector3 v0 = tri.v[0] - boxcenter;
    const Vector3 v1 = tri.v[1] - boxcenter;
    const Vector3 v2 = tri.v[2] - boxcenter;

    // Bullet 3: the nine edge cross-product axes.
    if (!triBoxEdgeAxesOverlap(v0, v1, v2, boxhalfsize)) return false;

    // Bullet 1: overlap in the {x,y,z}-directions. Find min and max of the
    // triangle in each direction and compare with the box extent; this is
    // equivalent to testing a minimal AABB around the triangle against the box.
    float min, max;

    // test in X-direction
    FINDMINMAX(v0.x(), v1.x(), v2.x(), min, max);
    if (min > boxhalfsize.x() || max < -boxhalfsize.x()) return false;

    // test in Y-direction
    FINDMINMAX(v0.y(), v1.y(), v2.y(), min, max);
    if (min > boxhalfsize.y() || max < -boxhalfsize.y()) return false;

    // test in Z-direction
    FINDMINMAX(v0.z(), v1.z(), v2.z(), min, max);
    if (min > boxhalfsize.z() || max < -boxhalfsize.z()) return false;

    // Bullet 2: test if the box intersects the plane of the triangle.
    const Vector3 normal = cross(v1 - v0, v2 - v1);
    return planeBoxOverlap(normal, v0, boxhalfsize);
}

/// Same as triBoxOverlap, but without the {x,y,z}-direction (bounding box)
/// tests: only the nine edge cross-product axes and the triangle plane are
/// tested.
bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{
    // Move everything so that the box center is at (0,0,0).
    const Vector3 v0 = tri.v[0] - boxcenter;
    const Vector3 v1 = tri.v[1] - boxcenter;
    const Vector3 v2 = tri.v[2] - boxcenter;

    // Bullet 3: the nine edge cross-product axes.
    if (!triBoxEdgeAxesOverlap(v0, v1, v2, boxhalfsize)) return false;

    // Bullet 2: test if the box intersects the plane of the triangle.
    const Vector3 normal = cross(v1 - v0, v2 - v1);
    return planeBoxOverlap(normal, v0, boxhalfsize);
}

168
src/nvmath/Triangle.cpp Normal file
View File

@ -0,0 +1,168 @@
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
#include <nvmath/Triangle.h>
using namespace nv;
/// Tomas M<>ller, barycentric ray-triangle test.
bool rayTest_Moller(const Triangle & t, Vector3::Arg orig, Vector3::Arg dir, float * out_t, float * out_u, float * out_v)
{
// find vectors for two edges sharing vert0
Vector3 e1 = t.v[1] - t.v[0];
Vector3 e2 = t.v[2] - t.v[0];
// begin calculating determinant - also used to calculate U parameter
Vector3 pvec = cross(dir, e2);
// if determinant is near zero, ray lies in plane of triangle
float det = dot(e1, pvec);
if (det < -NV_EPSILON) {
return false;
}
// calculate distance from vert0 to ray origin
Vector3 tvec = orig - t.v[0];
// calculate U parameter and test bounds
float u = dot(tvec, pvec);
if( u < 0.0f || u > det ) {
return false;
}
// prepare to test V parameter
Vector3 qvec = cross(tvec, e1);
// calculate V parameter and test bounds
float v = dot(dir, qvec);
if (v < 0.0f || u + v > det) {
return false;
}
// calculate t, scale parameters, ray intersects triangle
float inv_det = 1.0f / det;
*out_t = dot(e2, qvec) * inv_det;
*out_u = u * inv_det; // v
*out_v = v * inv_det; // 1-(u+v)
return true;
}
#if 0
// IC: This code is adapted from my Pi.MathLib code, based on Moller-Trumbore triangle test.
FXVector3 edge1, edge2, pvec, tvec, qvec;
edge1 = tri.V1 - tri.V0;
edge2 = tri.V2 - tri.V0;
pvec.Cross(ray.Direction, edge2);
float det = FXVector3.Dot(edge1, pvec);
// calculate distance from vert0 to ray origin.
FXVector3 tvec = ray.Origin - vert0;
if( det < 0 )
{
// calculate U parameter and test bounds.
float u = FXVector3.Dot(tvec, pvec);
if (u > 0.0 || u < det)
{
return false;
}
// prepare to test V parameter.
qvec.Cross(tvec, edge1);
// calculate V parameter and test bounds.
float v = FXVector3.Dot(dir, qvec);
return v <= 0.0 && u + v >= det;
}
else
{
// calculate U parameter and test bounds.
float u = FXVector3.Dot(tvec, pvec);
if (u < 0.0 || u > det)
{
return false;
}
// prepare to test V parameter.
qvec.Cross(tvec, edge1);
// calculate V parameter and test bounds.
float v = FXVector3.Dot(dir, qvec);
return v >= 0.0 && u + v <= det;
}
/**
* Dan Sunday, parametric ray-triangle test.
*/
// Output: I = intersection point of the ray with the triangle plane (written
//         once the plane intersection has been found)
// Return: false = ray parallel to the triangle plane, ray pointing away from
//                 it, or intersection point outside the triangle
//         true  = the ray hits the triangle at I
bool RayTriangleTest( const Vec3 &p0, const Vec3 &p1,
const Vec3 &v0, const Vec3 &v1, const Vec3 &v2, const Vec3 &n,
Vec3 &I ) {
Vec3 u, v; // triangle vectors
Vec3 dir, w0, w; // ray vectors
float r, a, b; // params to calc ray-plane intersect
// get triangle edge vectors and plane normal
u.Sub( v1, v0 );
v.Sub( v2, v0 );
dir.Sub( p1, p0 ); // ray direction vector
w0.Sub( p0, v0 );
a = Vec3DotProduct( n, w0 );
b = Vec3DotProduct( n, dir );
if( fabs(b) < TI_EPSILON ) // ray is parallel to triangle plane
return false;
// get intersect point of ray with triangle plane
r = -a / b;
if( r < 0.0f ) // ray goes away from triangle
return false; // => no intersect
// for a segment, also test if (r > 1.0) => no intersect
I.Mad( p0, dir, r ); // intersect point of ray and plane
// is I inside T?
float uu, uv, vv, wu, wv, D;
uu = Vec3DotProduct( u, u );
uv = Vec3DotProduct( u, v );
vv = Vec3DotProduct( v, v );
w = I - v0;
wu = Vec3DotProduct( w, u );
wv = Vec3DotProduct( w, v );
D = uv * uv - uu * vv;
// get and test parametric coords
float s, t;
s = (uv * wv - vv * wu) / D;
if( s<0.0 || s > 1.0) // I is outside T
return false;
t = (uv * wu - uu * wv) / D;
if( t<0.0 || (s + t) > 1.0) // I is outside T
return false;
return true; // I is in T
}
#endif // 0

81
src/nvmath/Triangle.h Normal file
View File

@ -0,0 +1,81 @@
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
#ifndef NV_MATH_TRIANGLE_H
#define NV_MATH_TRIANGLE_H
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
#include <nvmath/Box.h>
namespace nv
{
/// Triangle defined by three vertices.
class Triangle
{
public:

    /// Default constructor; the vertices are whatever Vector3's default
    /// constructor produces.
    Triangle() {}

    /// Build a triangle from its three corners.
    Triangle(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2)
    {
        v[0] = v0;
        v[1] = v1;
        v[2] = v2;
    }

    /// Get the bounds of the triangle.
    Box bounds() const
    {
        Box result;
        result.clearBounds();
        for (int i = 0; i < 3; i++) {
            result.addPointToBounds(v[i]);
        }
        return result;
    }

    /// Get the plane of the triangle as (n, d), where n is the unnormalized
    /// cross product of the two edges leaving v[0] and d = dot(n, v[0]).
    Vector4 plane() const
    {
        const Vector3 edge01 = v[1] - v[0];
        const Vector3 edge02 = v[2] - v[0];
        Vector3 n = cross(edge01, edge02);
        return Vector4(n, dot(n, v[0]));
    }

    /// The three corners.
    Vector3 v[3];
};
// Tomas Akenine-Möller box-triangle test.
NVMATH_API bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & triangle);
NVMATH_API bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & triangle);
// Moller ray triangle test.
NVMATH_API bool rayTest_Moller(const Triangle & t, Vector3::Arg orig, Vector3::Arg dir, float * out_t, float * out_u, float * out_v);
/// Default ray-triangle test; forwards all arguments to rayTest_Moller and
/// returns its result.
inline bool rayTest(const Triangle & t, Vector3::Arg orig, Vector3::Arg dir, float * out_t, float * out_u, float * out_v)
{
return rayTest_Moller(t, orig, dir, out_t, out_u, out_v);
}
inline bool overlap(const Triangle & t, const Box & b)
{
Vector3 center = b.center();
Vector3 extents = b.extents();
return triBoxOverlap(center, extents, t);
}
/// Same test with the arguments in the opposite order; forwards to
/// overlap(t, b).
inline bool overlap(const Box & b, const Triangle & t)
{
return overlap(t, b);
}
inline bool overlapNoBounds(const Triangle & t, const Box & b)
{
Vector3 center = b.center();
Vector3 extents = b.extents();
return triBoxOverlapNoBounds(center, extents, t);
}
} // nv namespace
#endif // NV_MATH_TRIANGLE_H

View File

@ -4,7 +4,7 @@
#define NV_MATH_VECTOR_H
#include <nvmath/nvmath.h>
#include <nvcore/Algorithms.h> // min, max
#include <nvcore/Containers.h> // min, max
namespace nv
{
@ -27,7 +27,6 @@ public:
Vector2(Vector2::Arg v);
const Vector2 & operator=(Vector2::Arg v);
void setComponent(uint idx, scalar f);
scalar x() const;
scalar y() const;
@ -72,7 +71,6 @@ public:
const Vector2 & xy() const;
scalar component(uint idx) const;
void setComponent(uint idx, scalar f);
const scalar * ptr() const;
@ -117,7 +115,6 @@ public:
const Vector3 & xyz() const;
scalar component(uint idx) const;
void setComponent(uint idx, scalar f);
const scalar * ptr() const;
@ -164,14 +161,6 @@ inline scalar Vector2::component(uint idx) const
return 0.0f;
}
inline void Vector2::setComponent(uint idx, float f)
{
nvDebugCheck(idx < 2);
if (idx == 0) m_x = f;
else if (idx == 1) m_y = f;
}
inline const scalar * Vector2::ptr() const
{
return &m_x;
@ -250,21 +239,13 @@ inline const Vector2 & Vector3::xy() const
inline scalar Vector3::component(uint idx) const
{
nvDebugCheck(idx < 3);
if (idx == 0) return m_x;
if (idx == 1) return m_y;
if (idx == 2) return m_z;
if (idx == 0) return x();
if (idx == 1) return y();
if (idx == 2) return z();
nvAssume(false);
return 0.0f;
}
inline void Vector3::setComponent(uint idx, float f)
{
nvDebugCheck(idx < 3);
if (idx == 0) m_x = f;
else if (idx == 1) m_y = f;
else if (idx == 2) m_z = f;
}
inline const scalar * Vector3::ptr() const
{
return &m_x;
@ -372,15 +353,6 @@ inline scalar Vector4::component(uint idx) const
return 0.0f;
}
inline void Vector4::setComponent(uint idx, float f)
{
nvDebugCheck(idx < 4);
if (idx == 0) m_x = f;
else if (idx == 1) m_y = f;
else if (idx == 2) m_z = f;
else if (idx == 3) m_w = f;
}
inline const scalar * Vector4::ptr() const
{
return &m_x;
@ -505,35 +477,6 @@ inline scalar length(Vector2::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector2::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
}
inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
{
float l = length(v);
nvDebugCheck(!isZero(l, epsilon));
Vector2 n = scale(v, 1.0f / l);
nvDebugCheck(isNormalized(n));
return n;
}
inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
{
float l = length(v);
if (isZero(l, epsilon)) {
return fallback;
}
return scale(v, 1.0f / l);
}
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
{
return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
@ -652,11 +595,6 @@ inline scalar length(Vector3::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector3::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
@ -778,11 +716,6 @@ inline scalar length(Vector4::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector4::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);

View File

@ -1,196 +1,164 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_H
#define NV_MATH_H
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
#include <math.h>
#include <limits.h> // INT_MAX
#if NV_OS_WIN32
#include <float.h>
#endif
// Function linkage
#if NVMATH_SHARED
#ifdef NVMATH_EXPORTS
#define NVMATH_API DLL_EXPORT
#define NVMATH_CLASS DLL_EXPORT_CLASS
#else
#define NVMATH_API DLL_IMPORT
#define NVMATH_CLASS DLL_IMPORT
#endif
#else // NVMATH_SHARED
#define NVMATH_API
#define NVMATH_CLASS
#endif // NVMATH_SHARED
#ifndef PI
#define PI float(3.1415926535897932384626433833)
#endif
#define NV_EPSILON (0.0001f)
#define NV_NORMAL_EPSILON (0.001f)
/*
#define SQ(r) ((r)*(r))
#define SIGN_BITMASK 0x80000000
/// Integer representation of a floating-point value.
#define IR(x) ((uint32 &)(x))
/// Absolute integer representation of a floating-point value
#define AIR(x) (IR(x) & 0x7fffffff)
/// Floating-point representation of an integer value.
#define FR(x) ((float&)(x))
/// Integer-based comparison of a floating point value.
/// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
*/
/// sqrt() preceded by an nvDebugCheck that the argument is not negative.
/// Defined before the `#define sqrt sqrt_assert` below, so the call inside
/// still refers to the library function.
inline double sqrt_assert(const double f)
{
nvDebugCheck(f >= 0.0f);
return sqrt(f);
}
/// sqrtf() preceded by an nvDebugCheck that the argument is not negative.
inline float sqrtf_assert(const float f)
{
nvDebugCheck(f >= 0.0f);
return sqrtf(f);
}
/// acos() preceded by an nvDebugCheck that the argument lies in [-1, 1].
inline double acos_assert(const double f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return acos(f);
}
/// acosf() preceded by an nvDebugCheck that the argument lies in [-1, 1].
inline float acosf_assert(const float f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return acosf(f);
}
/// asin() preceded by an nvDebugCheck that the argument lies in [-1, 1].
inline double asin_assert(const double f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return asin(f);
}
/// asinf() preceded by an nvDebugCheck that the argument lies in [-1, 1].
inline float asinf_assert(const float f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return asinf(f);
}
// Replace default functions with asserting ones.
#define sqrt sqrt_assert
#define sqrtf sqrtf_assert
#define acos acos_assert
#define acosf acosf_assert
#define asin asin_assert
#define asinf asinf_assert
namespace nv
{
/// Convert an angle from degrees to radians.
inline float toRadian(float degree) { return degree * (PI / 180.0f); }
/// Convert an angle from radians to degrees.
inline float toDegree(float radian) { return radian * (180.0f / PI); }
/// Return true if f0 and f1 differ by at most epsilon.
inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
{
    const float delta = f0 - f1;
    return fabs(delta) <= epsilon;
}
/// Return true if the absolute value of f is at most epsilon.
inline bool isZero(const float f, const float epsilon = NV_EPSILON)
{
return fabs(f) <= epsilon;
}
/// Return whether f is finite, using the platform-specific function selected
/// at compile time. Platforms other than Win32, Darwin, FreeBSD and Linux
/// stop the build with an #error.
inline bool isFinite(const float f)
{
#if NV_OS_WIN32
return _finite(f) != 0;
#elif NV_OS_DARWIN || NV_OS_FREEBSD
return isfinite(f);
#elif NV_OS_LINUX
return finitef(f);
#else
# error "isFinite not supported"
#endif
//return std::isfinite (f);
//return finite (f);
}
/// Return whether f is NaN, using the platform-specific function selected at
/// compile time. Platforms other than Win32, Darwin, FreeBSD and Linux stop
/// the build with an #error.
inline bool isNan(const float f)
{
#if NV_OS_WIN32
return _isnan(f) != 0;
#elif NV_OS_DARWIN || NV_OS_FREEBSD
return isnan(f);
#elif NV_OS_LINUX
return isnanf(f);
#else
# error "isNan not supported"
#endif
}
/// Integer base-2 logarithm: the number of times i can be shifted right by
/// one bit before it becomes zero. Returns 0 for both 0 and 1.
inline uint log2(uint i)
{
    uint shifts = 0;
    for (i >>= 1; i != 0; i >>= 1) {
        shifts++;
    }
    return shifts;
}
/// Base-2 logarithm computed as logf(x) / logf(2).
/// The check accepts x == 0, for which logf(0) is evaluated.
inline float log2f(float x)
{
nvCheck(x >= 0);
return logf(x) / logf(2.0f);
}
/// Linear interpolation: returns f0 for t = 0 and f1 for t = 1. t is not
/// clamped.
inline float lerp(float f0, float f1, float t)
{
    return f0 * (1.0f - t) + f1 * t;
}
/// Return f multiplied by itself.
inline float square(float f)
{
return f * f;
}
// @@ Float to int conversions to be optimized at some point. See:
// http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html
// http://www.stereopsis.com/sree/fpu2006.html
// http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/
// http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
/// Round f to the nearest integer; halfway cases round toward +infinity.
inline int iround(float f)
{
    // A plain int(f) cast truncates toward zero (0.9f -> 0, -0.9f -> 0), which
    // is not rounding. Shift by one half and take the floor instead.
    return int(floorf(f + 0.5f));
}
/// Largest integer not greater than f, as an int.
inline int ifloor(float f)
{
    const float lowered = floorf(f);
    return static_cast<int>(lowered);
}
/// Smallest integer not less than f, as an int.
inline int iceil(float f)
{
    const float raised = ceilf(f);
    return static_cast<int>(raised);
}
/// Fractional part of f, computed as f minus its floor, so the result is
/// measured from the next lower integer for negative inputs as well.
inline float frac(float f)
{
return f - floor(f);
}
} // nv
#endif // NV_MATH_H
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_H
#define NV_MATH_H
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
#include <math.h>
// Function linkage
#if NVMATH_SHARED
#ifdef NVMATH_EXPORTS
#define NVMATH_API DLL_EXPORT
#define NVMATH_CLASS DLL_EXPORT_CLASS
#else
#define NVMATH_API DLL_IMPORT
#define NVMATH_CLASS DLL_IMPORT
#endif
#else // NVMATH_SHARED
#define NVMATH_API
#define NVMATH_CLASS
#endif // NVMATH_SHARED
#ifndef PI
#define PI float(3.1415926535897932384626433833)
#endif
#define NV_EPSILON (0.0001f)
#define NV_NORMAL_EPSILON (0.001f)
/*
#define SQ(r) ((r)*(r))
#define SIGN_BITMASK 0x80000000
/// Integer representation of a floating-point value.
#define IR(x) ((uint32 &)(x))
/// Absolute integer representation of a floating-point value
#define AIR(x) (IR(x) & 0x7fffffff)
/// Floating-point representation of an integer value.
#define FR(x) ((float&)(x))
/// Integer-based comparison of a floating point value.
/// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
*/
/// sqrt() preceded by an nvDebugCheck that the argument is not negative.
/// Defined before the `#define sqrt sqrt_assert` below, so the call inside
/// still refers to the library function.
inline double sqrt_assert(const double f)
{
nvDebugCheck(f >= 0.0f);
return sqrt(f);
}
/// sqrtf() preceded by an nvDebugCheck that the argument is not negative.
inline float sqrtf_assert(const float f)
{
nvDebugCheck(f >= 0.0f);
return sqrtf(f);
}
/// acos() preceded by an nvDebugCheck that the argument lies in [-1, 1].
inline double acos_assert(const double f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return acos(f);
}
/// acosf() preceded by an nvDebugCheck that the argument lies in [-1, 1].
inline float acosf_assert(const float f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return acosf(f);
}
/// asin() preceded by an nvDebugCheck that the argument lies in [-1, 1].
inline double asin_assert(const double f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return asin(f);
}
/// asinf() preceded by an nvDebugCheck that the argument lies in [-1, 1].
inline float asinf_assert(const float f)
{
nvDebugCheck(f >= -1.0f && f <= 1.0f);
return asinf(f);
}
// Replace default functions with asserting ones.
#define sqrt sqrt_assert
#define sqrtf sqrtf_assert
#define acos acos_assert
#define acosf acosf_assert
#define asin asin_assert
#define asinf asinf_assert
#if NV_OS_WIN32
#include <float.h>
#endif
namespace nv
{
/// Convert an angle from degrees to radians.
inline float toRadian(float degree) { return degree * (PI / 180.0f); }
/// Convert an angle from radians to degrees.
inline float toDegree(float radian) { return radian * (180.0f / PI); }
/// Return true if f0 and f1 differ by at most epsilon.
inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
{
    const float delta = f0 - f1;
    return fabs(delta) <= epsilon;
}
/// Return true if the absolute value of f is at most epsilon.
inline bool isZero(const float f, const float epsilon = NV_EPSILON)
{
return fabs(f) <= epsilon;
}
/// Return whether f is finite, using the platform-specific function selected
/// at compile time. Platforms other than Win32, Darwin and Linux stop the
/// build with an #error.
inline bool isFinite(const float f)
{
#if NV_OS_WIN32
return _finite(f) != 0;
#elif NV_OS_DARWIN
return isfinite(f);
#elif NV_OS_LINUX
return finitef(f);
#else
# error "isFinite not supported"
#endif
//return std::isfinite (f);
//return finite (f);
}
/// Return whether f is NaN, using the platform-specific function selected at
/// compile time. Platforms other than Win32, Darwin and Linux stop the build
/// with an #error.
inline bool isNan(const float f)
{
#if NV_OS_WIN32
return _isnan(f) != 0;
#elif NV_OS_DARWIN
return isnan(f);
#elif NV_OS_LINUX
return isnanf(f);
#else
# error "isNan not supported"
#endif
}
/// Integer base-2 logarithm: the number of times i can be shifted right by
/// one bit before it becomes zero. Returns 0 for both 0 and 1.
inline uint log2(uint i)
{
    uint shifts = 0;
    for (i >>= 1; i != 0; i >>= 1) {
        shifts++;
    }
    return shifts;
}
/// Linear interpolation: returns f0 for t = 0 and f1 for t = 1. t is not
/// clamped.
inline float lerp(float f0, float f1, float t)
{
    return f0 * (1.0f - t) + f1 * t;
}
/// Return f multiplied by itself.
inline float square(float f)
{
return f * f;
}
} // nv
#endif // NV_MATH_H

View File

@ -5,17 +5,14 @@ ADD_SUBDIRECTORY(squish)
SET(NVTT_SRCS
nvtt.h
nvtt.cpp
Context.h
Context.cpp
Compressor.h
Compressor.cpp
nvtt_wrapper.h
nvtt_wrapper.cpp
Compressor.h
CompressorDXT.h
CompressorDXT.cpp
CompressorRGB.h
CompressorRGB.cpp
CompressorRGBE.h
CompressorRGBE.cpp
CompressDXT.h
CompressDXT.cpp
CompressRGB.h
CompressRGB.cpp
QuickCompressDXT.h
QuickCompressDXT.cpp
OptimalCompressDXT.h
@ -27,27 +24,27 @@ SET(NVTT_SRCS
InputOptions.cpp
OutputOptions.h
OutputOptions.cpp
TexImage.h TexImage.cpp
cuda/CudaUtils.h
cuda/CudaUtils.cpp
cuda/CudaMath.h
cuda/BitmapTable.h
cuda/CudaCompressorDXT.h
cuda/CudaCompressorDXT.cpp)
cuda/Bitmaps.h
cuda/CudaCompressDXT.h
cuda/CudaCompressDXT.cpp)
IF (CUDA_FOUND)
IF(CUDA_FOUND)
ADD_DEFINITIONS(-DHAVE_CUDA)
CUDA_COMPILE(CUDA_SRCS cuda/CompressKernel.cu)
WRAP_CUDA(CUDA_SRCS cuda/CompressKernel.cu)
SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS})
SET(LIBS ${LIBS} ${CUDA_LIBRARIES})
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
ENDIF (CUDA_FOUND)
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_PATH})
ENDIF(CUDA_FOUND)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVTT_EXPORTS)
IF(NVTT_SHARED)
IF(NVTT_SHARED)
ADD_DEFINITIONS(-DNVTT_SHARED=1)
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
@ -63,5 +60,54 @@ INSTALL(TARGETS nvtt
INSTALL(FILES nvtt.h DESTINATION include/nvtt)
ADD_SUBDIRECTORY(tools)
ADD_SUBDIRECTORY(tests)
# test executables
ADD_EXECUTABLE(nvcompress tools/compress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvdecompress tools/decompress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)
ADD_EXECUTABLE(nvddsinfo tools/ddsinfo.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)
ADD_EXECUTABLE(nvimgdiff tools/imgdiff.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)
ADD_EXECUTABLE(nvassemble tools/assemble.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin)
# UI tools
IF(QT4_FOUND AND NOT MSVC)
SET(QT_USE_QTOPENGL TRUE)
INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
SET(SRCS
tools/main.cpp
tools/configdialog.h
tools/configdialog.cpp)
SET(LIBS
nvtt
${QT_QTCORE_LIBRARY}
${QT_QTGUI_LIBRARY}
${QT_QTOPENGL_LIBRARY})
QT4_WRAP_UI(UICS tools/configdialog.ui)
QT4_WRAP_CPP(MOCS tools/configdialog.h)
#QT4_ADD_RESOURCES(RCCS tools/configdialog.rc)
ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
ENDIF(QT4_FOUND AND NOT MSVC)

597
src/nvtt/CompressDXT.cpp Normal file
View File

@ -0,0 +1,597 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Memory.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "nvtt.h"
#include "CompressDXT.h"
#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
// squish
#include "squish/colourset.h"
//#include "squish/clusterfit.h"
#include "squish/fastclusterfit.h"
#include "squish/weightedclusterfit.h"
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
#include "atitc/ATI_Compress.h"
#endif
//#include <time.h>
using namespace nv;
using namespace nvtt;
/// Create a compressor with no image attached and AlphaMode_None.
/// setImage must provide an image before any compress method is used, since
/// those dereference m_image.
nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
{
}
/// Nothing to release here; the destructor does not delete m_image.
nv::FastCompressor::~FastCompressor()
{
}
/// Store the image pointer and the alpha mode for the following compress
/// calls. Only the pointer is kept; the image is not copied.
void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
/// Compress the current image to DXT1 with the quick compressor. The image is
/// walked in 4x4 tiles, row by row, and every encoded block is handed to the
/// output handler if one is set.
void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT1 encoded;

    for (uint ty = 0; ty < height; ty += 4) {
        for (uint tx = 0; tx < width; tx += 4) {
            tile.init(m_image, tx, ty);
            QuickCompress::compressDXT1(tile, &encoded);

            if (outputOptions.outputHandler == NULL) {
                continue;
            }
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// Compress the current image to DXT1a with the quick compressor. The image
/// is walked in 4x4 tiles, row by row, and every encoded block is handed to
/// the output handler if one is set.
void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT1 encoded;

    for (uint ty = 0; ty < height; ty += 4) {
        for (uint tx = 0; tx < width; tx += 4) {
            tile.init(m_image, tx, ty);
            QuickCompress::compressDXT1a(tile, &encoded);

            if (outputOptions.outputHandler == NULL) {
                continue;
            }
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// Compress the current image to DXT3 with the quick compressor. The image is
/// walked in 4x4 tiles, row by row, and every encoded block is handed to the
/// output handler if one is set.
void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT3 encoded;

    for (uint ty = 0; ty < height; ty += 4) {
        for (uint tx = 0; tx < width; tx += 4) {
            tile.init(m_image, tx, ty);
            QuickCompress::compressDXT3(tile, &encoded);

            if (outputOptions.outputHandler == NULL) {
                continue;
            }
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// Compress the current image to DXT5 with the quick compressor. The image is
/// walked in 4x4 tiles, row by row, and every encoded block is handed to the
/// output handler if one is set.
void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT5 encoded;

    for (uint ty = 0; ty < height; ty += 4) {
        for (uint tx = 0; tx < width; tx += 4) {
            tile.init(m_image, tx, ty);
            QuickCompress::compressDXT5(tile, &encoded, 0);

            if (outputOptions.outputHandler == NULL) {
                continue;
            }
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// Compress the current image to DXT5n: each 4x4 tile is swizzled with
/// ColorBlock::swizzleDXT5n and then encoded as DXT5 with the quick
/// compressor. Encoded blocks are handed to the output handler if one is set.
void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT5 encoded;

    for (uint ty = 0; ty < height; ty += 4) {
        for (uint tx = 0; tx < width; tx += 4) {
            tile.init(m_image, tx, ty);
            tile.swizzleDXT5n();
            QuickCompress::compressDXT5(tile, &encoded, 0);

            if (outputOptions.outputHandler == NULL) {
                continue;
            }
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// Create a compressor with no image attached and AlphaMode_None.
/// setImage must provide an image before any compress method is used, since
/// those dereference m_image.
nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
{
}
/// Nothing to release here; the destructor does not delete m_image.
nv::SlowCompressor::~SlowCompressor()
{
}
/// Store the image pointer and the alpha mode for the following compress
/// calls. Only the pointer is kept; the image is not copied.
void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
/// Compress the current image to DXT1. Tiles that consist of a single color
/// go through OptimalCompress; all other tiles are fitted with squish's
/// weighted cluster fit, using the color weights from the compression
/// options. Encoded blocks are handed to the output handler if one is set.
void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT1 encoded;

    squish::WeightedClusterFit fitter;
    fitter.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    for (uint ty = 0; ty < height; ty += 4) {
        for (uint tx = 0; tx < width; tx += 4) {
            tile.init(m_image, tx, ty);

            if (!tile.isSingleColor()) {
                squish::ColourSet colours((uint8 *)tile.colors(), 0, true);
                fitter.SetColourSet(&colours, squish::kDxt1);
                fitter.Compress(&encoded);
            }
            else {
                OptimalCompress::compressDXT1(tile.color(0), &encoded);
            }

            if (outputOptions.outputHandler == NULL) {
                continue;
            }
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// High quality DXT1a (DXT1 with 1-bit alpha) encoder.
/// A pixel counts as transparent when its alpha is below 128. Blocks that are
/// entirely transparent, or that have no transparent pixel and a single
/// colour, are encoded with OptimalCompress from the first pixel; all other
/// blocks go through squish's WeightedClusterFit.
void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock pixels;
    BlockDXT1 encoded;

    squish::WeightedClusterFit clusterFit;
    clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    for (uint by = 0; by < height; by += 4) {
        for (uint bx = 0; bx < width; bx += 4) {
            pixels.init(m_image, bx, by);

            // Count the transparent pixels of the 16-pixel block.
            uint transparentCount = 0;
            for (uint i = 0; i < 16; i++)
            {
                if (pixels.color(i).a < 128) transparentCount++;
            }
            const bool anyTransparent = (transparentCount != 0);
            const bool allTransparent = (transparentCount == 16);

            // isSingleColor() is only consulted when no pixel is transparent.
            const bool trivialBlock = allTransparent || (!anyTransparent && pixels.isSingleColor());

            if (trivialBlock)
            {
                OptimalCompress::compressDXT1a(pixels.color(0), &encoded);
            }
            else
            {
                squish::ColourSet colourSet((uint8 *)pixels.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
                clusterFit.SetColourSet(&colourSet, squish::kDxt1);
                clusterFit.Compress(&encoded);
            }

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// High quality DXT3 encoder.
/// The alpha half of every block is produced by OptimalCompress::compressDXT3A;
/// the colour half uses OptimalCompress for single-colour blocks and squish's
/// WeightedClusterFit (colours weighted by alpha) otherwise.
void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock pixels;
    BlockDXT3 encoded;

    // squish::FastClusterFit was left commented out here as an alternative.
    squish::WeightedClusterFit clusterFit;
    clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    for (uint by = 0; by < height; by += 4) {
        for (uint bx = 0; bx < width; bx += 4) {
            pixels.init(m_image, bx, by);

            // Alpha half of the block.
            OptimalCompress::compressDXT3A(pixels, &encoded.alpha);

            // Colour half of the block.
            if (!pixels.isSingleColor())
            {
                squish::ColourSet colourSet((uint8 *)pixels.colors(), squish::kWeightColourByAlpha);
                clusterFit.SetColourSet(&colourSet, 0);
                clusterFit.Compress(&encoded.color);
            }
            else
            {
                OptimalCompress::compressDXT1(pixels.color(0), &encoded.color);
            }

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// High quality DXT5 encoder.
/// Alpha is encoded with OptimalCompress when the quality setting is
/// Quality_Highest and with QuickCompress otherwise. Colour uses
/// OptimalCompress for single-colour blocks and squish's WeightedClusterFit
/// (colours weighted by alpha) for the rest.
void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock pixels;
    BlockDXT5 encoded;

    squish::WeightedClusterFit clusterFit;
    clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    // The quality setting does not change while the image is being encoded.
    const bool optimalAlpha = (compressionOptions.quality == Quality_Highest);

    for (uint by = 0; by < height; by += 4) {
        for (uint bx = 0; bx < width; bx += 4) {
            pixels.init(m_image, bx, by);

            // Alpha half of the block.
            if (optimalAlpha) OptimalCompress::compressDXT5A(pixels, &encoded.alpha);
            else QuickCompress::compressDXT5A(pixels, &encoded.alpha);

            // Colour half of the block.
            if (!pixels.isSingleColor())
            {
                squish::ColourSet colourSet((uint8 *)pixels.colors(), squish::kWeightColourByAlpha);
                clusterFit.SetColourSet(&colourSet, 0);
                clusterFit.Compress(&encoded.color);
            }
            else
            {
                OptimalCompress::compressDXT1(pixels.color(0), &encoded.color);
            }

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// High quality DXT5n encoder.
/// Each block is swizzled with ColorBlock::swizzleDXT5n(), after which the
/// alpha half is encoded as DXT5 alpha (the original comments call this "X")
/// and the colour half with OptimalCompress::compressDXT1G ("Y").
void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock pixels;
    BlockDXT5 encoded;

    // Highest quality selects the optimal alpha encoder, anything else the quick one.
    const bool optimalAlpha = (compressionOptions.quality == Quality_Highest);

    for (uint by = 0; by < height; by += 4) {
        for (uint bx = 0; bx < width; bx += 4) {
            pixels.init(m_image, bx, by);
            pixels.swizzleDXT5n();

            // Compress X.
            if (optimalAlpha) OptimalCompress::compressDXT5A(pixels, &encoded.alpha);
            else QuickCompress::compressDXT5A(pixels, &encoded.alpha);

            // Compress Y.
            OptimalCompress::compressDXT1G(pixels, &encoded.color);

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// BC4 encoder: every block is stored as a single DXT5-style alpha block.
/// OptimalCompress is used at Quality_Highest, QuickCompress otherwise.
void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock pixels;
    AlphaBlockDXT5 encoded;

    const bool optimal = (compressionOptions.quality == Quality_Highest);

    for (uint by = 0; by < height; by += 4) {
        for (uint bx = 0; bx < width; bx += 4) {
            pixels.init(m_image, bx, by);

            if (optimal) OptimalCompress::compressDXT5A(pixels, &encoded);
            else QuickCompress::compressDXT5A(pixels, &encoded);

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
/// BC5 encoder: every block stores two DXT5-style alpha blocks, one built from
/// a copy of the pixels processed with splatX() and one from a copy processed
/// with splatY(). OptimalCompress is used at Quality_Highest, QuickCompress
/// otherwise.
void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock xPixels;
    ColorBlock yPixels;
    BlockATI2 encoded;

    const bool optimal = (compressionOptions.quality == Quality_Highest);

    for (uint by = 0; by < height; by += 4) {
        for (uint bx = 0; bx < width; bx += 4) {
            // Two independent copies of the same block, one per channel.
            xPixels.init(m_image, bx, by);
            xPixels.splatX();

            yPixels.init(m_image, bx, by);
            yPixels.splatY();

            if (optimal)
            {
                OptimalCompress::compressDXT5A(xPixels, &encoded.x);
                OptimalCompress::compressDXT5A(yPixels, &encoded.y);
            }
            else
            {
                QuickCompress::compressDXT5A(xPixels, &encoded.x);
                QuickCompress::compressDXT5A(yPixels, &encoded.y);
            }

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
#if defined(HAVE_S3QUANT)
/// DXT1 encoder built on the external S3 quantizer (CodeRGBBlock).
/// Every block is coded twice, once with inLevel = 4 and once with
/// inLevel = 3; the candidate with the smaller blockError() is written to the
/// output handler. The accumulated error divided by the number of blocks is
/// printed to stdout at the end.
void nv::s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{
    const uint w = image->width();
    const uint h = image->height();

    float error = 0.0f;

    BlockDXT1 dxtBlock3;
    BlockDXT1 dxtBlock4;
    ColorBlock block;

    for (uint y = 0; y < h; y += 4) {
        for (uint x = 0; x < w; x += 4) {
            block.init(image, x, y);

            // Init rgb block: colours are handed over as floats in [0, 1].
            RGBBlock rgbBlock;
            rgbBlock.n = 16;
            for (uint i = 0; i < 16; i++) {
                rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f);
                rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f);
                rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f);
            }
            rgbBlock.weight[0] = 1.0f;
            rgbBlock.weight[1] = 1.0f;
            rgbBlock.weight[2] = 1.0f;

            // First candidate: inLevel = 4.
            rgbBlock.inLevel = 4;
            CodeRGBBlock(&rgbBlock);

            // Copy results to DXT block.
            dxtBlock4.col0.r = rgbBlock.endPoint[0][0];
            dxtBlock4.col0.g = rgbBlock.endPoint[0][1];
            dxtBlock4.col0.b = rgbBlock.endPoint[0][2];
            dxtBlock4.col1.r = rgbBlock.endPoint[1][0];
            dxtBlock4.col1.g = rgbBlock.endPoint[1][1];
            dxtBlock4.col1.b = rgbBlock.endPoint[1][2];
            dxtBlock4.setIndices(rgbBlock.index);

            // Keep col0 >= col1 for this candidate; the index XOR accompanies the endpoint swap.
            if (dxtBlock4.col0.u < dxtBlock4.col1.u) {
                swap(dxtBlock4.col0.u, dxtBlock4.col1.u);
                dxtBlock4.indices ^= 0x55555555;
            }

            uint error4 = blockError(block, dxtBlock4);

            // Second candidate: inLevel = 3.
            rgbBlock.inLevel = 3;
            CodeRGBBlock(&rgbBlock);

            // Copy results to DXT block.
            dxtBlock3.col0.r = rgbBlock.endPoint[0][0];
            dxtBlock3.col0.g = rgbBlock.endPoint[0][1];
            dxtBlock3.col0.b = rgbBlock.endPoint[0][2];
            dxtBlock3.col1.r = rgbBlock.endPoint[1][0];
            dxtBlock3.col1.g = rgbBlock.endPoint[1][1];
            dxtBlock3.col1.b = rgbBlock.endPoint[1][2];
            dxtBlock3.setIndices(rgbBlock.index);

            // Keep col0 <= col1 for this candidate; the index fix-up accompanies the endpoint swap.
            if (dxtBlock3.col0.u > dxtBlock3.col1.u) {
                swap(dxtBlock3.col0.u, dxtBlock3.col1.u);
                dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555;
            }

            uint error3 = blockError(block, dxtBlock3);

            // Emit whichever candidate reproduces the block better.
            if (error3 < error4) {
                error += error3;

                if (outputOptions.outputHandler != NULL) {
                    outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3));
                }
            }
            else {
                error += error4;

                if (outputOptions.outputHandler != NULL) {
                    outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4));
                }
            }
        }
    }

    // Number of blocks visited by the loops above. The previous expression,
    // (w+3)/4 * (h+3)/4, parsed as (((w+3)/4) * (h+3)) / 4 and therefore did
    // not always equal the block count.
    const uint blockCount = ((w + 3) / 4) * ((h + 3) / 4);
    printf("error = %f\n", error / blockCount);
}
#endif // defined(HAVE_S3QUANT)
#if defined(HAVE_ATITC)
/// DXT1 encoder built on the external ATI compression library.
/// The whole image is converted in one ATI_TC_ConvertTexture call into a
/// temporary buffer, which is then written to the output handler (when one is
/// installed) and released.
void nv::atiCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
{
    // Init source texture
    ATI_TC_Texture srcTexture;
    srcTexture.dwSize = sizeof(srcTexture);
    srcTexture.dwWidth = image->width();
    srcTexture.dwHeight = image->height();
    srcTexture.dwPitch = image->width() * 4;
    srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
    srcTexture.pData = (ATI_TC_BYTE*) image->pixels();

    // Init dest texture
    ATI_TC_Texture destTexture;
    destTexture.dwSize = sizeof(destTexture);
    destTexture.dwWidth = image->width();
    destTexture.dwHeight = image->height();
    destTexture.dwPitch = 0;
    destTexture.format = ATI_TC_FORMAT_DXT1;
    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);

    // Compress
    ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);

    if (outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
    }

    // The destination buffer is allocated above and owned by this function;
    // it used to be leaked on every call.
    mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)

87
src/nvtt/CompressDXT.h Normal file
View File

@ -0,0 +1,87 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSDXT_H
#define NV_TT_COMPRESSDXT_H
#include <nvimage/nvimage.h>
#include "nvtt.h"
namespace nv
{
class Image;
class FloatImage;
// Speed-oriented DXT encoder. An image is attached with setImage() and then
// encoded by one of the compress*() methods; each of those takes only the
// output options, through which the encoded blocks are written.
class FastCompressor
{
public:
FastCompressor();
~FastCompressor();
// Attach the image (and its alpha mode) that the compress*() calls will read.
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
// One entry point per target format.
void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
private:
// Source image; held by pointer to const.
const Image * m_image;
// Alpha mode supplied together with the image.
nvtt::AlphaMode m_alphaMode;
};
// Quality-oriented encoder. Same usage pattern as FastCompressor, but every
// compress*() method also receives the compression options (quality setting,
// colour weights) and BC4/BC5 are supported in addition to the DXT formats.
class SlowCompressor
{
public:
SlowCompressor();
~SlowCompressor();
// Stores the image pointer and alpha mode; the image is not copied.
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
// One entry point per target format. Encoded blocks are written through
// outputOptions.outputHandler when it is not NULL.
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
// Source image; never deleted by this class.
const Image * m_image;
// Alpha mode supplied together with the image.
nvtt::AlphaMode m_alphaMode;
};
// External compressors.
// These free functions are only declared when the matching third-party
// library is enabled at build time (HAVE_S3QUANT / HAVE_ATITC).
#if defined(HAVE_S3QUANT)
// DXT1 encoding through the S3 quantizer.
void s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
#endif
#if defined(HAVE_ATITC)
// DXT1 encoding through the ATI compression library.
void atiCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
#endif
} // nv namespace
#endif // NV_TT_COMPRESSDXT_H

140
src/nvtt/CompressRGB.cpp Normal file
View File

@ -0,0 +1,140 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvimage/Image.h>
#include <nvimage/PixelFormat.h>
#include <nvmath/Color.h>
#include "CompressRGB.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
using namespace nv;
using namespace nvtt;
namespace
{
// Size in bytes of one scanline of `w` pixels that are `bitsize` bits wide.
// The per-pixel size is rounded up to whole bytes and the row is then padded
// to the next multiple of four bytes (32-bit alignment).
inline uint computePitch(uint w, uint bitsize)
{
    const uint bytesPerPixel = (bitsize + 7) / 8;
    const uint rowBytes = w * bytesPerPixel;

    // Round up to a multiple of 4 by clearing the two low bits.
    return (rowBytes + 3) & ~3u;
}
// Writes one scanline as A8R8G8B8. This is a straight copy of `w` four-byte
// pixels from src to dst; no per-pixel conversion is performed.
inline void convert_to_a8r8g8b8(const void * src, void * dst, uint w)
{
    const uint rowBytes = 4 * w;
    memcpy(dst, src, rowBytes);
}
// Writes one scanline as X8R8G8B8. Like the A8R8G8B8 variant this is a
// straight copy of `w` four-byte pixels; the fourth byte of each pixel is
// copied through unchanged rather than cleared.
inline void convert_to_x8r8g8b8(const void * src, void * dst, uint w)
{
    const uint rowBytes = 4 * w;
    memcpy(dst, src, rowBytes);
}
} // namespace
// Pixel format converter.
// Writes the image one scanline at a time in the uncompressed layout described
// by the compression options (bit count plus one mask per channel). Two 32-bit
// layouts are copied directly; anything else is repacked pixel by pixel and
// the remainder of the row is zero-filled up to the pitch.
void nv::compressRGB(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    nvCheck(image != NULL);

    const uint width = image->width();
    const uint height = image->height();

    const uint bitCount = compressionOptions.bitcount;
    nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);
    const uint bytesPerPixel = bitCount / 8;

    // Derive the shift and bit size of every channel from its mask.
    const uint rmask = compressionOptions.rmask;
    uint rshift, rsize;
    PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);

    const uint gmask = compressionOptions.gmask;
    uint gshift, gsize;
    PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);

    const uint bmask = compressionOptions.bmask;
    uint bshift, bsize;
    PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);

    const uint amask = compressionOptions.amask;
    uint ashift, asize;
    PixelFormat::maskShiftAndSize(amask, &ashift, &asize);

    // Determine pitch. One row buffer is allocated and reused for every scanline.
    const uint pitch = computePitch(width, compressionOptions.bitcount);
    uint8 * const row = (uint8 *)mem::malloc(pitch + 4);

    // The layout does not change between scanlines, so classify it once.
    const bool rgb888Order = (bitCount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF);
    const bool isA8R8G8B8 = rgb888Order && amask == 0xFF000000;
    const bool isX8R8G8B8 = rgb888Order && amask == 0;

    for (uint y = 0; y < height; y++)
    {
        const Color32 * src = image->scanline(y);

        if (isA8R8G8B8)
        {
            convert_to_a8r8g8b8(src, row, width);
        }
        else if (isX8R8G8B8)
        {
            convert_to_x8r8g8b8(src, row, width);
        }
        else
        {
            // Generic pixel format conversion.
            for (uint x = 0; x < width; x++)
            {
                uint packed = 0;
                packed |= PixelFormat::convert(src[x].r, 8, rsize) << rshift;
                packed |= PixelFormat::convert(src[x].g, 8, gsize) << gshift;
                packed |= PixelFormat::convert(src[x].b, 8, bsize) << bshift;
                packed |= PixelFormat::convert(src[x].a, 8, asize) << ashift;

                // Output one byte at a time, least significant byte first.
                for (uint i = 0; i < bytesPerPixel; i++)
                {
                    row[x * bytesPerPixel + i] = (packed >> (i * 8)) & 0xFF;
                }
            }

            // Zero padding.
            for (uint i = width * bytesPerPixel; i < pitch; i++)
            {
                row[i] = 0;
            }
        }

        if (outputOptions.outputHandler == NULL) continue;
        outputOptions.outputHandler->writeData(row, pitch);
    }

    mem::free(row);
}

View File

@ -21,20 +21,19 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSORRGBE_H
#define NV_TT_COMPRESSORRGBE_H
#ifndef NV_TT_COMPRESSRGB_H
#define NV_TT_COMPRESSRGB_H
#include "nvtt.h"
#include "Compressor.h"
namespace nv
{
// Compressor exposed through the CompressorInterface base class; it overrides
// the virtual compress() entry point, which receives the raw input (format,
// alpha mode, extents, data pointer) together with the compression and output
// options.
// NOTE(review): the name suggests an RGBE encoder, but no implementation is
// visible in this header - confirm against the matching .cpp.
struct CompressorRGBE : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
class Image;
// Pixel format converter.
void compressRGB(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
} // nv namespace
#endif // NV_TT_COMPRESSORRGBE_H
#endif // NV_TT_COMPRESSRGB_H

View File

@ -55,12 +55,6 @@ void CompressionOptions::reset()
m.rmask = 0x00FF0000;
m.amask = 0xFF000000;
m.rsize = 8;
m.gsize = 8;
m.bsize = 8;
m.asize = 8;
m.pixelType = PixelType_UnsignedNorm;
m.enableColorDithering = false;
m.enableAlphaDithering = false;
m.binaryAlpha = false;
@ -123,36 +117,8 @@ void CompressionOptions::setPixelFormat(uint bitcount, uint rmask, uint gmask, u
m.gmask = gmask;
m.bmask = bmask;
m.amask = amask;
m.rsize = 0;
m.gsize = 0;
m.bsize = 0;
m.asize = 0;
}
/// Describe the uncompressed pixel layout by per-channel bit sizes.
/// The bit count and the four channel masks are reset to 0 and the given
/// sizes are stored instead.
void CompressionOptions::setPixelFormat(uint8 rsize, uint8 gsize, uint8 bsize, uint8 asize)
{
    // Every channel has to fit in 32 bits. The terms used to be joined with
    // ||, so the check passed as soon as a single channel was in range.
    nvCheck(rsize <= 32 && gsize <= 32 && bsize <= 32 && asize <= 32);

    m.bitcount = 0;
    m.rmask = 0;
    m.gmask = 0;
    m.bmask = 0;
    m.amask = 0;

    m.rsize = rsize;
    m.gsize = gsize;
    m.bsize = bsize;
    m.asize = asize;
}
/// Set pixel type.
/// Stores the given value in the private options; nothing else is updated.
void CompressionOptions::setPixelType(PixelType pixelType)
{
m.pixelType = pixelType;
}
/// Use external compressor.
void CompressionOptions::setExternalCompressor(const char * name)
{

View File

@ -45,12 +45,6 @@ namespace nvtt
uint gmask;
uint bmask;
uint amask;
uint8 rsize;
uint8 gsize;
uint8 bsize;
uint8 asize;
PixelType pixelType;
nv::String externalCompressor;
@ -59,15 +53,6 @@ namespace nvtt
bool enableAlphaDithering;
bool binaryAlpha;
int alphaThreshold; // reference value used for binary alpha quantization.
// Bits per pixel of the uncompressed output.
// Only meaningful for Format_RGBA: returns the explicit bit count when it is
// non-zero, otherwise the sum of the four channel sizes. Any other format
// yields 0.
uint getBitCount() const
{
    if (format != Format_RGBA) return 0;

    return (bitcount != 0) ? bitcount : rsize + gsize + bsize + asize;
}
};
} // nvtt namespace

853
src/nvtt/Compressor.cpp Normal file
View File

@ -0,0 +1,853 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <nvcore/Memory.h>
#include <nvcore/Ptr.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Filter.h>
#include <nvimage/Quantize.h>
#include <nvimage/NormalMap.h>
#include <nvimage/PixelFormat.h>
#include "Compressor.h"
#include "InputOptions.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include "CompressDXT.h"
#include "CompressRGB.h"
#include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h"
using namespace nv;
using namespace nvtt;
namespace
{
// Size in bytes of one compressed block for the given format.
// Returns 0 for any format that is not listed below.
static int blockSize(Format format)
{
    // 8-byte blocks.
    if (format == Format_DXT1 || format == Format_DXT1a || format == Format_BC4) {
        return 8;
    }

    // 16-byte blocks.
    if (format == Format_DXT3 || format == Format_DXT5 || format == Format_DXT5n || format == Format_BC5) {
        return 16;
    }

    return 0;
}
// Bytes per scanline for `w` pixels of `bitsize` bits each: the pixel size is
// rounded up to whole bytes, then the row is padded to a multiple of 4 bytes.
inline uint computePitch(uint w, uint bitsize)
{
    const uint bytesPerPixel = (bitsize + 7) / 8;
    const uint unpadded = w * bytesPerPixel;

    // Align to 32 bits.
    return (unpadded + 3) & ~3u;
}
// Size in bytes of one encoded image (a single mipmap level).
// Format_RGBA: depth * height * pitch. Block formats: number of 4x4 blocks
// (rounded up on both axes) times the block size; depth is ignored for these.
static int computeImageSize(uint w, uint h, uint d, uint bitCount, Format format)
{
    if (format != Format_RGBA) {
        // @@ Handle 3D textures. DXT and VTC have different behaviors.
        return ((w + 3) / 4) * ((h + 3) / 4) * blockSize(format);
    }

    return d * h * computePitch(w, bitCount);
}
} // namespace
namespace nvtt
{
// Mipmap could be:
// - a pointer to an input image.
// - a fixed point image.
// - a floating point image.
// Holder for the image of the mipmap level currently being processed. It can
// refer to an input image (borrowed pointer), own a fixed point image, own a
// floating point image, or a combination of these, and converts between the
// representations on request.
struct Mipmap
{
Mipmap() : m_inputImage(NULL) {}
~Mipmap() {}
// Reference input image.
// Points at image `idx` of the input options and drops any fixed or float
// image held so far.
void setFromInput(const InputOptions::Private & inputOptions, uint idx)
{
m_inputImage = inputOptions.image(idx);
m_fixedImage = NULL;
m_floatImage = NULL;
}
// Assign and take ownership of given image.
// The input reference and any fixed image are cleared.
void setImage(FloatImage * image)
{
m_inputImage = NULL;
m_fixedImage = NULL;
m_floatImage = image;
}
// Convert linear float image to fixed image ready for compression.
// Does nothing when no float image is present. Gamma correction with
// outputGamma is applied unless the image is a normal map or the gamma is 1.
void toFixedImage(const InputOptions::Private & inputOptions)
{
if (m_floatImage != NULL) // apfaffe - We should check that we have a float image, if so convert it!
{
if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f)
{
m_fixedImage = m_floatImage->createImage();
}
else
{
m_fixedImage = m_floatImage->createImageGammaCorrect(inputOptions.outputGamma);
}
}
}
// Convert input image to linear float image.
// Does nothing when a float image already exists; otherwise one is built
// from asFixedImage(), which must not be NULL.
void toFloatImage(const InputOptions::Private & inputOptions)
{
if (m_floatImage == NULL)
{
nvDebugCheck(this->asFixedImage() != NULL);
m_floatImage = new FloatImage(this->asFixedImage());
if (inputOptions.isNormalMap)
{
// Expand normals to [-1, 1] range.
// floatImage->expandNormals(0);
// (The expansion is commented out, so normal maps are left untouched here.)
}
else if (inputOptions.inputGamma != 1.0f)
{
// Convert to linear space.
m_floatImage->toLinear(0, 3, inputOptions.inputGamma);
}
}
}
// Read-only access to the float image; NULL when none is held.
const FloatImage * asFloatImage() const
{
return m_floatImage.ptr();
}
// Mutable access to the float image; NULL when none is held.
FloatImage * asFloatImage()
{
return m_floatImage.ptr();
}
// Fixed point view of the mipmap: the owned fixed image when there is one,
// otherwise the referenced input image (which may itself be NULL).
const Image * asFixedImage() const
{
// - apfaffe - switched logic to return the 'processed image' rather than the input!
if (m_fixedImage != NULL && m_fixedImage.ptr() != NULL)
{
return m_fixedImage.ptr();
}
return m_inputImage;
}
// Fixed image that the caller may modify. When the mipmap still refers to
// an input image, a private copy is made first and the reference dropped.
Image * asMutableFixedImage()
{
if (m_inputImage != NULL)
{
// Do not modify input image, create a copy.
m_fixedImage = new Image(*m_inputImage);
m_inputImage = NULL;
}
return m_fixedImage.ptr();
}
private:
// Borrowed pointer into the input options; never deleted here.
const Image * m_inputImage;
// Owned fixed point image.
AutoPtr<Image> m_fixedImage;
// Owned floating point image.
AutoPtr<FloatImage> m_floatImage;
};
} // nvtt namespace
// Allocates the private implementation and probes for CUDA hardware. CUDA
// acceleration starts disabled with no device selected (-1) and is then
// switched on immediately when hardware is reported as present.
Compressor::Compressor() : m(*new Compressor::Private())
{
// CUDA initialization.
m.cudaSupported = cuda::isHardwarePresent();
m.cudaEnabled = false;
m.cudaDevice = -1;
enableCudaAcceleration(m.cudaSupported);
}
// Turns CUDA acceleration off (which releases the CUDA compressor and, when a
// device was selected, exits it) before freeing the private implementation
// that the constructor allocated.
Compressor::~Compressor()
{
enableCudaAcceleration(false);
delete &m;
}
/// Enable CUDA acceleration.
/// Has no effect when CUDA hardware was not detected, or when the requested
/// state is already the current one. Enabling initializes the device and
/// creates the CUDA compressor; if that compressor turns out to be invalid,
/// acceleration is switched off again.
void Compressor::enableCudaAcceleration(bool enable)
{
    if (!m.cudaSupported) return;

    if (m.cudaEnabled)
    {
        // Already enabled: only a request to disable changes anything.
        if (enable) return;

        m.cudaEnabled = false;
        m.cuda = NULL;

        if (m.cudaDevice != -1)
        {
            // Exit device.
            cuda::exitDevice();
        }
        return;
    }

    // Currently disabled: only a request to enable changes anything.
    if (!enable) return;

    // Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
    m.cudaEnabled = cuda::initDevice(&m.cudaDevice);
    if (!m.cudaEnabled) return;

    // Create compressor if initialization succeeds.
    m.cuda = new CudaCompressor();

    // But cleanup if failed.
    if (!m.cuda->isValid())
    {
        enableCudaAcceleration(false);
    }
}
/// Check if CUDA acceleration is enabled.
/// Reports the current state flag; it can be false even when CUDA hardware is
/// present, e.g. after enableCudaAcceleration(false) or a failed device init.
bool Compressor::isCudaAccelerationEnabled() const
{
return m.cudaEnabled;
}
/// Compress the input texture with the given compression options.
/// Thin public wrapper: forwards the private parts of the three option objects
/// to Private::compress() and returns its result.
bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
{
return m.compress(inputOptions.m, compressionOptions.m, outputOptions.m);
}
/// Estimate the size of compressing the input with the given options.
/// Thin public wrapper: forwards the private parts of the option objects to
/// Private::estimateSize() and returns its result.
int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const
{
return m.estimateSize(inputOptions.m, compressionOptions.m);
}
/// Run the whole compression: open the output, write the DDS header, then
/// compress the mipmap chain of every face.
/// Returns false when the output cannot be opened (Error_FileOpen is reported
/// to the error handler, if any), when the header cannot be written, or when a
/// face fails to compress. Once the output has been opened it is closed again
/// on every path, including the failure paths.
bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
    // Make sure enums match.
    nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
    nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
    nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);

    // Get output handler.
    if (!outputOptions.openFile())
    {
        if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
        return false;
    }

    inputOptions.computeTargetExtents();

    // Output DDS header.
    if (!outputHeader(inputOptions, compressionOptions, outputOptions))
    {
        // Do not leave the output open after a failure.
        outputOptions.closeFile();
        return false;
    }

    for (uint f = 0; f < inputOptions.faceCount; f++)
    {
        if (!compressMipmaps(f, inputOptions, compressionOptions, outputOptions))
        {
            // Do not leave the output open after a failure.
            outputOptions.closeFile();
            return false;
        }
    }

    outputOptions.closeFile();

    return true;
}
// Output DDS header.
// Builds a DDSHeader from the target extents, mipmap count, texture type and
// output format, and writes it through the output handler. Returns true
// without writing anything when there is no handler or the header was not
// requested; otherwise returns whether the write succeeded (a failed write is
// also reported to the error handler as Error_FileWrite).
bool Compressor::Private::outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
    const bool headerWanted = (outputOptions.outputHandler != NULL) && outputOptions.outputHeader;
    if (!headerWanted)
    {
        return true;
    }

    DDSHeader header;

    header.setWidth(inputOptions.targetWidth);
    header.setHeight(inputOptions.targetHeight);

    int mipmapCount = inputOptions.realMipmapCount();
    nvDebugCheck(mipmapCount > 0);

    header.setMipmapCount(mipmapCount);

    // Texture type. 3D textures are not handled; the original code kept that
    // branch (setTexture3D + setDepth(targetDepth)) commented out.
    if (inputOptions.textureType == TextureType_2D) {
        header.setTexture2D();
    }
    else if (inputOptions.textureType == TextureType_Cube) {
        header.setTextureCube();
    }

    if (compressionOptions.format == Format_RGBA)
    {
        // Uncompressed output is described by its pitch and channel masks.
        header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
        header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
    }
    else
    {
        // Block compressed output is described by its linear size and FourCC.
        header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));

        const bool isDXT1 = (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a);
        const bool isDXT5n = (compressionOptions.format == Format_DXT5n);
        const bool isBC5 = (compressionOptions.format == Format_BC5);

        if (isDXT1) {
            header.setFourCC('D', 'X', 'T', '1');
        }
        else if (compressionOptions.format == Format_DXT3) {
            header.setFourCC('D', 'X', 'T', '3');
        }
        else if (compressionOptions.format == Format_DXT5 || isDXT5n) {
            header.setFourCC('D', 'X', 'T', '5');
        }
        else if (compressionOptions.format == Format_BC4) {
            header.setFourCC('A', 'T', 'I', '1');
        }
        else if (isBC5) {
            header.setFourCC('A', 'T', 'I', '2');
        }

        // The normal flag is only set for DXT1/DXT1a, DXT5n and BC5 output.
        if (inputOptions.isNormalMap && (isDXT1 || isDXT5n || isBC5)) {
            header.setNormalFlag(true);
        }
    }

    // Swap bytes if necessary.
    header.swapBytes();

    // 128 bytes for the basic header, plus 20 when the DX10 extension is present.
    nvStaticCheck(sizeof(DDSHeader) == 128 + 20);
    const uint headerSize = header.hasDX10Header() ? (128 + 20) : 128;

    const bool written = outputOptions.outputHandler->writeData(&header, headerSize);
    if (!written && outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_FileWrite);
    }

    return written;
}
/// Compress the whole mipmap chain of face `f`.
/// For every level: announce the image to the output handler, obtain the
/// mipmap (from the input or by downsampling the previous level), quantize it
/// and compress it. Extents are halved after each level and never drop
/// below 1.
/// Returns false as soon as a level cannot be initialized; Error_InvalidInput
/// is reported when an error handler is installed.
bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
    uint w = inputOptions.targetWidth;
    uint h = inputOptions.targetHeight;
    uint d = inputOptions.targetDepth;

    // Reused across levels so that a level can be downsampled from the previous one.
    Mipmap mipmap;

    const uint mipmapCount = inputOptions.realMipmapCount();
    nvDebugCheck(mipmapCount > 0);

    for (uint m = 0; m < mipmapCount; m++)
    {
        if (outputOptions.outputHandler)
        {
            int size = computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.format);
            outputOptions.outputHandler->beginImage(size, w, h, d, f, m);
        }

        // @@ Where to do the color transform?
        //  - Color transform may not be linear, so we cannot do before computing mipmaps.
        //  - Should be done in linear space, that is, after gamma correction.

        if (!initMipmap(mipmap, inputOptions, w, h, d, f, m))
        {
            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_InvalidInput);
            }

            // Stop whether or not an error handler is installed. The return
            // used to sit inside the branch above, so without a handler the
            // code went on to quantize and compress a mipmap that had not
            // been initialized.
            return false;
        }

        quantizeMipmap(mipmap, compressionOptions);

        compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);

        // Compute extents of next mipmap:
        w = max(1U, w / 2);
        h = max(1U, h / 2);
        d = max(1U, d / 2);
    }

    return true;
}
/// Prepare `mipmap` for level `m` of face `f` with the given extents.
/// Levels after the first are downsampled from the previous level when the
/// input has no image of exactly this size, or when normal map conversion is
/// enabled. Otherwise the matching input image is used, or the closest input
/// image is scaled to size; both are then run through processInputImage().
/// Returns false only when no usable input image exists for the face.
bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const
{
    // Find image from input.
    const int exactIdx = findExactMipmap(inputOptions, w, h, d, f);

    // Generate from last, when mipmap not found, or normal map conversion enabled.
    const bool fromPreviousLevel = (m != 0) && (exactIdx == -1 || inputOptions.convertToNormalMap);

    if (fromPreviousLevel)
    {
        downsampleMipmap(mipmap, inputOptions);
    }
    else if (exactIdx != -1)
    {
        // If input mipmap found, then get from input.
        mipmap.setFromInput(inputOptions, exactIdx);
        processInputImage(mipmap, inputOptions);
    }
    else
    {
        // If not found, resize closest mipmap.
        const int closestIdx = findClosestMipmap(inputOptions, w, h, d, f);
        if (closestIdx == -1)
        {
            return false;
        }

        mipmap.setFromInput(inputOptions, closestIdx);
        scaleMipmap(mipmap, inputOptions, w, h, d);
        processInputImage(mipmap, inputOptions);
    }

    // Convert linear float image to fixed image ready for compression.
    mipmap.toFixedImage(inputOptions);

    return true;
}
/// Look for an input image of face `f` whose extents are exactly w x h x d.
/// Returns its index into inputOptions.images, or -1 when there is none, when
/// the matching entry has no data, or as soon as an entry that is smaller in
/// any dimension is reached.
int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
{
    for (int m = 0; m < int(inputOptions.mipmapCount); m++)
    {
        const int idx = f * inputOptions.mipmapCount + m;
        const InputOptions::Private::InputImage & candidate = inputOptions.images[idx];

        const bool sameExtents = (candidate.width == int(w) && candidate.height == int(h) && candidate.depth == int(d));
        if (sameExtents)
        {
            // The slot exists, but it is only usable when data was supplied.
            return (candidate.data != NULL) ? idx : -1;
        }

        const bool smaller = (candidate.width < int(w) || candidate.height < int(h) || candidate.depth < int(d));
        if (smaller)
        {
            // Stop searching once an entry is already smaller than the request.
            return -1;
        }
    }

    return -1;
}
/// Pick the input image of face `f` that is closest in size to w x h x d.
/// Entries without data are ignored. The search keeps the latest entry whose
/// summed extent difference is not negative and stops at the first entry whose
/// difference is negative; that entry is only chosen when nothing was found
/// before it. Returns -1 when the face has no image with data.
int Compressor::Private::findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
{
    int bestIdx = -1;

    for (int m = 0; m < int(inputOptions.mipmapCount); m++)
    {
        const int idx = f * inputOptions.mipmapCount + m;
        const InputOptions::Private::InputImage & candidate = inputOptions.images[idx];

        if (candidate.data == NULL)
        {
            continue;
        }

        int difference = (candidate.width - w) + (candidate.height - h) + (candidate.depth - d);

        if (difference < 0)
        {
            // This entry is smaller than requested: prefer the previous match, if any.
            return (bestIdx == -1) ? idx : bestIdx;
        }

        bestIdx = idx;
    }

    return bestIdx;
}
// Create mipmap from the given image.
// Replaces the image held by `mipmap` with a downsampled version of it, using
// the mipmap filter selected in the input options (box, triangle, or Kaiser).
// Normal maps are renormalized afterwards when normalizeMipmaps is set.
void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
    // Make sure that floating point linear representation is available.
    mipmap.toFloatImage(inputOptions);

    const FloatImage * source = mipmap.asFloatImage();

    switch (inputOptions.mipmapFilter)
    {
        case MipmapFilter_Box:
        {
            // Use fast downsample.
            mipmap.setImage(source->fastDownSample());
            break;
        }
        case MipmapFilter_Triangle:
        {
            TriangleFilter filter;
            mipmap.setImage(source->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode));
            break;
        }
        default: /* MipmapFilter_Kaiser */
        {
            nvDebugCheck(inputOptions.mipmapFilter == MipmapFilter_Kaiser);
            KaiserFilter filter(inputOptions.kaiserWidth);
            filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
            mipmap.setImage(source->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode));
            break;
        }
    }

    // Normalize mipmap.
    const bool holdsNormals = inputOptions.isNormalMap || inputOptions.convertToNormalMap;
    if (holdsNormals && inputOptions.normalizeMipmaps)
    {
        normalizeNormalMap(mipmap.asFloatImage());
    }
}
// Resize the image held by `mipmap` to w x h using a box filter.
//
// The mipmap is converted to floating point first. Note that `d` is accepted
// but not used by the resize call.
void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const
{
    mipmap.toFloatImage(inputOptions);

    // @@ Only the box filter is supported for now; more filters, and separate
    // @@ filters for downscaling and reconstruction, are still to be added.
    const FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)inputOptions.wrapMode;
    BoxFilter resizeFilter;

    mipmap.setImage(mipmap.asFloatImage()->resize(resizeFilter, w, h, wrapMode));
}
// Prepare an image that was taken directly from the input.
//
// Exactly one of three things happens, depending on the input options:
//  - convertToNormalMap: the fixed image is turned into a normal map;
//  - isNormalMap: the normals are renormalized when normalizeMipmaps is set;
//  - otherwise: the image is converted to floating point when the input and
//    output gamma differ.
void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
    if (inputOptions.convertToNormalMap)
    {
        // createNormalMap consumes the fixed point image.
        mipmap.toFixedImage(inputOptions);

        Vector4 heightScale = inputOptions.heightFactors;
        const FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)inputOptions.wrapMode;
        mipmap.setImage(createNormalMap(mipmap.asFixedImage(), wrapMode, heightScale, inputOptions.bumpFrequencyScale));
        return;
    }

    if (!inputOptions.isNormalMap)
    {
        // Plain color image: only needs the float conversion when the gamma changes.
        if (inputOptions.inputGamma != inputOptions.outputGamma)
        {
            mipmap.toFloatImage(inputOptions);
        }
        return;
    }

    if (!inputOptions.normalizeMipmaps)
    {
        return;
    }

    if (mipmap.asFloatImage() != NULL)
    {
        // A float image already exists: normalize it in place and hand it back.
        normalizeNormalMap(mipmap.asFloatImage());
        mipmap.setImage(mipmap.asFloatImage());
    }
    else
    {
        // No float image yet: build one from the fixed image and normalize that.
        FloatImage * normalized = new FloatImage(mipmap.asFixedImage());
        normalizeNormalMap(normalized);
        mipmap.setImage(normalized);
    }
}
// Apply the quantization requested by the compression options to the fixed
// image of `mipmap`.
//
// Binary alpha is applied first (dithered or thresholded). Floyd-Steinberg
// dithering then runs when color or alpha dithering is enabled, using
// per-channel bit sizes derived from the target format.
void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const
{
    nvDebugCheck(mipmap.asFixedImage() != NULL);

    const Format format = compressionOptions.format;
    const bool ditherColor = compressionOptions.enableColorDithering;
    const bool ditherAlpha = compressionOptions.enableAlphaDithering;

    if (compressionOptions.binaryAlpha)
    {
        if (ditherAlpha)
        {
            Quantize::FloydSteinberg_BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
        }
        else
        {
            Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold);
        }
    }

    if (!ditherColor && !ditherAlpha)
    {
        return;
    }

    // Channels default to 8 bits, i.e. no additional quantization.
    uint rsize = 8;
    uint gsize = 8;
    uint bsize = 8;
    uint asize = 8;

    if (ditherColor)
    {
        if (format >= Format_DXT1 && format <= Format_DXT5)
        {
            // DXT formats in this range use 5:6:5 colors.
            rsize = 5;
            gsize = 6;
            bsize = 5;
        }
        else if (format == Format_RGB)
        {
            // Take the sizes from the user supplied channel masks; the shift is not needed.
            uint ignoredShift;
            PixelFormat::maskShiftAndSize(compressionOptions.rmask, &ignoredShift, &rsize);
            PixelFormat::maskShiftAndSize(compressionOptions.gmask, &ignoredShift, &gsize);
            PixelFormat::maskShiftAndSize(compressionOptions.bmask, &ignoredShift, &bsize);
        }
    }

    if (ditherAlpha)
    {
        if (format == Format_DXT3)
        {
            asize = 4;
        }
        else if (format == Format_RGB)
        {
            uint ignoredShift;
            PixelFormat::maskShiftAndSize(compressionOptions.amask, &ignoredShift, &asize);
        }
    }

    if (compressionOptions.binaryAlpha)
    {
        // Alpha was already quantized by the binary alpha pass above.
        asize = 8;
    }

    Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
}
// Compress the given mipmap.
//
// Sends the fixed point image of `mipmap` to the encoder selected by
// compressionOptions.format and compressionOptions.quality:
//  - Format_RGB / Format_RGBA go through compressRGB;
//  - Quality_Fastest uses the FastCompressor where a fast path exists;
//  - otherwise the CUDA compressor is used for DXT1/DXT3/DXT5 when it is
//    enabled and the image is large enough, and the SlowCompressor is used
//    in every other case.
// The function always returns true. A format that matches none of the
// branches below produces no output.
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
const Image * image = mipmap.asFixedImage();
nvDebugCheck(image != NULL);
// Both CPU compressors are bound to the image up front, whichever one ends up being used.
FastCompressor fast;
fast.setImage(image, inputOptions.alphaMode);
SlowCompressor slow;
slow.setImage(image, inputOptions.alphaMode);
// CUDA is only used when enabled and the image has at least 512 pixels.
const bool useCuda = cudaEnabled && image->width() * image->height() >= 512;
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{
compressRGB(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_DXT1)
{
// Optional external compressors take precedence when compiled in and selected by name.
// Each #if block below ends in a dangling `else` that chains into the next `if`.
#if defined(HAVE_S3QUANT)
if (compressionOptions.externalCompressor == "s3")
{
s3CompressDXT1(image, outputOptions);
}
else
#endif
#if defined(HAVE_ATITC)
if (compressionOptions.externalCompressor == "ati")
{
atiCompressDXT1(image, outputOptions);
}
else
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
}
else
{
slow.compressDXT1(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT1a)
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1a(outputOptions);
}
else
{
// Both branches end up in the CPU compressor: the CUDA call is commented out for DXT1a.
if (useCuda)
{
nvDebugCheck(cudaSupported);
/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
}
else
{
slow.compressDXT1a(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT3(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT3(compressionOptions, outputOptions);
}
else
{
slow.compressDXT3(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT5)
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT5(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT5(compressionOptions, outputOptions);
}
else
{
slow.compressDXT5(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT5n)
{
// No CUDA path for DXT5n: only the fast and slow CPU compressors are used.
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT5n(outputOptions);
}
else
{
slow.compressDXT5n(compressionOptions, outputOptions);
}
}
else if (compressionOptions.format == Format_BC4)
{
// BC4 and BC5 always use the slow compressor, regardless of quality.
slow.compressBC4(compressionOptions, outputOptions);
}
else if (compressionOptions.format == Format_BC5)
{
slow.compressBC5(compressionOptions, outputOptions);
}
return true;
}
// Estimate the size of the compressed output for the given options.
//
// Sums computeImageSize() over every mipmap level of every face, starting
// from the target extents and halving each extent (never below 1) per level.
int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const
{
    const Format format = compressionOptions.format;
    const uint bitCount = compressionOptions.bitcount;

    // The target extents must be up to date before they are read below.
    inputOptions.computeTargetExtents();

    uint mipmapCount = inputOptions.realMipmapCount();

    int total = 0;

    for (uint face = 0; face < inputOptions.faceCount; face++)
    {
        uint levelWidth = inputOptions.targetWidth;
        uint levelHeight = inputOptions.targetHeight;
        uint levelDepth = inputOptions.targetDepth;

        for (uint remaining = mipmapCount; remaining > 0; remaining--)
        {
            total += computeImageSize(levelWidth, levelHeight, levelDepth, bitCount, format);

            // Next level: halve every extent, clamping at 1.
            levelWidth = (levelWidth > 1) ? levelWidth / 2 : 1;
            levelHeight = (levelHeight > 1) ? levelHeight / 2 : 1;
            levelDepth = (levelDepth > 1) ? levelDepth / 2 : 1;
        }
    }

    return total;
}

View File

@ -1,4 +1,4 @@
// Copyright Ignacio Castano <icastano@nvidia.com> 2009
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -24,17 +24,57 @@
#ifndef NV_TT_COMPRESSOR_H
#define NV_TT_COMPRESSOR_H
#include <nvcore/nvcore.h> // uint
#include <nvcore/Ptr.h>
#include <nvtt/cuda/CudaCompressDXT.h>
#include "nvtt.h"
namespace nv
{
struct CompressorInterface
class Image;
}
namespace nvtt
{
struct Mipmap;
struct Compressor::Private
{
virtual ~CompressorInterface() {}
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0;
Private() {}
bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
int estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const;
private:
bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
bool compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
bool initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const;
int findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
int findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:
bool cudaSupported;
bool cudaEnabled;
int cudaDevice;
nv::AutoPtr<nv::CudaCompressor> cuda;
};
} // nv namespace
} // nvtt namespace
#endif // NV_TT_COMPRESSOR_H
#endif // NV_TT_COMPRESSOR_H

View File

@ -1,676 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "CompressorDXT.h"
#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
// squish
#include "squish/colourset.h"
#include "squish/fastclusterfit.h"
#include "squish/weightedclusterfit.h"
#include "nvtt.h"
#include "nvcore/Memory.h"
#include "nvimage/Image.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
#endif
// squish
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
#endif
// d3dx
#if defined(HAVE_D3DX)
#include <d3dx9.h>
#endif
// stb
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
#endif
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
using namespace nv;
using namespace nvtt;
// Compress a w x h image block by block and send the result to the output handler.
//
// The image is walked in 4x4 steps; each step is loaded into a ColorBlock from
// either 8 bit BGRA or 32 bit float RGBA input and encoded with compressBlock().
// Without OpenMP, or when the image has fewer than 16 blocks, blocks are encoded
// and written one at a time. Otherwise all blocks are encoded in parallel into a
// temporary buffer that is written with a single call.
void FixedBlockCompressor::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    const uint bytesPerBlock = blockSize();
    const uint blocksWide = (w + 3) / 4;
    const uint blocksHigh = (h + 3) / 4;
    const uint totalBytes = bytesPerBlock * blocksWide * blocksHigh;

#if defined(HAVE_OPENMP)
    // Small textures are not worth spreading over several threads.
    const bool singleThreaded = (blocksWide * blocksHigh < 16);
#else
    const bool singleThreaded = true;
#endif

    if (singleThreaded)
    {
        // One block of scratch space is enough, since every block is written immediately.
        nvDebugCheck(bytesPerBlock <= 16);
        uint8 scratch[16];

        for (int y = 0; y < int(h); y += 4)
        {
            for (uint x = 0; x < w; x += 4)
            {
                ColorBlock colors;
                if (inputFormat == nvtt::InputFormat_BGRA_8UB)
                {
                    colors.init(w, h, (uint *)data, x, y);
                }
                else
                {
                    nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F);
                    colors.init(w, h, (float *)data, x, y);
                }

                compressBlock(colors, alphaMode, compressionOptions, scratch);

                if (outputOptions.outputHandler != NULL)
                {
                    outputOptions.outputHandler->writeData(scratch, bytesPerBlock);
                }
            }
        }
    }
#if defined(HAVE_OPENMP)
    else
    {
        uint8 * encoded = new uint8[totalBytes];

        #pragma omp parallel for
        for (int i = 0; i < int(blocksWide*blocksHigh); i++)
        {
            // Block coordinates of the i-th block in row-major order.
            const uint bx = i % blocksWide;
            const uint by = i / blocksWide;

            ColorBlock colors;
            if (inputFormat == nvtt::InputFormat_BGRA_8UB)
            {
                colors.init(w, h, (uint *)data, 4*bx, 4*by);
            }
            else
            {
                nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F);
                colors.init(w, h, (float *)data, 4*bx, 4*by);
            }

            // Each iteration owns its own slot of the buffer.
            uint8 * slot = encoded + (by * blocksWide + bx) * bytesPerBlock;
            compressBlock(colors, alphaMode, compressionOptions, slot);
        }

        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(encoded, totalBytes);
        }

        delete [] encoded;
    }
#endif
}
// Encode one color block as DXT1 with the quick compressor.
// The BlockDXT1 is constructed directly in the caller supplied `output` buffer.
void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT1 * const dst = new(output) BlockDXT1;

    QuickCompress::compressDXT1(rgba, dst);
}
// Encode one color block as DXT1a with the quick compressor.
// The BlockDXT1 is constructed directly in the caller supplied `output` buffer.
void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT1 * const dst = new(output) BlockDXT1;

    QuickCompress::compressDXT1a(rgba, dst);
}
// Encode one color block as DXT3 with the quick compressor.
// The BlockDXT3 is constructed directly in the caller supplied `output` buffer.
void FastCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT3 * const dst = new(output) BlockDXT3;

    QuickCompress::compressDXT3(rgba, dst);
}
// Encode one color block as DXT5 with the quick compressor.
// The BlockDXT5 is constructed directly in the caller supplied `output` buffer.
void FastCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT5 * const dst = new(output) BlockDXT5;

    QuickCompress::compressDXT5(rgba, dst);
}
// Encode one color block as DXT5n with the quick compressor.
// The channels are rearranged first and the block is then encoded as regular DXT5.
void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Channel layout after the swizzle: 0xFF, G, 0, R
    rgba.swizzle(4, 1, 5, 0);

    BlockDXT5 * const dst = new(output) BlockDXT5;

    QuickCompress::compressDXT5(rgba, dst);
}
// Encode one color block as BC4 with the quick compressor.
// The red channel is stored using the DXT5 alpha encoder.
void FastCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockATI1 * const dst = new(output) BlockATI1;

    // The alpha encoder reads the alpha channel, so copy red into it.
    rgba.swizzle(0, 1, 2, 0);
    QuickCompress::compressDXT5A(rgba, &dst->alpha);
}
// Encode one color block as BC5 with the quick compressor.
// Red and green are each stored using the DXT5 alpha encoder.
void FastCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockATI2 * const dst = new(output) BlockATI2;

    // First half: copy red to alpha and encode it as x.
    rgba.swizzle(0, 1, 2, 0);
    QuickCompress::compressDXT5A(rgba, &dst->x);

    // Second half: copy green to alpha and encode it as y.
    rgba.swizzle(0, 1, 2, 1);
    QuickCompress::compressDXT5A(rgba, &dst->y);
}
// Encode one color block as DXT1.
//
// Single-color blocks are encoded with the optimal single color compressor.
// All other blocks go through the weighted cluster fit, using
// compressionOptions.colorWeight as the error metric.
void NormalCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    if (rgba.isSingleColor())
    {
        BlockDXT1 * block = new(output) BlockDXT1;
        OptimalCompress::compressDXT1(rgba.color(0), block);
    }
    else
    {
        // Only set up the cluster fit when it is actually used, as the DXT3
        // and DXT5 compressors do; the single color path never touches it.
        nvsquish::WeightedClusterFit fit;
        fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

        nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
        fit.SetColourSet(&colours, nvsquish::kDxt1);
        fit.Compress(output);
    }
}
// Encode one color block as DXT1a.
//
// A pixel counts as transparent when its alpha is below 128. Blocks that are
// entirely transparent, or that are a single color with no transparent pixel,
// use the optimal single color compressor; everything else goes through the
// weighted cluster fit.
void NormalCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Count the transparent pixels of the 16 pixel block.
    uint transparentCount = 0;
    for (uint i = 0; i < 16; i++)
    {
        if (rgba.color(i).a < 128)
        {
            transparentCount++;
        }
    }
    const bool anyAlpha = (transparentCount > 0);
    const bool allAlpha = (transparentCount == 16);

    const bool isSingleColor = rgba.isSingleColor();

    if ((!anyAlpha && isSingleColor) || allAlpha)
    {
        BlockDXT1 * dst = new(output) BlockDXT1;
        OptimalCompress::compressDXT1a(rgba.color(0), dst);
        return;
    }

    nvsquish::WeightedClusterFit fit;
    fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    // The colour set additionally weights colours by alpha in transparency mode.
    int colourFlags = nvsquish::kDxt1;
    if (alphaMode == nvtt::AlphaMode_Transparency)
    {
        colourFlags |= nvsquish::kWeightColourByAlpha;
    }

    nvsquish::ColourSet colours((uint8 *)rgba.colors(), colourFlags);
    fit.SetColourSet(&colours, nvsquish::kDxt1);
    fit.Compress(output);
}
// Encode one color block as DXT3.
//
// The explicit alpha part always uses the optimal compressor. The color part
// uses the optimal single color compressor for uniform blocks and the weighted
// cluster fit otherwise.
void NormalCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT3 * dst = new(output) BlockDXT3;

    // Alpha half of the block.
    OptimalCompress::compressDXT3A(rgba, &dst->alpha);

    // Color half of the block.
    if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &dst->color);
        return;
    }

    nvsquish::WeightedClusterFit fit;
    fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    // Weight colours by alpha only in transparency mode.
    const int colourFlags = (alphaMode == nvtt::AlphaMode_Transparency) ? int(nvsquish::kWeightColourByAlpha) : 0;

    nvsquish::ColourSet colours((uint8 *)rgba.colors(), colourFlags);
    fit.SetColourSet(&colours, 0);
    fit.Compress(&dst->color);
}
// Encode one color block as DXT5.
//
// Alpha uses the optimal compressor at Quality_Highest and the quick one
// otherwise. The color part uses the optimal single color compressor for
// uniform blocks and the weighted cluster fit otherwise.
void NormalCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT5 * dst = new(output) BlockDXT5;

    // Alpha half of the block.
    const bool highestQuality = (compressionOptions.quality == Quality_Highest);
    if (highestQuality)
    {
        OptimalCompress::compressDXT5A(rgba, &dst->alpha);
    }
    else
    {
        QuickCompress::compressDXT5A(rgba, &dst->alpha);
    }

    // Color half of the block.
    if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &dst->color);
        return;
    }

    nvsquish::WeightedClusterFit fit;
    fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    // Weight colours by alpha only in transparency mode.
    const int colourFlags = (alphaMode == nvtt::AlphaMode_Transparency) ? int(nvsquish::kWeightColourByAlpha) : 0;

    nvsquish::ColourSet colours((uint8 *)rgba.colors(), colourFlags);
    fit.SetColourSet(&colours, 0);
    fit.Compress(&dst->color);
}
// Encode one color block as DXT5n.
//
// After rearranging the channels, X is stored in the DXT5 alpha part and Y in
// the green channel of the color part. At Quality_Highest both parts use the
// optimal compressors. Otherwise alpha uses the quick compressor and the color
// part uses either the single color compressor or a cluster fit that only
// weighs green.
void NormalCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Channel layout after the swizzle: 0xFF, G, 0, R
    rgba.swizzle(4, 1, 5, 0);

    BlockDXT5 * dst = new(output) BlockDXT5;

    if (compressionOptions.quality == Quality_Highest)
    {
        // X, then Y, both with the optimal compressors.
        OptimalCompress::compressDXT5A(rgba, &dst->alpha);
        OptimalCompress::compressDXT1G(rgba, &dst->color);
        return;
    }

    // Compress X.
    QuickCompress::compressDXT5A(rgba, &dst->alpha);

    // Compress Y.
    if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1G(rgba.color(0), &dst->color);
        return;
    }

    nvsquish::WeightedClusterFit fit;
    fit.SetMetric(0, 1, 0);

    // Weight colours by alpha only in transparency mode.
    const int colourFlags = (alphaMode == nvtt::AlphaMode_Transparency) ? int(nvsquish::kWeightColourByAlpha) : 0;

    nvsquish::ColourSet colours((uint8 *)rgba.colors(), colourFlags);
    fit.SetColourSet(&colours, 0);
    fit.Compress(&dst->color);
}
// Encode one color block as BC4 with the optimal DXT5 alpha encoder.
void ProductionCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockATI1 * const dst = new(output) BlockATI1;

    // The alpha encoder reads the alpha channel, so copy red into it.
    rgba.swizzle(0, 1, 2, 0);
    OptimalCompress::compressDXT5A(rgba, &dst->alpha);
}
// Encode one color block as BC5 with the optimal DXT5 alpha encoder.
// Red and green are each stored through the alpha encoder.
void ProductionCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockATI2 * const dst = new(output) BlockATI2;

    // First half: copy red to alpha and encode it as x.
    rgba.swizzle(0, 1, 2, 0);
    OptimalCompress::compressDXT5A(rgba, &dst->x);

    // Second half: copy green to alpha and encode it as y.
    rgba.swizzle(0, 1, 2, 1);
    OptimalCompress::compressDXT5A(rgba, &dst->y);
}
#if defined(HAVE_S3QUANT)
// Compress a w x h image to DXT1 using the external s3_quant coder.
//
// Every 4x4 block is coded twice with CodeRGBBlock, once with inLevel = 4 and
// once with inLevel = 3. The candidate with the lower blockError() is written
// to the output handler.
// NOTE(review): CompressorInterface::compress takes `const void * data`, while this
// definition takes `void * data` -- confirm it matches the class declaration.
void S3CompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
// Accumulated error of the chosen blocks; it is never read within this function.
float error = 0.0f;
BlockDXT1 dxtBlock3;
BlockDXT1 dxtBlock4;
ColorBlock block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
block.init(inputFormat, w, h, data, x, y);
// Init rgb block.
RGBBlock rgbBlock;
rgbBlock.n = 16;
// Convert the 8 bit colors to floats in [0, 1].
for (uint i = 0; i < 16; i++) {
rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f);
rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f);
rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f);
}
rgbBlock.weight[0] = 1.0f;
rgbBlock.weight[1] = 1.0f;
rgbBlock.weight[2] = 1.0f;
// First candidate: code the block with inLevel = 4.
rgbBlock.inLevel = 4;
CodeRGBBlock(&rgbBlock);
// Copy results to DXT block.
dxtBlock4.col0.r = rgbBlock.endPoint[0][0];
dxtBlock4.col0.g = rgbBlock.endPoint[0][1];
dxtBlock4.col0.b = rgbBlock.endPoint[0][2];
dxtBlock4.col1.r = rgbBlock.endPoint[1][0];
dxtBlock4.col1.g = rgbBlock.endPoint[1][1];
dxtBlock4.col1.b = rgbBlock.endPoint[1][2];
dxtBlock4.setIndices(rgbBlock.index);
// Keep col0 >= col1 for this candidate; the indices are remapped to match the swapped endpoints.
if (dxtBlock4.col0.u < dxtBlock4.col1.u) {
swap(dxtBlock4.col0.u, dxtBlock4.col1.u);
dxtBlock4.indices ^= 0x55555555;
}
uint error4 = blockError(block, dxtBlock4);
// Second candidate: the same rgbBlock is coded again with inLevel = 3.
rgbBlock.inLevel = 3;
CodeRGBBlock(&rgbBlock);
// Copy results to DXT block.
dxtBlock3.col0.r = rgbBlock.endPoint[0][0];
dxtBlock3.col0.g = rgbBlock.endPoint[0][1];
dxtBlock3.col0.b = rgbBlock.endPoint[0][2];
dxtBlock3.col1.r = rgbBlock.endPoint[1][0];
dxtBlock3.col1.g = rgbBlock.endPoint[1][1];
dxtBlock3.col1.b = rgbBlock.endPoint[1][2];
dxtBlock3.setIndices(rgbBlock.index);
// Keep col0 <= col1 for this candidate; the indices are remapped to match the swapped endpoints.
if (dxtBlock3.col0.u > dxtBlock3.col1.u) {
swap(dxtBlock3.col0.u, dxtBlock3.col1.u);
dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555;
}
uint error3 = blockError(block, dxtBlock3);
// Emit whichever candidate has the lower error; ties go to the inLevel = 4 block.
if (error3 < error4) {
error += error3;
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3));
}
}
else {
error += error4;
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4));
}
}
}
}
}
#endif // defined(HAVE_S3QUANT)
#if defined(HAVE_ATITC)
// Compress a w x h image to DXT1 with the ATI Compress library.
//
// The whole image is converted in a single ATI_TC_ConvertTexture call into a
// temporary buffer, which is then handed to the output handler and freed.
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    const bool fixedInput = (inputFormat == nvtt::InputFormat_BGRA_8UB);

    // Describe the source image: 4 bytes per pixel for 8 bit input, 16 for float input.
    ATI_TC_Texture src;
    src.dwSize = sizeof(src);
    src.dwWidth = w;
    src.dwHeight = h;
    src.dwPitch = fixedInput ? w * 4 : w * 16;
    src.format = fixedInput ? ATI_TC_FORMAT_ARGB_8888 : ATI_TC_FORMAT_ARGB_32F;
    src.dwDataSize = ATI_TC_CalculateBufferSize(&src);
    src.pData = (ATI_TC_BYTE*) data;

    // Describe the destination and allocate the buffer the library asks for.
    ATI_TC_Texture dst;
    dst.dwSize = sizeof(dst);
    dst.dwWidth = w;
    dst.dwHeight = h;
    dst.dwPitch = 0;
    dst.format = ATI_TC_FORMAT_DXT1;
    dst.dwDataSize = ATI_TC_CalculateBufferSize(&dst);
    dst.pData = (ATI_TC_BYTE*) mem::malloc(dst.dwDataSize);

    // Compression settings: no weighting, no DXT1 alpha, normal speed, multithreading allowed.
    ATI_TC_CompressOptions settings;
    settings.dwSize = sizeof(settings);
    settings.bUseChannelWeighting = false;
    settings.bUseAdaptiveWeighting = false;
    settings.bDXT1UseAlpha = false;
    settings.nCompressionSpeed = ATI_TC_Speed_Normal;
    settings.bDisableMultiThreading = false;

    ATI_TC_ConvertTexture(&src, &dst, &settings, NULL, NULL, NULL);

    if (outputOptions.outputHandler != NULL)
    {
        outputOptions.outputHandler->writeData(dst.pData, dst.dwDataSize);
    }

    mem::free(dst.pData);
}
// Compress a w x h image to DXT5 with the ATI Compress library.
//
// The whole image is converted in a single ATI_TC_ConvertTexture call, without
// an options struct, into a temporary buffer that is then handed to the output
// handler and freed.
void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    const bool fixedInput = (inputFormat == nvtt::InputFormat_BGRA_8UB);

    // Describe the source image: 4 bytes per pixel for 8 bit input, 16 for float input.
    ATI_TC_Texture src;
    src.dwSize = sizeof(src);
    src.dwWidth = w;
    src.dwHeight = h;
    src.dwPitch = fixedInput ? w * 4 : w * 16;
    src.format = fixedInput ? ATI_TC_FORMAT_ARGB_8888 : ATI_TC_FORMAT_ARGB_32F;
    src.dwDataSize = ATI_TC_CalculateBufferSize(&src);
    src.pData = (ATI_TC_BYTE*) data;

    // Describe the destination and allocate the buffer the library asks for.
    ATI_TC_Texture dst;
    dst.dwSize = sizeof(dst);
    dst.dwWidth = w;
    dst.dwHeight = h;
    dst.dwPitch = 0;
    dst.format = ATI_TC_FORMAT_DXT5;
    dst.dwDataSize = ATI_TC_CalculateBufferSize(&dst);
    dst.pData = (ATI_TC_BYTE*) mem::malloc(dst.dwDataSize);

    ATI_TC_ConvertTexture(&src, &dst, NULL, NULL, NULL, NULL);

    if (outputOptions.outputHandler != NULL)
    {
        outputOptions.outputHandler->writeData(dst.pData, dst.dwDataSize);
    }

    mem::free(dst.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
// DXT1 compression through the squish library.
//
// This function is currently a no-op: the implementation below is commented out,
// so nothing is compressed and nothing is written to the output handler. The
// build only emits the TODO message from the pragma.
void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
// Disabled implementation: it refers to an `image` variable that is not a parameter of this function.
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
// Compress a w x h image to DXT1 using D3DX.
//
// A reference device is created, the input is loaded into a DXT1 system memory
// texture with D3DXLoadSurfaceFromMemory (which performs the compression), and
// the locked surface contents are sent to the output handler.
//
// Every Direct3D call is checked before its result is used, and every COM
// object that was obtained, including the texture, is released on all paths.
// On failure the function returns without writing any output.
void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
    if (d3d == NULL)
    {
        return;
    }

    D3DPRESENT_PARAMETERS presentParams;
    ZeroMemory(&presentParams, sizeof(presentParams));
    presentParams.Windowed = TRUE;
    presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
    presentParams.BackBufferWidth = 8;
    presentParams.BackBufferHeight = 8;
    presentParams.BackBufferFormat = D3DFMT_UNKNOWN;

    IDirect3DDevice9 * device = NULL;
    IDirect3DTexture9 * texture = NULL;
    IDirect3DSurface9 * surface = NULL;

    HRESULT err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);

    if (SUCCEEDED(err) && device != NULL)
    {
        err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
    }

    if (SUCCEEDED(err) && texture != NULL)
    {
        err = texture->GetSurfaceLevel(0, &surface);
    }

    if (SUCCEEDED(err) && surface != NULL)
    {
        RECT srcRect;
        srcRect.left = 0;
        srcRect.top = 0;
        srcRect.bottom = h;
        srcRect.right = w;

        // Loading into the DXT1 surface is what compresses the image.
        if (inputFormat == nvtt::InputFormat_BGRA_8UB)
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &srcRect, D3DX_DEFAULT, 0);
        }
        else
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &srcRect, D3DX_DEFAULT, 0);
        }

        if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
        {
            D3DLOCKED_RECT lockedRect;
            ZeroMemory(&lockedRect, sizeof(lockedRect));

            err = surface->LockRect(&lockedRect, NULL, D3DLOCK_READONLY);
            if (SUCCEEDED(err))
            {
                if (outputOptions.outputHandler != NULL)
                {
                    // One pitch worth of data per row of 4x4 blocks.
                    int size = lockedRect.Pitch * ((h + 3) / 4);
                    outputOptions.outputHandler->writeData(lockedRect.pBits, size);
                }

                surface->UnlockRect();
            }
        }
    }

    // Release in reverse order of acquisition; the texture was previously leaked.
    if (surface != NULL)
    {
        surface->Release();
    }
    if (texture != NULL)
    {
        texture->Release();
    }
    if (device != NULL)
    {
        device->Release();
    }
    d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
// Encode one color block as DXT1 with stb_dxt.
// Red and blue are exchanged before the colors are handed to stb_compress_dxt_block.
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Swap R and B
    rgba.swizzle(2, 1, 0, 3);

    unsigned char * const dst = (unsigned char *)output;
    unsigned char * const src = (unsigned char *)rgba.colors();

    stb_compress_dxt_block(dst, src, 0, 0);
}
#endif // defined(HAVE_STB)

View File

@ -1,179 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSORDXT_H
#define NV_TT_COMPRESSORDXT_H
#include <nvcore/nvcore.h>
#include "nvtt.h"
#include "Compressor.h"
namespace nv
{
struct ColorBlock;
// Base class for compressors that encode an image one fixed-size block at a time.
// compress() walks the image in 4x4 steps and calls compressBlock() for each one;
// concrete formats implement compressBlock() and blockSize().
struct FixedBlockCompressor : public CompressorInterface
{
// Compresses the whole w x h image and sends the encoded blocks to the output handler of outputOptions.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
// Encodes a single block of colors into the buffer pointed to by `output`.
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
// Number of bytes compress() emits for each encoded block.
virtual uint blockSize() const = 0;
};
// Fast CPU compressors.
// DXT1 block compressor based on QuickCompress; 8 bytes per block.
struct FastCompressorDXT1 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
// DXT1a block compressor based on QuickCompress; 8 bytes per block.
struct FastCompressorDXT1a : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
// DXT3 block compressor based on QuickCompress; 16 bytes per block.
struct FastCompressorDXT3 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// DXT5 block compressor based on QuickCompress; 16 bytes per block.
struct FastCompressorDXT5 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// Fast DXT5n block compressor; reports a block size of 16.
struct FastCompressorDXT5n : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// Fast BC4 block compressor; reports a block size of 8.
struct FastCompressorBC4 : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
// Fast BC5 block compressor; reports a block size of 16.
struct FastCompressorBC5 : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// Normal CPU compressors.
// Normal-quality DXT1 block compressor; reports a block size of 8.
struct NormalCompressorDXT1 : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
// Normal-quality DXT1a block compressor; reports a block size of 8.
struct NormalCompressorDXT1a : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
// Normal-quality DXT3 block compressor; reports a block size of 16.
struct NormalCompressorDXT3 : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// Normal-quality DXT5 block compressor; reports a block size of 16.
struct NormalCompressorDXT5 : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// Normal-quality DXT5n block compressor; reports a block size of 16.
struct NormalCompressorDXT5n : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// Production CPU compressors.
// Production-quality BC4 block compressor; reports a block size of 8.
struct ProductionCompressorBC4 : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
// Production-quality BC5 block compressor; reports a block size of 16.
struct ProductionCompressorBC5 : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// External compressors.
#if defined(HAVE_S3QUANT)
// Whole-image DXT1 compressor, only declared when HAVE_S3QUANT is defined.
// Derives directly from CompressorInterface rather than FixedBlockCompressor.
struct S3CompressorDXT1 : public CompressorInterface
{
// NOTE(review): `data` is a non-const `void *` here, while FixedBlockCompressor::compress in this
// header takes `const void * data`. If CompressorInterface::compress also takes a const pointer,
// this declaration would not override it -- confirm against Compressor.h.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_ATITC)
// Whole-image DXT1 compressor, only declared when HAVE_ATITC is defined.
// Derives directly from CompressorInterface rather than FixedBlockCompressor.
struct AtiCompressorDXT1 : public CompressorInterface
{
// NOTE(review): `data` is a non-const `void *` here, while FixedBlockCompressor::compress in this
// header takes `const void * data`. If CompressorInterface::compress also takes a const pointer,
// this declaration would not override it -- confirm against Compressor.h.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
// Whole-image DXT5 compressor, only declared when HAVE_ATITC is defined.
// Derives directly from CompressorInterface rather than FixedBlockCompressor.
struct AtiCompressorDXT5 : public CompressorInterface
{
// NOTE(review): `data` is a non-const `void *` here, while FixedBlockCompressor::compress in this
// header takes `const void * data`. If CompressorInterface::compress also takes a const pointer,
// this declaration would not override it -- confirm against Compressor.h.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
// Whole-image DXT1 compressor, only declared when HAVE_SQUISH is defined.
// Derives directly from CompressorInterface rather than FixedBlockCompressor.
struct SquishCompressorDXT1 : public CompressorInterface
{
// NOTE(review): `data` is a non-const `void *` here, while FixedBlockCompressor::compress in this
// header takes `const void * data`. If CompressorInterface::compress also takes a const pointer,
// this declaration would not override it -- confirm against Compressor.h.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
// Whole-image DXT1 compressor, only declared when HAVE_D3DX is defined.
// Derives directly from CompressorInterface rather than FixedBlockCompressor.
struct D3DXCompressorDXT1 : public CompressorInterface
{
// NOTE(review): `data` is a non-const `void *` here, while FixedBlockCompressor::compress in this
// header takes `const void * data`. If CompressorInterface::compress also takes a const pointer,
// this declaration would not override it -- confirm against Compressor.h.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_STB)
// DXT1 block compressor, only declared when HAVE_STB is defined; reports a block size of 8.
struct StbCompressorDXT1 : public FixedBlockCompressor
{
// Per-block encoder (defined outside this header).
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
} // nv namespace
#endif // NV_TT_COMPRESSORDXT_H

View File

@ -1,230 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "CompressorRGB.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvimage/PixelFormat.h>
#include <nvmath/Color.h>
#include <nvmath/Half.h>
#include <nvcore/Debug.h>
using namespace nv;
using namespace nvtt;
namespace
{
// Byte length of one scanline holding `w` pixels of `bitsize` bits each.
// The per-pixel size is rounded up to whole bytes and the row is then padded
// up to the next multiple of 4 bytes.
inline uint computePitch(uint w, uint bitsize)
{
    const uint bytesPerPixel = (bitsize + 7) / 8;
    const uint rowBytes = w * bytesPerPixel;

    // Pad to a 32-bit boundary by clearing the two low bits after adding 3.
    return (rowBytes + 3) & ~3u;
}
// Copies `w` 32-bit pixels from `src` to `dst` unchanged (4 bytes per pixel).
inline void convert_to_a8r8g8b8(const void * src, void * dst, uint w)
{
    const uint scanlineBytes = 4 * w;
    memcpy(dst, src, scanlineBytes);
}
// Copies `w` 32-bit pixels from `src` to `dst` unchanged (4 bytes per pixel).
// The fourth byte of each pixel is copied as-is, not cleared.
inline void convert_to_x8r8g8b8(const void * src, void * dst, uint w)
{
    const uint scanlineBytes = 4 * w;
    memcpy(dst, src, scanlineBytes);
}
// Converts a 32-bit float to a 16-bit half by handing its raw bit pattern
// to half_from_float().
static uint16 to_half(float f)
{
    union FloatBits
    {
        float asFloat;
        uint32 asBits;
    };

    FloatBits pun;
    pun.asFloat = f;

    const uint32 rawBits = pun.asBits;
    return half_from_float(rawBits);
}
} // namespace
void PixelFormatConverter::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
uint bitCount;
uint rmask, rshift, rsize;
uint gmask, gshift, gsize;
uint bmask, bshift, bsize;
uint amask, ashift, asize;
if (compressionOptions.pixelType == nvtt::PixelType_Float)
{
rsize = compressionOptions.rsize;
gsize = compressionOptions.gsize;
bsize = compressionOptions.bsize;
asize = compressionOptions.asize;
nvCheck(rsize == 0 || rsize == 16 || rsize == 32);
nvCheck(gsize == 0 || gsize == 16 || gsize == 32);
nvCheck(bsize == 0 || bsize == 16 || bsize == 32);
nvCheck(asize == 0 || asize == 16 || asize == 32);
bitCount = rsize + gsize + bsize + asize;
}
else
{
if (compressionOptions.bitcount != 0)
{
bitCount = compressionOptions.bitcount;
nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);
rmask = compressionOptions.rmask;
gmask = compressionOptions.gmask;
bmask = compressionOptions.bmask;
amask = compressionOptions.amask;
PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
PixelFormat::maskShiftAndSize(amask, &ashift, &asize);
}
else
{
rsize = compressionOptions.rsize;
gsize = compressionOptions.gsize;
bsize = compressionOptions.bsize;
asize = compressionOptions.asize;
bitCount = rsize + gsize + bsize + asize;
nvCheck(bitCount <= 32);
ashift = 0;
bshift = ashift + asize;
gshift = bshift + bsize;
rshift = gshift + gsize;
rmask = ((1 << rsize) - 1) << rshift;
gmask = ((1 << gsize) - 1) << gshift;
bmask = ((1 << bsize) - 1) << bshift;
amask = ((1 << asize) - 1) << ashift;
}
}
uint byteCount = (bitCount + 7) / 8;
uint pitch = computePitch(w, bitCount);
uint srcPitch = w;
uint srcPlane = w * h;
// Allocate output scanline.
uint8 * dst = (uint8 *)mem::malloc(pitch + 4);
for (uint y = 0; y < h; y++)
{
const uint * src = (const uint *)data + y * srcPitch;
const float * fsrc = (const float *)data + y * srcPitch;
uint8 * ptr = dst;
for (uint x = 0; x < w; x++)
{
float r, g, b, a;
if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
Color32 c = Color32(src[x]);
r = float(c.r) / 255.0f;
g = float(c.g) / 255.0f;
b = float(c.b) / 255.0f;
a = float(c.a) / 255.0f;
}
else {
nvDebugCheck (inputFormat == nvtt::InputFormat_RGBA_32F);
//r = ((float *)src)[4 * x + 0]; // Color components not interleaved.
//g = ((float *)src)[4 * x + 1];
//b = ((float *)src)[4 * x + 2];
//a = ((float *)src)[4 * x + 3];
r = fsrc[x + 0 * srcPlane];
g = fsrc[x + 1 * srcPlane];
b = fsrc[x + 2 * srcPlane];
a = fsrc[x + 3 * srcPlane];
}
if (compressionOptions.pixelType == nvtt::PixelType_Float)
{
if (rsize == 32) *((float *)ptr) = r;
else if (rsize == 16) *((uint16 *)ptr) = to_half(r);
ptr += rsize / 8;
if (gsize == 32) *((float *)ptr) = g;
else if (gsize == 16) *((uint16 *)ptr) = to_half(g);
ptr += gsize / 8;
if (bsize == 32) *((float *)ptr) = b;
else if (bsize == 16) *((uint16 *)ptr) = to_half(b);
ptr += bsize / 8;
if (asize == 32) *((float *)ptr) = a;
else if (asize == 16) *((uint16 *)ptr) = to_half(a);
ptr += asize / 8;
}
else
{
Color32 c;
if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) {
c.r = uint8(clamp(r * 255, 0.0f, 255.0f));
c.g = uint8(clamp(g * 255, 0.0f, 255.0f));
c.b = uint8(clamp(b * 255, 0.0f, 255.0f));
c.a = uint8(clamp(a * 255, 0.0f, 255.0f));
}
// @@ Add support for nvtt::PixelType_SignedInt, nvtt::PixelType_SignedNorm, nvtt::PixelType_UnsignedInt
uint p = 0;
p |= PixelFormat::convert(c.r, 8, rsize) << rshift;
p |= PixelFormat::convert(c.g, 8, gsize) << gshift;
p |= PixelFormat::convert(c.b, 8, bsize) << bshift;
p |= PixelFormat::convert(c.a, 8, asize) << ashift;
// Output one byte at a time.
for (uint i = 0; i < byteCount; i++)
{
*(dst + x * byteCount + i) = (p >> (i * 8)) & 0xFF;
}
}
}
// Zero padding.
for (uint x = w * byteCount; x < pitch; x++)
{
*(dst + x) = 0;
}
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(dst, pitch);
}
}
mem::free(dst);
}

View File

@ -1,102 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "CompressorRGBE.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvmath/Color.h>
#include <nvcore/Debug.h>
using namespace nv;
using namespace nvtt;
// Packs three float colour channels into a Color32: r, g, b carry the channels scaled by a
// common factor derived from the largest channel, and a carries that channel's binary
// exponent biased by 128. Inputs whose largest channel is below 1e-32 produce all zeros.
static Color32 toRgbe8(float r, float g, float b)
{
    Color32 rgbe;

    const float peak = max(max(r, g), b);
    if (peak < 1e-32) {
        rgbe.r = 0;
        rgbe.g = 0;
        rgbe.b = 0;
        rgbe.a = 0;
        return rgbe;
    }

    // frexp yields the peak's mantissa and stores its exponent, so `scale` maps the peak
    // channel onto that mantissa times 256.
    int exponent;
    const float scale = frexp(peak, &exponent) * 256.0f / peak;

    rgbe.r = uint8(clamp(r * scale, 0.0f, 255.0f));
    rgbe.g = uint8(clamp(g * scale, 0.0f, 255.0f));
    rgbe.b = uint8(clamp(b * scale, 0.0f, 255.0f));
    rgbe.a = exponent + 128;

    return rgbe;
}
// Encodes a w x h image with toRgbe8() and streams it, one scanline of w Color32 values at a
// time, to outputOptions.outputHandler (if set).
//
//  inputFormat        - InputFormat_BGRA_8UB (packed 32-bit pixels) or InputFormat_RGBA_32F
//                       (planar floats: one w*h plane per channel). Alpha is ignored.
//  alphaMode          - not used by this compressor.
//  w, h               - image dimensions in pixels.
//  data               - source pixels in `inputFormat`.
//  compressionOptions - expected to have format == Format_RGBE (checked in debug builds).
//  outputOptions      - provides the output handler that receives each scanline.
void CompressorRGBE::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    nvDebugCheck (compressionOptions.format == nvtt::Format_RGBE);

    const uint srcPitch = w;
    const uint srcPlane = w * h;

    // Allocate output scanline. The buffer holds w Color32 entries, so it must be sized in
    // pixels, not bytes; allocating only `w` bytes overflows the heap on the first row.
    Color32 * dst = (Color32 *)mem::malloc(w * sizeof(Color32));

    for (uint y = 0; y < h; y++)
    {
        const uint * src = (const uint *)data + y * srcPitch;
        const float * fsrc = (const float *)data + y * srcPitch;

        for (uint x = 0; x < w; x++)
        {
            float r, g, b;

            if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
                Color32 c = Color32(src[x]);
                r = float(c.r) / 255.0f;
                g = float(c.g) / 255.0f;
                b = float(c.b) / 255.0f;
            }
            else {
                nvDebugCheck (inputFormat == nvtt::InputFormat_RGBA_32F);

                // Color components not interleaved.
                r = fsrc[x + 0 * srcPlane];
                g = fsrc[x + 1 * srcPlane];
                b = fsrc[x + 2 * srcPlane];
            }

            dst[x] = toRgbe8(r, g, b);
        }

        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(dst, w * 4);
        }
    }

    mem::free(dst);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,87 +0,0 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_CONTEXT_H
#define NV_TT_CONTEXT_H
#include "nvcore/Ptr.h"
#include "nvtt/Compressor.h"
#include "nvtt/cuda/CudaCompressorDXT.h"
#include "nvtt.h"
namespace nv
{
class Image;
}
namespace nvtt
{
struct Mipmap;
// Private implementation state and helpers behind nvtt::Compressor.
// Only declarations live here; the method bodies are defined elsewhere.
struct Compressor::Private
{
// NOTE(review): the constructor body is empty, so cudaSupported and cudaEnabled are not
// initialised here -- presumably the owning Compressor sets them; confirm.
Private() {}
// Top-level entry point taking the full input, compression and output option sets.
bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
// Compresses a single w x h image given as raw `data` in `inputFormat`.
bool compress2D(InputFormat inputFormat, AlphaMode alphaMode, int w, int h, const void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
int estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const;
// Public header writer for a TexImage; unlike the private overload below it is not const.
bool outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions);
private:
bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
// Compressor selection; both return a raw CompressorInterface pointer
// (ownership is not visible from this header -- confirm at the call sites).
nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const;
nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const;
bool compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
// Mipmap helpers; w/h/d are extents, and f and m presumably the face and mipmap index -- confirm.
bool initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const;
int findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
int findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
public:
// CUDA state: two flags and the CUDA context, which is held through an nv::AutoPtr.
bool cudaSupported;
bool cudaEnabled;
nv::AutoPtr<nv::CudaContext> cuda;
};
} // nvtt namespace
#endif // NV_TT_CONTEXT_H

View File

@ -23,11 +23,8 @@
#include <string.h> // memcpy
#include <nvcore/Containers.h> // nextPowerOfTwo
#include <nvcore/Memory.h>
#include <nvmath/Color.h>
#include "nvtt.h"
#include "InputOptions.h"
@ -104,8 +101,6 @@ void InputOptions::reset()
m.colorTransform = ColorTransform_None;
m.linearTransform = Matrix(identity);
for (int i = 0; i < 4; i++) m.colorOffsets[i] = 0;
for (int i = 0; i < 4; i++) m.swizzleTransform[i] = i;
m.generateMipmaps = true;
m.maxLevel = -1;
@ -123,8 +118,6 @@ void InputOptions::reset()
m.maxExtent = 0;
m.roundMode = RoundMode_None;
m.premultiplyAlpha = false;
}
@ -168,8 +161,7 @@ void InputOptions::setTextureLayout(TextureType type, int width, int height, int
img.mipLevel = mipLevel;
img.face = f;
img.uint8data = NULL;
img.floatdata = NULL;
img.data = NULL;
w = max(1U, w / 2);
h = max(1U, h / 2);
@ -207,116 +199,14 @@ bool InputOptions::setMipmapData(const void * data, int width, int height, int d
return false;
}
switch(m.inputFormat)
{
case InputFormat_BGRA_8UB:
if (Image * image = new nv::Image())
{
image->allocate(width, height);
memcpy(image->pixels(), data, width * height * 4);
m.images[idx].uint8data = image;
}
else
{
// @@ Out of memory error.
return false;
}
break;
case InputFormat_RGBA_32F:
if (FloatImage * image = new nv::FloatImage())
{
const float * floatData = (const float *)data;
image->allocate(4, width, height);
for (int c = 0; c < 4; c++)
{
float * channel = image->channel(c);
for (int i = 0; i < width * height; i++)
{
channel[i] = floatData[i*4 + c];
}
}
m.images[idx].floatdata = image;
}
else
{
// @@ Out of memory error.
return false;
}
break;
default:
return false;
}
m.images[idx].data = new nv::Image();
m.images[idx].data->allocate(width, height);
memcpy(m.images[idx].data->pixels(), data, width * height * 4);
return true;
}
// Copies data
bool InputOptions::setMipmapChannelData(const void * data, int channel, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/)
{
nvCheck(depth == 1);
nvCheck(channel >= 0 && channel < 4);
const int idx = face * m.mipmapCount + mipLevel;
if (m.images[idx].width != width || m.images[idx].height != height || m.images[idx].depth != depth || m.images[idx].mipLevel != mipLevel || m.images[idx].face != face)
{
// Invalid dimension or index.
return false;
}
// Allocate image if not allocated already.
if (m.inputFormat == InputFormat_BGRA_8UB)
{
m.images[idx].floatdata = NULL;
if (m.images[idx].uint8data == NULL)
{
m.images[idx].uint8data = new Image();
m.images[idx].uint8data->allocate(width, height);
m.images[idx].uint8data->fill(Color32(0,0,0,0));
}
}
else if (m.inputFormat == InputFormat_RGBA_32F)
{
m.images[idx].uint8data = NULL;
if (m.images[idx].floatdata == NULL)
{
m.images[idx].floatdata = new FloatImage();
m.images[idx].floatdata->allocate(4, width, height);
m.images[idx].floatdata->clear();
}
}
else
{
m.images[idx].floatdata = NULL;
m.images[idx].uint8data = NULL;
return false;
}
// Copy channel data to image.
if (m.inputFormat == InputFormat_BGRA_8UB)
{
// @@ TODO
}
else if (m.inputFormat == InputFormat_RGBA_32F)
{
const float * floatData = (const float *)data;
float * channelPtr = m.images[idx].floatdata->channel(channel);
for (int i = 0; i < width * height; i++)
{
channelPtr[i] = floatData[i];
}
}
return true;
}
/// Describe the format of the input.
void InputOptions::setFormat(InputFormat format)
{
@ -411,32 +301,8 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2,
{
nvCheck(channel >= 0 && channel < 4);
m.linearTransform(channel, 0) = w0;
m.linearTransform(channel, 1) = w1;
m.linearTransform(channel, 2) = w2;
m.linearTransform(channel, 3) = w3;
}
void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset)
{
nvCheck(channel >= 0 && channel < 4);
setLinearTransform(channel, w0, w1, w2, w3);
m.colorOffsets[channel] = offset;
}
void InputOptions::setSwizzleTransform(int x, int y, int z, int w)
{
nvCheck(x >= 0 && x <= 6);
nvCheck(y >= 0 && y <= 6);
nvCheck(z >= 0 && z <= 6);
nvCheck(w >= 0 && w <= 6);
m.swizzleTransform[0] = x;
m.swizzleTransform[1] = y;
m.swizzleTransform[2] = z;
m.swizzleTransform[3] = w;
Vector4 w(w0, w1, w2, w3);
//m.linearTransform.setRow(channel, w);
}
void InputOptions::setMaxExtents(int e)
@ -450,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode)
m.roundMode = mode;
}
void InputOptions::setPremultiplyAlpha(bool b)
{
m.premultiplyAlpha = b;
}
void InputOptions::Private::computeTargetExtents() const
{
@ -533,7 +395,7 @@ const Image * InputOptions::Private::image(uint face, uint mipmap) const
nvDebugCheck(image.face == face);
nvDebugCheck(image.mipLevel == mipmap);
return image.uint8data.ptr();
return image.data.ptr();
}
const Image * InputOptions::Private::image(uint idx) const
@ -542,14 +404,5 @@ const Image * InputOptions::Private::image(uint idx) const
const InputImage & image = this->images[idx];
return image.uint8data.ptr();
}
const FloatImage * InputOptions::Private::floatImage(uint idx) const
{
nvDebugCheck(idx < faceCount * mipmapCount);
const InputImage & image = this->images[idx];
return image.floatdata.ptr();
return image.data.ptr();
}

View File

@ -28,7 +28,6 @@
#include <nvmath/Vector.h>
#include <nvmath/Matrix.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include "nvtt.h"
namespace nvtt
@ -57,8 +56,6 @@ namespace nvtt
// Color transform.
ColorTransform colorTransform;
nv::Matrix linearTransform;
float colorOffsets[4];
uint swizzleTransform[4];
// Mipmap generation options.
bool generateMipmaps;
@ -81,8 +78,6 @@ namespace nvtt
uint maxExtent;
RoundMode roundMode;
bool premultiplyAlpha;
// @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere...
mutable uint targetWidth;
mutable uint targetHeight;
@ -94,9 +89,7 @@ namespace nvtt
int realMipmapCount() const;
const nv::Image * image(uint face, uint mipmap) const;
const nv::Image * image(uint idx) const;
const nv::FloatImage * floatImage(uint idx) const;
const nv::Image * image(uint idx) const;
};
@ -105,8 +98,6 @@ namespace nvtt
{
InputImage() {}
bool hasValidData() const { return uint8data != NULL || floatdata != NULL; }
int mipLevel;
int face;
@ -114,8 +105,7 @@ namespace nvtt
int height;
int depth;
nv::AutoPtr<nv::Image> uint8data;
nv::AutoPtr<nv::FloatImage> floatdata;
nv::AutoPtr<nv::Image> data;
};
} // nvtt namespace

View File

@ -21,17 +21,16 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvmath/Color.h>
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
#include <nvcore/Containers.h> // swap
#include <limits.h>
using namespace nv;
using namespace OptimalCompress;
@ -40,37 +39,10 @@ using namespace OptimalCompress;
namespace
{
static int greenDistance(int g0, int g1)
{
//return abs(g0 - g1);
int d = g0 - g1;
return d * d;
}
static int alphaDistance(int a0, int a1)
{
//return abs(a0 - a1);
int d = a0 - a1;
return d * d;
}
static uint nearestGreen4(uint green, uint maxGreen, uint minGreen)
{
uint bias = maxGreen + (maxGreen - minGreen) / 6;
uint index = 0;
if (maxGreen - minGreen != 0) index = clamp(3 * (bias - green) / (maxGreen - minGreen), 0U, 3U);
return (index * minGreen + (3 - index) * maxGreen) / 3;
}
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block, int bestError = INT_MAX)
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
// uint g0 = (block->col0.g << 2) | (block->col0.g >> 4);
// uint g1 = (block->col1.g << 2) | (block->col1.g >> 4);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
@ -78,24 +50,17 @@ namespace
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = greenDistance(green, palette[0]);
error = min(error, greenDistance(green, palette[1]));
error = min(error, greenDistance(green, palette[2]));
error = min(error, greenDistance(green, palette[3]));
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
// totalError += nearestGreen4(green, g0, g1);
if (totalError > bestError)
{
// early out
return totalError;
}
}
return totalError;
@ -113,10 +78,10 @@ namespace
{
const int color = rgba.color(i).g;
uint d0 = greenDistance(color0, color);
uint d1 = greenDistance(color1, color);
uint d2 = greenDistance(color2, color);
uint d3 = greenDistance(color3, color);
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
@ -137,78 +102,49 @@ namespace
// Choose quantized color that produces less error. Used by DXT3 compressor.
inline static uint quantize4(uint8 a)
{
int q0 = max(int(a >> 4) - 1, 0);
int q0 = (a >> 4) - 1;
int q1 = (a >> 4);
int q2 = min(int(a >> 4) + 1, 0xF);
int q2 = (a >> 4) + 1;
q0 = (q0 << 4) | q0;
q1 = (q1 << 4) | q1;
q2 = (q2 << 4) | q2;
int d0 = alphaDistance(q0, a);
int d1 = alphaDistance(q1, a);
int d2 = alphaDistance(q2, a);
int d0 = abs(q0 - a);
int d1 = abs(q1 - a);
int d2 = abs(q2 - a);
if (d0 < d1 && d0 < d2) return q0 >> 4;
if (d1 < d2) return q1 >> 4;
return q2 >> 4;
}
static uint nearestAlpha8(uint alpha, uint maxAlpha, uint minAlpha)
{
float bias = maxAlpha + float(maxAlpha - minAlpha) / (2.0f * 7.0f);
float scale = 7.0f / float(maxAlpha - minAlpha);
uint index = (uint)clamp((bias - float(alpha)) * scale, 0.0f, 7.0f);
return (index * minAlpha + (7 - index) * maxAlpha) / 7;
}
static uint computeAlphaError8(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
{
int totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
totalError += alphaDistance(alpha, nearestAlpha8(alpha, block->alpha0, block->alpha1));
if (totalError > bestError)
{
// early out
return totalError;
}
}
return totalError;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
int totalError = 0;
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int minDist = INT_MAX;
uint besterror = 256*256;
uint best;
for (uint p = 0; p < 8; p++)
{
int dist = alphaDistance(alpha, alphas[p]);
minDist = min(dist, minDist);
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += minDist;
if (totalError > bestError)
{
// early out
return totalError;
}
totalError += besterror;
}
return totalError;
@ -223,21 +159,22 @@ namespace
{
uint8 alpha = rgba.color(i).a;
int minDist = INT_MAX;
int bestIndex = 8;
for (uint p = 0; p < 8; p++)
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int dist = alphaDistance(alpha, alphas[p]);
int d = alphas[p] - alpha;
uint error = d * d;
if (dist < minDist)
if (error < besterror)
{
minDist = dist;
bestIndex = p;
besterror = error;
best = p;
}
}
nvDebugCheck(bestIndex < 8);
nvDebugCheck(best < 8);
block->setIndex(i, bestIndex);
block->setIndex(i, best);
}
}
@ -280,23 +217,6 @@ void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
}
}
void OptimalCompress::compressDXT1G(uint8 g, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = 31;
dxtBlock->col0.g = OMatch6[g][0];
dxtBlock->col0.b = 0;
dxtBlock->col1.r = 31;
dxtBlock->col1.g = OMatch6[g][1];
dxtBlock->col1.b = 0;
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
// Brute force green channel compressor
void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
@ -306,23 +226,12 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
uint8 ming = 63;
uint8 maxg = 0;
bool isSingleColor = true;
uint8 singleColor = rgba.color(0).g;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = (rgba.color(i).g + 1) >> 2;
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
if (rgba.color(i).g != singleColor) isSingleColor = false;
}
if (isSingleColor)
{
compressDXT1G(singleColor, block);
return;
}
block->col0.r = 31;
@ -332,38 +241,36 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
block->col0.b = 0;
block->col1.b = 0;
int bestError = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
// Expand search space a bit.
const int greenExpand = 4;
ming = (ming <= greenExpand) ? 0 : ming - greenExpand;
maxg = (maxg >= 63-greenExpand) ? 63 : maxg + greenExpand;
for (int g0 = ming+1; g0 <= maxg; g0++)
if (maxg - ming > 4)
{
for (int g1 = ming; g1 < g0; g1++)
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block, bestError);
if (error < bestError)
for (int g1 = ming; g1 < g0-4; g1++)
{
bestError = error;
bestg0 = g0;
bestg1 = g1;
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
block->col0.g = bestg0;
block->col1.g = bestg1;
nvDebugCheck(bestg0 == bestg1 || block->isFourColorMode());
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
@ -406,26 +313,42 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
int besta0 = maxa;
int besta1 = mina;
// Expand search space a bit.
const int alphaExpand = 8;
mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand;
maxa = (maxa >= 255-alphaExpand) ? 255 : maxa + alphaExpand;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
nvDebugCheck(a0 - a1 > 8);
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock, besterror);
int error = computeAlphaError(rgba, dxtBlock);
if (error < besterror)
{

View File

@ -26,8 +26,6 @@
#include <nvimage/nvimage.h>
#include <nvmath/Color.h>
namespace nv
{
struct ColorBlock;
@ -41,7 +39,6 @@ namespace nv
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);

View File

@ -33,9 +33,6 @@ OutputOptions::OutputOptions() : m(*new OutputOptions::Private())
/// Destructor: clears the output handler, then releases the private state.
// NOTE(review): this span comes from a diff view whose +/- markers were
// stripped; the handler cleanup below may belong to only one of the two
// revisions -- confirm against the repository.
OutputOptions::~OutputOptions()
{
// Cleanup output handler.
setOutputHandler(NULL);
// `m` is bound by the constructor to `*new OutputOptions::Private()`, so the
// object is released through its address.
delete &m;
}
@ -46,31 +43,20 @@ void OutputOptions::reset()
m.outputHandler = NULL;
m.errorHandler = NULL;
m.outputHeader = true;
m.container = Container_DDS;
}
/// Set output file name.
/// Records the name, clears the current handler pointer, and installs a
/// DefaultOutputHandler for the file only when its stream reports no error.
// NOTE(review): this span comes from a diff view whose +/- markers were
// stripped; the two consecutive `m.fileName` assignments look like the old and
// new revisions of the same line -- confirm against the repository.
void OutputOptions::setFileName(const char * fileName)
{
m.fileName = fileName; // @@ Do we need to record filename?
m.fileName = fileName;
// The previous handler pointer is overwritten here, not deleted.
m.outputHandler = NULL;
DefaultOutputHandler * oh = new DefaultOutputHandler(fileName);
// On a stream error the handler stays NULL.
// NOTE(review): in that case `oh` is neither stored nor deleted in this
// function -- looks like a leak; confirm.
if (!oh->stream.isError())
{
m.outputHandler = oh;
}
}
/// Set output handler.
/// Installs a caller-supplied handler and clears any recorded file name.
// NOTE(review): this span comes from a diff view whose +/- markers were
// stripped; the conditional block and the unconditional `m.fileName.reset()`
// after it look like two revisions of the same logic -- confirm against the
// repository.
void OutputOptions::setOutputHandler(OutputHandler * outputHandler)
{
// A non-null file name means setFileName() was used; as rendered here that
// function stores a handler it created with `new`, which is deleted below.
if (!m.fileName.isNull())
{
delete m.outputHandler;
m.fileName.reset();
}
m.fileName.reset();
m.outputHandler = outputHandler;
}
@ -86,20 +72,31 @@ void OutputOptions::setOutputHeader(bool outputHeader)
m.outputHeader = outputHeader;
}
/// Set container.
/// Stores the requested container in the private state; reset(), as rendered
/// in this text, assigns Container_DDS to the same field.
void OutputOptions::setContainer(Container container)
{
m.container = container;
}
// NOTE(review): this span comes from a diff view whose +/- markers were
// stripped. Two signatures (hasValidOutputHandler and openFile) share one body,
// and the early `return outputHandler != NULL;` appears to belong to the
// hasValidOutputHandler revision -- confirm against the repository.
//
// openFile, as rendered: when a file name is set, creates a
// DefaultOutputHandler for it and stores it in `outputHandler`; returns false
// if the handler's stream reports an error, true otherwise (including when no
// file name is set).
bool OutputOptions::Private::hasValidOutputHandler() const
bool OutputOptions::Private::openFile() const
{
if (!fileName.isNull())
{
return outputHandler != NULL;
// No handler is expected to be installed yet when opening by file name.
nvCheck(outputHandler == NULL);
DefaultOutputHandler * oh = new DefaultOutputHandler(fileName.str());
if (oh->stream.isError())
{
// NOTE(review): `oh` is not deleted on this path -- looks like a leak; confirm.
return false;
}
// Assignment inside a const method; the header hunk in this text shows a
// `mutable OutputHandler * outputHandler;` line that would permit it.
outputHandler = oh;
}
return true;
}
/// Releases the handler associated with a file name, if any.
/// When no file name is set, the handler pointer is left untouched
/// (presumably because it was supplied by the caller -- TODO confirm).
void OutputOptions::Private::closeFile() const
{
if (!fileName.isNull())
{
delete outputHandler;
// Reset so a later openFile() sees no installed handler.
outputHandler = NULL;
}
}

View File

@ -52,7 +52,7 @@ namespace nvtt
//return !stream.isError();
return true;
}
nv::StdOutputStream stream;
};
@ -61,12 +61,12 @@ namespace nvtt
{
nv::Path fileName;
OutputHandler * outputHandler;
mutable OutputHandler * outputHandler;
ErrorHandler * errorHandler;
bool outputHeader;
Container container;
bool hasValidOutputHandler() const;
bool openFile() const;
void closeFile() const;
};

Some files were not shown because too many files have changed in this diff Show More