Import all sources from perforce.

This commit is contained in:
castano
2007-04-17 08:49:19 +00:00
commit 7543dd1efa
197 changed files with 49819 additions and 0 deletions

92
src/CMakeLists.txt Normal file
View File

@ -0,0 +1,92 @@
# Sub-projects that make up the library.
# NOTE(review): SUBDIRS() is the legacy command; unlike ADD_SUBDIRECTORY() it is
# believed to process the listed directories after this file has been read, so
# the children see the include paths and HAVE_* results set below. Confirm the
# ordering before replacing it with ADD_SUBDIRECTORY().
SUBDIRS(nvcore)
SUBDIRS(nvmath)
SUBDIRS(nvimage)

# Expose the source tree and the build tree (where nvconfig.h is generated).
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})

# OpenGL
INCLUDE(FindOpenGL)
IF(OPENGL_FOUND)
    MESSAGE(STATUS "Looking for OpenGL - found")
ELSE(OPENGL_FOUND)
    MESSAGE(STATUS "Looking for OpenGL - not found")
ENDIF(OPENGL_FOUND)

# GLUT (uses the project's own find module instead of the stock one)
INCLUDE(${NV_CMAKE_DIR}/FindGLUT.cmake)
#INCLUDE(FindGLUT)
IF(GLUT_FOUND)
    MESSAGE(STATUS "Looking for GLUT - found")
ELSE(GLUT_FOUND)
    MESSAGE(STATUS "Looking for GLUT - not found")
ENDIF(GLUT_FOUND)

# GLEW
INCLUDE(${NV_CMAKE_DIR}/FindGLEW.cmake)
IF(GLEW_FOUND)
    MESSAGE(STATUS "Looking for GLEW - found")
ELSE(GLEW_FOUND)
    MESSAGE(STATUS "Looking for GLEW - not found")
ENDIF(GLEW_FOUND)

# Cg
INCLUDE(${NV_CMAKE_DIR}/FindCg.cmake)
IF(CG_FOUND)
    MESSAGE(STATUS "Looking for Cg - found")
ELSE(CG_FOUND)
    MESSAGE(STATUS "Looking for Cg - not found")
ENDIF(CG_FOUND)

# CUDA
INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake)
IF(CUDA_FOUND)
    MESSAGE(STATUS "Looking for CUDA - found")
ELSE(CUDA_FOUND)
    MESSAGE(STATUS "Looking for CUDA - not found")
ENDIF(CUDA_FOUND)

# Image libraries. HAVE_<LIB> feeds the matching #cmakedefine in nvconfig.h.in.
# It is set to a real boolean; the previous code stored the literal text
# "<LIB>_FOUND", which only worked because any non-false string enables a
# #cmakedefine.

# JPEG
INCLUDE(FindJPEG)
IF(JPEG_FOUND)
    SET(HAVE_JPEG TRUE)
    MESSAGE(STATUS "Looking for JPEG - found")
ELSE(JPEG_FOUND)
    MESSAGE(STATUS "Looking for JPEG - not found")
ENDIF(JPEG_FOUND)

# PNG
INCLUDE(FindPNG)
IF(PNG_FOUND)
    SET(HAVE_PNG TRUE)
    MESSAGE(STATUS "Looking for PNG - found")
ELSE(PNG_FOUND)
    MESSAGE(STATUS "Looking for PNG - not found")
ENDIF(PNG_FOUND)

# TIFF
INCLUDE(FindTIFF)
IF(TIFF_FOUND)
    SET(HAVE_TIFF TRUE)
    MESSAGE(STATUS "Looking for TIFF - found")
ELSE(TIFF_FOUND)
    MESSAGE(STATUS "Looking for TIFF - not found")
ENDIF(TIFF_FOUND)

# Qt
FIND_PACKAGE(Qt4)

# Threads
FIND_PACKAGE(Threads)

# configuration file: probe for optional system headers, then generate
# nvconfig.h in the build tree from the template.
INCLUDE(CheckIncludeFiles)
CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILES(stdarg.h HAVE_STDARG_H)
CHECK_INCLUDE_FILES(signal.h HAVE_SIGNAL_H)
CHECK_INCLUDE_FILES(execinfo.h HAVE_EXECINFO_H)
CHECK_INCLUDE_FILES(malloc.h HAVE_MALLOC_H)
CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/nvconfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/nvconfig.h)

14
src/nvconfig.h.in Normal file
View File

@ -0,0 +1,14 @@
#ifndef NV_CONFIG
#define NV_CONFIG

// Template for the generated configuration header. Each symbol below is
// defined in the generated file only when the build system enabled it.

// Optional image libraries.
#cmakedefine HAVE_JPEG
#cmakedefine HAVE_PNG
#cmakedefine HAVE_TIFF

// Optional system headers.
#cmakedefine HAVE_EXECINFO_H
#cmakedefine HAVE_MALLOC_H
#cmakedefine HAVE_SIGNAL_H
#cmakedefine HAVE_STDARG_H
#cmakedefine HAVE_UNISTD_H

#endif // NV_CONFIG

168
src/nvcore/BitArray.h Normal file
View File

@ -0,0 +1,168 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_BITARRAY_H
#define NV_CORE_BITARRAY_H
#include <nvcore/nvcore.h>
#include <nvcore/Containers.h>
namespace nv
{
/// Return the number of bits that are set in the byte @a x.
inline uint bitsSet(uint8 x) {
    uint total = 0;
    // Inspect the lowest bit, then shift it out, until nothing is left.
    while (x != 0) {
        if ((x & 1) != 0) {
            ++total;
        }
        x >>= 1;
    }
    return total;
}
/// Return how many of the lowest @a bits bits of @a x are set.
/// Counting stops as soon as @a x has no set bits left or @a bits
/// positions have been examined, whichever comes first.
inline uint bitsSet(uint32 x, int bits) {
    uint total = 0;
    while (x != 0 && bits != 0) {
        total += (x & 1);
        x >>= 1;
        bits--;
    }
    return total;
}
/// Simple bit array.
class BitArray
{
public:
/// Default ctor.
BitArray() {}
/// Ctor with initial m_size.
BitArray(uint sz)
{
resize(sz);
}
/// Get array m_size.
uint size() const { return m_size; }
/// Clear array m_size.
void clear() { resize(0); }
/// Set array m_size.
void resize(uint sz)
{
m_size = sz;
m_bitArray.resize( (m_size + 7) >> 3 );
}
/// Get bit.
bool bitAt(uint b) const
{
nvDebugCheck( b < m_size );
return (m_bitArray[b >> 3] & (1 << (b & 7))) != 0;
}
/// Set a bit.
void setBitAt(uint b)
{
nvDebugCheck( b < m_size );
m_bitArray[b >> 3] |= (1 << (b & 7));
}
/// Clear a bit.
void clearBitAt( uint b )
{
nvDebugCheck( b < m_size );
m_bitArray[b >> 3] &= ~(1 << (b & 7));
}
/// Clear all the bits.
void clearAll()
{
memset(m_bitArray.unsecureBuffer(), 0, m_bitArray.size());
}
/// Set all the bits.
void setAll()
{
memset(m_bitArray.unsecureBuffer(), 0xFF, m_bitArray.size());
}
/// Toggle all the bits.
void toggleAll()
{
const uint byte_num = m_bitArray.size();
for(uint b = 0; b < byte_num; b++) {
m_bitArray[b] ^= 0xFF;
}
}
/// Get a byte of the bit array.
const uint8 & byteAt(uint index) const
{
return m_bitArray[index];
}
/// Set the given byte of the byte array.
void setByteAt(uint index, uint8 b)
{
m_bitArray[index] = b;
}
/// Count the number of bits set.
uint countSetBits() const
{
const uint num = m_bitArray.size();
if( num == 0 ) {
return 0;
}
uint count = 0;
for(uint i = 0; i < num - 1; i++) {
count += bitsSet(m_bitArray[i]);
}
count += bitsSet(m_bitArray[num-1], m_size & 0x7);
//piDebugCheck(count + countClearBits() == m_size);
return count;
}
/// Count the number of bits clear.
uint countClearBits() const {
const uint num = m_bitArray.size();
if( num == 0 ) {
return 0;
}
uint count = 0;
for(uint i = 0; i < num - 1; i++) {
count += bitsSet(~m_bitArray[i]);
}
count += bitsSet(~m_bitArray[num-1], m_size & 0x7);
//piDebugCheck(count + countSetBits() == m_size);
return count;
}
friend void swap(BitArray & a, BitArray & b)
{
swap(a.m_size, b.m_size);
swap(a.m_bitArray, b.m_bitArray);
}
private:
/// Number of bits stored.
uint m_size;
/// Array of bits.
Array<uint8> m_bitArray;
};
} // nv namespace
#endif // NV_CORE_BITARRAY_H

36
src/nvcore/CMakeLists.txt Normal file
View File

@ -0,0 +1,36 @@
PROJECT(nvcore)

# poshlib is added before the definitions and include paths below are set.
ADD_SUBDIRECTORY(poshlib)

# Headers are listed next to the sources so that they appear in generated IDE projects.
SET(CORE_SRCS
    nvcore.h
    BitArray.h
    Memory.h
    Memory.cpp
    Debug.h
    Debug.cpp
    Containers.h
    StrLib.h
    StrLib.cpp
    Stream.h
    StdStream.h
    TextReader.h
    TextReader.cpp
    TextWriter.h
    Tokenizer.h
    Tokenizer.cpp
    Radix.h
    Radix.cpp)

INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

# targets
ADD_DEFINITIONS(-DNVCORE_EXPORTS)

# NVCORE_SHARED forces a shared library; otherwise the default library type is used.
IF(NOT NVCORE_SHARED)
    ADD_LIBRARY(nvcore ${CORE_SRCS})
ELSE(NOT NVCORE_SHARED)
    ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ENDIF(NOT NVCORE_SHARED)

TARGET_LINK_LIBRARIES(nvcore ${LIBS})

1054
src/nvcore/Containers.h Normal file

File diff suppressed because it is too large Load Diff

456
src/nvcore/Debug.cpp Normal file
View File

@ -0,0 +1,456 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Debug.h>
#include <nvcore/StrLib.h>
// Extern
#if NV_OS_WIN32 //&& NV_CC_MSVC
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
# include <direct.h>
# if NV_CC_MSVC
# include <crtdbg.h>
# if _MSC_VER < 1300
# define DECLSPEC_DEPRECATED
// VC6: change this path to your Platform SDK headers
# include <dbghelp.h> // must be XP version of file
// include "M:\\dev7\\vs\\devtools\\common\\win32sdk\\include\\dbghelp.h"
# else
// VC7: ships with updated headers
# include <dbghelp.h>
# endif
# endif
#endif
#if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
# include <signal.h>
#endif
#if NV_OS_LINUX && defined(HAVE_EXECINFO_H)
# include <execinfo.h>
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
# include <cxxabi.h>
# endif
#endif
#if NV_OS_DARWIN
# include <unistd.h> // getpid
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
#endif
#include <stdexcept> // std::runtime_error
#undef assert // defined on mingw
using namespace nv;
namespace
{
// Installed message sink; while it is NULL, nvDebug() discards its output.
static MessageHandler * s_message_handler = NULL;
// User supplied assert handler; while it is NULL, nvAbort() uses the platform default.
static AssertHandler * s_assert_handler = NULL;
// Set by debug::enableSigHandler() and cleared by debug::disableSigHandler().
static bool s_sig_handler_enabled = false;
#if NV_OS_WIN32 && NV_CC_MSVC
// Old exception filter.
static LPTOP_LEVEL_EXCEPTION_FILTER s_old_exception_filter = NULL;
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
// Old signal handlers.
struct sigaction s_old_sigsegv;
struct sigaction s_old_sigtrap;
struct sigaction s_old_sigfpe;
struct sigaction s_old_sigbus;
#endif
#if NV_OS_WIN32 && NV_CC_MSVC
// Top level exception filter installed by debug::enableSigHandler().
// It currently does nothing and always lets the search for a handler
// continue. The disabled code below is the intended minidump writer.
// TODO write minidump
static LONG WINAPI nvTopLevelFilter( struct _EXCEPTION_POINTERS *pExceptionInfo ) {
/* BOOL (WINAPI * Dump) (HANDLE, DWORD, HANDLE, MINIDUMP_TYPE, PMINIDUMP_EXCEPTION_INFORMATION, PMINIDUMP_USER_STREAM_INFORMATION, PMINIDUMP_CALLBACK_INFORMATION );
AutoString dbghelp_path(512);
getcwd(dbghelp_path, 512);
dbghelp_path.Append("\\DbgHelp.dll");
nvTranslatePath(dbghelp_path);
PiLibrary DbgHelp_lib(dbghelp_path, true);
if( !DbgHelp_lib.IsValid() ) {
nvDebug("*** 'DbgHelp.dll' not found.\n");
return EXCEPTION_CONTINUE_SEARCH;
}
if( !DbgHelp_lib.BindSymbol( (void **)&Dump, "MiniDumpWriteDump" ) ) {
nvDebug("*** 'DbgHelp.dll' too old.\n");
return EXCEPTION_CONTINUE_SEARCH;
}
// create the file
HANDLE hFile = ::CreateFile( "nv.dmp", GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL );
if( hFile == INVALID_HANDLE_VALUE ) {
nvDebug("*** Failed to create dump file.\n");
return EXCEPTION_CONTINUE_SEARCH;
}
_MINIDUMP_EXCEPTION_INFORMATION ExInfo;
ExInfo.ThreadId = ::GetCurrentThreadId();
ExInfo.ExceptionPointers = pExceptionInfo;
ExInfo.ClientPointers = NULL;
// write the dump
bool ok = Dump( GetCurrentProcess(), GetCurrentProcessId(), hFile, MiniDumpNormal, &ExInfo, NULL, NULL )!=0;
::CloseHandle(hFile);
if( !ok ) {
nvDebug("*** Failed to save dump file.\n");
return EXCEPTION_CONTINUE_SEARCH;
}
nvDebug("--- Dump file saved.\n");
*/
return EXCEPTION_CONTINUE_SEARCH;
}
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_OSX
#if defined(HAVE_EXECINFO_H) // NV_OS_LINUX
/// Print a captured backtrace through nvDebug().
///
/// @param trace  Return addresses, as filled in by backtrace().
/// @param size   Number of entries in @a trace.
/// @param start  Index of the first entry to print, used to skip the frames
///               that belong to the reporting code itself.
///
/// The last captured entry (index size-1) is not printed.
static void nvPrintStackTrace(void * trace[], int size, int start=0) {
    char ** string_array = backtrace_symbols(trace, size);
    if( string_array == NULL ) {
        // backtrace_symbols() allocates its result and returns NULL on failure.
        nvDebug( "\nStacktrace not available.\n" );
        return;
    }

    nvDebug( "\nDumping stacktrace:\n" );

    for(int i = start; i < size-1; i++ ) {
#   if NV_CC_GNUC // defined(HAVE_CXXABI_H)
        // Entries are expected to look like "module(mangled+offset) [address]".
        char * begin = strchr(string_array[i], '(');
        char * end = strchr(string_array[i], '+');
        if( begin != 0 && begin < end ) {
            int stat;
            // Split the entry in place into the module and the symbol name.
            *end = '\0';
            *begin = '\0';
            char * module = string_array[i];
            char * name = abi::__cxa_demangle(begin+1, 0, 0, &stat);
            if( name == NULL || begin[1] != '_' || begin[2] != 'Z' ) {
                // Not a mangled C++ name (or demangling failed): print it as is.
                nvDebug( " In: [%s] '%s'\n", module, begin+1 );
            }
            else {
                nvDebug( " In: [%s] '%s'\n", module, name );
            }
            // __cxa_demangle() returns malloc'ed memory; free(NULL) is harmless.
            free(name);
        }
        else {
            nvDebug( " In: '%s'\n", string_array[i] );
        }
#   else
        nvDebug( " In: '%s'\n", string_array[i] );
#   endif
    }
    nvDebug("\n");

    // Only the array itself has to be released, not the individual strings.
    free(string_array);
}
#endif // defined(HAVE_EXECINFO_H)
/// Signal handler installed by debug::enableSigHandler().
/// Reports the signal through nvDebug(), prints a stack trace when
/// <execinfo.h> is available and then terminates the process.
///
/// @param sig     Signal number.
/// @param info    Signal details; si_addr is reported for SIGSEGV.
/// @param secret  Context pointer passed by the kernel (a ucontext_t).
static void nvSigHandler(int sig, siginfo_t *info, void *secret)
{
    // Do something useful with siginfo_t
    if (sig == SIGSEGV) {
#   if NV_CPU_X86
        ucontext_t * uc = (ucontext_t *)secret;
        nvDebug("Got signal %d, faulty address is %p, from %p\n", sig, info->si_addr, (void *)uc->uc_mcontext.gregs[REG_EIP]);
#   else
        nvDebug("Got signal %d, faulty address is %p\n", sig, info->si_addr);
#   endif
    }
    else if(sig == SIGTRAP) {
        nvDebug("Breakpoint hit.\n");
    }
    else {
        nvDebug("Got signal %d\n", sig);
    }

#   if defined(HAVE_EXECINFO_H)
    void * trace[64];
    int size = backtrace(trace, 64);

#       if NV_CPU_X86
    // Overwrite sigaction with caller's address.
    ucontext_t * uc = (ucontext_t *)secret;
    trace[1] = (void *) uc->uc_mcontext.gregs[REG_EIP];
#       endif // NV_CPU_X86

    nvPrintStackTrace(trace, size, 1);
#   endif // defined(HAVE_EXECINFO_H)

    // The process is going down because of a fault: report failure to the
    // parent instead of the success status (0) that was returned before.
    exit(EXIT_FAILURE);
}
#endif // defined(HAVE_SIGNAL_H)
#if NV_OS_WIN32 //&& NV_CC_MSVC
/** Win32 assert handler. */
struct Win32AssertHandler : public AssertHandler
{
    // Code from Daniel Vogel.
    // IsDebuggerPresent is looked up at run time instead of being linked directly.
    static bool isDebuggerPresent()
    {
        bool result = false;

        HINSTANCE kern_lib = LoadLibraryEx( "kernel32.dll", NULL, 0 );
        if( kern_lib ) {
            FARPROC lIsDebuggerPresent = GetProcAddress( kern_lib, "IsDebuggerPresent" );
            if( lIsDebuggerPresent && lIsDebuggerPresent() ) {
                result = true;
            }

            FreeLibrary( kern_lib );
        }
        return result;
    }

    // Flush the message queue. This is necessary for the message box to show up.
    static void flushMessageQueue()
    {
        MSG msg;
        while( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) {
            if( msg.message == WM_QUIT ) break;
            TranslateMessage( &msg );
            DispatchMessage( &msg );
        }
    }

    // Assert handler method.
    // Logs the failed assertion and decides what the caller should do:
    //  - NV_ABORT_DEBUG:  break into the debugger.
    //  - NV_ABORT_IGNORE: continue execution.
    // When the outcome is NV_ABORT_EXIT a std::runtime_error is thrown instead
    // of returning. Outside of _DEBUG builds the outcome is always NV_ABORT_EXIT.
    virtual int assert( const char * exp, const char * file, int line, const char * func/*=NULL*/ )
    {
        int ret = NV_ABORT_EXIT;

        StringBuilder error_string;
        if( func != NULL ) {
            error_string.format( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line );
        }
        else {
            error_string.format( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line );
        }

        // The text is already formatted and may contain '%' (for example when
        // the asserted expression uses the modulo operator), so it must be
        // passed as an argument and never as the format string itself.
        const char * message = error_string;
        nvDebug( "%s", message );

#if _DEBUG
        if( isDebuggerPresent() ) {
            return NV_ABORT_DEBUG;
        }

        flushMessageQueue();
        int action = MessageBox(NULL, error_string, "Assertion failed", MB_ABORTRETRYIGNORE|MB_ICONERROR);
        switch( action ) {
            case IDRETRY:
                ret = NV_ABORT_DEBUG;
                break;
            case IDIGNORE:
                ret = NV_ABORT_IGNORE;
                break;
            case IDABORT:
            default:
                ret = NV_ABORT_EXIT;
                break;
        }
        /*if( _CrtDbgReport( _CRT_ASSERT, file, line, module, exp ) == 1 ) {
            return NV_ABORT_DEBUG;
        }*/
#endif

        if( ret == NV_ABORT_EXIT ) {
            // Exit cleanly.
            throw std::runtime_error("Assertion failed");
        }

        return ret;
    }
};
#else
/** Unix assert handler. */
struct UnixAssertHandler : public AssertHandler
{
    /// Guess whether the process is running under a debugger.
    bool isDebuggerPresent()
    {
#   if NV_OS_DARWIN
        // Query the kernel for this process and test its "traced" flag.
        int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid() };

        struct kinfo_proc info;
        // Pre-clear the flags so that a failed query reads as "not traced".
        info.kp_proc.p_flag = 0;

        size_t size = sizeof(info);
        sysctl(mib, 4, &info, &size, NULL, 0);

        return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
#   else
        // if ppid != sid, some process spawned our app, probably a debugger.
        const pid_t session = getsid(getpid());
        return session != getppid();
#   endif
    }

    /// Assert handler method: report the failure, then either ask the caller
    /// to break into the debugger (debug builds with a debugger attached) or
    /// throw std::runtime_error.
    virtual int assert(const char * exp, const char * file, int line, const char * func)
    {
        if( func == NULL ) {
            nvDebug( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line );
        }
        else {
            nvDebug( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line );
        }

#   if _DEBUG
        if( isDebuggerPresent() ) {
            return NV_ABORT_DEBUG;
        }
#   endif

#   if defined(HAVE_EXECINFO_H)
        // Skip the innermost frames, which belong to the assert machinery.
        void * frames[64];
        const int frame_count = backtrace(frames, 64);
        nvPrintStackTrace(frames, frame_count, 3);
#   endif

        // Exit cleanly.
        throw std::runtime_error("Assertion failed");
    }
};
#endif
} // namespace
/// Handle assertion through the asset handler.
int nvAbort(const char * exp, const char * file, int line, const char * func)
{
#if NV_OS_WIN32 //&& NV_CC_MSVC
static Win32AssertHandler s_default_assert_handler;
#else
static UnixAssertHandler s_default_assert_handler;
#endif
if( s_assert_handler != NULL ) {
return s_assert_handler->assert( exp, file, line, func );
}
else {
return s_default_assert_handler.assert( exp, file, line, func );
}
}
/// Shows a message through the message handler.
/// The printf-style arguments are forwarded untouched; without an installed
/// handler the message is dropped.
void NV_CDECL nvDebug(const char *msg, ...)
{
    va_list args;
    va_start(args, msg);

    MessageHandler * const handler = s_message_handler;
    if( handler != NULL ) {
        handler->log( msg, args );
    }

    va_end(args);
}
/// Dump debug info.
/// Prints the current call stack when <execinfo.h> is available and does
/// nothing otherwise.
void debug::dumpInfo()
{
#if defined(HAVE_EXECINFO_H)
    void * frames[64];
    const int frame_count = backtrace(frames, 64);
    // Start at 1 so that this function itself is not listed.
    nvPrintStackTrace(frames, frame_count, 1);
#endif
}
/// Set the debug message handler.
void debug::setMessageHandler(MessageHandler * message_handler)
{
s_message_handler = message_handler;
}
/// Reset the debug message handler.
void debug::resetMessageHandler()
{
s_message_handler = NULL;
}
/// Set the assert handler.
void debug::setAssertHandler(AssertHandler * assert_handler)
{
s_assert_handler = assert_handler;
}
/// Reset the assert handler.
void debug::resetAssertHandler()
{
s_assert_handler = NULL;
}
/// Enable signal handler.
/// Installs the crash reporting hooks and remembers the previous ones so that
/// disableSigHandler() can restore them. Must not be called twice in a row.
void debug::enableSigHandler()
{
    nvCheck(s_sig_handler_enabled != true);
    s_sig_handler_enabled = true;

#if NV_OS_WIN32 && NV_CC_MSVC

    s_old_exception_filter = ::SetUnhandledExceptionFilter( nvTopLevelFilter );

#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)

    // Install our signal handler
    struct sigaction action;
    sigemptyset(&action.sa_mask);
    action.sa_flags = SA_RESTART | SA_SIGINFO;
    action.sa_sigaction = nvSigHandler;

    // The same handler serves every signal; each previous action is saved.
    sigaction(SIGSEGV, &action, &s_old_sigsegv);
    sigaction(SIGTRAP, &action, &s_old_sigtrap);
    sigaction(SIGFPE, &action, &s_old_sigfpe);
    sigaction(SIGBUS, &action, &s_old_sigbus);

#endif
}
/// Disable signal handler.
/// Restores the hooks that were active before enableSigHandler() was called.
/// Must only be called while the handler is enabled.
void debug::disableSigHandler()
{
    nvCheck(s_sig_handler_enabled == true);
    s_sig_handler_enabled = false;

#if NV_OS_WIN32 && NV_CC_MSVC

    ::SetUnhandledExceptionFilter( s_old_exception_filter );
    s_old_exception_filter = NULL;

#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)

    // Put back the actions saved by enableSigHandler().
    sigaction(SIGSEGV, &s_old_sigsegv, NULL);
    sigaction(SIGTRAP, &s_old_sigtrap, NULL);
    sigaction(SIGFPE, &s_old_sigfpe, NULL);
    sigaction(SIGBUS, &s_old_sigbus, NULL);

#endif
}

129
src/nvcore/Debug.h Normal file
View File

@ -0,0 +1,129 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_DEBUG_H
#define NV_CORE_DEBUG_H
#include <nvcore/nvcore.h>
#if defined(HAVE_STDARG_H)
# include <stdarg.h> // va_list
#endif
// Values returned by nvAbort() and by AssertHandler::assert().
// The assert macros break into the debugger when they get NV_ABORT_DEBUG.
#define NV_ABORT_DEBUG 1
#define NV_ABORT_IGNORE 2
#define NV_ABORT_EXIT 3

// nvNoAssert swallows its argument without evaluating it.
#if NV_CC_MSVC
#define nvNoAssert __noop
#else
#define nvNoAssert(exp)
#endif

#if NV_NO_ASSERT

// Assertions compiled out completely.
#   define nvAssert(exp) nvNoAssert()
#   define nvCheck(exp) nvNoAssert()
#   define nvDebugAssert(exp) nvNoAssert()
#   define nvDebugCheck(exp) nvNoAssert()
#   define nvDebugBreak()

#else // NV_NO_ASSERT

// nvDebugBreak() stops in the debugger. None of the definitions ends with a
// semicolon, so the macro behaves like an ordinary expression statement and
// "if (x) nvDebugBreak(); else ..." is valid on every platform.
#   if NV_CC_MSVC && NV_CPU_X86 && 0
#       define nvDebugBreak() __asm int 3
#   elif NV_CC_MSVC // this is only on recent versions...
        // Do I have to include <intrin.h> ?
#       define nvDebugBreak() __debugbreak()
#   elif NV_CC_GNUC && NV_CPU_PPC && NV_OS_DARWIN
#       define nvDebugBreak() __asm__ volatile ("trap")
#   elif NV_CC_GNUC && NV_CPU_X86 && NV_OS_DARWIN
#       define nvDebugBreak() __asm__ volatile ("int3")
#   elif NV_CC_GNUC && NV_CPU_X86
#       define nvDebugBreak() __asm__ ( "int %0" : :"I"(3) )
#   else
#       include <signal.h>
#       define nvDebugBreak() raise(SIGTRAP) //*((int *)(0)) = 0
#   endif

// Evaluate exp once; on failure report it through nvAbort() and break into
// the debugger when the handler asks for it.
#   define nvAssertMacro(exp) \
    do { \
        if(!(exp)) { \
            if( nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG ) { \
                nvDebugBreak(); \
            } \
        } \
    } while(false)

// Always-on assertions.
#   define nvAssert(exp) nvAssertMacro(exp)
#   define nvCheck(exp) nvAssertMacro(exp)

// Assertions that are only active in _DEBUG builds.
#   if defined(_DEBUG)
#       define nvDebugAssert(exp) nvAssertMacro(exp)
#       define nvDebugCheck(exp) nvAssertMacro(exp)
#   else // _DEBUG
#       define nvDebugAssert(exp) nvNoAssert(exp)
#       define nvDebugCheck(exp) nvNoAssert(exp)
#   endif // _DEBUG

#endif // NV_NO_ASSERT
// Use nvAssume for very simple expressions only: nvAssume(0), nvAssume(value == true), etc.
// NOTE(review): __assume is selected for MSVC debug builds only and every
// other configuration falls back to nvCheck; this looks inverted - confirm.
#if defined(_DEBUG) && NV_CC_MSVC
#   define nvAssume(exp) __assume(exp)
#else
#   define nvAssume(exp) nvCheck(exp)
#endif

// Report an error through nvAbort(), tagged with the calling file, line and function.
#define nvError(x) nvAbort(x, __FILE__, __LINE__, __FUNC__)

// Print a warning through nvDebug(), tagged with the calling file and line.
#define nvWarning(x) nvDebug("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x))
// Compile time assertion: nvStaticCheck(x) fails to compile when x is false.
// NOTE(review): PI_CC_MSVC looks like a leftover of an older prefix (the rest
// of this header tests NV_CC_MSVC), so this branch is presumably never taken.
#if PI_CC_MSVC
// I'm not sure it's a good idea to use the default static assert.
#define nvStaticCheck(x) _STATIC_ASSERT(x)
#else
// A false condition must yield a negative array size. A size of zero is not
// enough, because some compilers accept zero-length arrays as an extension
// and the check would then pass silently.
#define nvStaticCheck(x) typedef char NV_DO_STRING_JOIN2(__static_assert_,__LINE__)[(x) ? 1 : -1]
//#define nvStaticCheck(x) switch(0) { case 0: case x:; }
#endif
/// Report a failed assertion. The return value is one of the NV_ABORT_* codes.
NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = 0);

/// Print a printf-style message through the installed message handler.
NVCORE_API void NV_CDECL nvDebug( const char *msg, ... ) __attribute__((format (printf, 1, 2)));

namespace nv
{
    /** Message handler interface. */
    struct MessageHandler
    {
        /// Receives the format string and the arguments given to nvDebug().
        virtual void log(const char * str, va_list arg) = 0;
        virtual ~MessageHandler() {}
    };

    /** Assert handler interface. */
    struct AssertHandler
    {
        /// Receives the failed expression and its location; the result is
        /// returned to the caller of nvAbort().
        virtual int assert(const char *exp, const char *file, int line, const char *func = 0) = 0;
        virtual ~AssertHandler() {}
    };

    namespace debug
    {
        /// Dump debug information.
        NVCORE_API void dumpInfo();

        /// Install or remove the sink used by nvDebug().
        NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
        NVCORE_API void resetMessageHandler();

        /// Install or remove the handler used by nvAbort().
        NVCORE_API void setAssertHandler( AssertHandler * assertHandler );
        NVCORE_API void resetAssertHandler();

        /// Enable or disable the crash signal handler.
        NVCORE_API void enableSigHandler();
        NVCORE_API void disableSigHandler();
    }

} // nv namespace
#endif // NV_CORE_DEBUG_H

View File

@ -0,0 +1,66 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif

#include <stdlib.h>
#include <stdint.h> // uint8_t, int8_t, ...

// Function linkage
#define DLL_IMPORT
#if __GNUC__ >= 4
#   define DLL_EXPORT __attribute__((visibility("default")))
#   define DLL_EXPORT_CLASS DLL_EXPORT
#else
#   define DLL_EXPORT
#   define DLL_EXPORT_CLASS
#endif

// Function calling modes
// The calling convention attributes only exist on x86; on other processors
// they are left empty (fastcall used to be emitted unconditionally, unlike
// cdecl and stdcall).
#if NV_CPU_X86
#   define NV_CDECL __attribute__((cdecl))
#   define NV_STDCALL __attribute__((stdcall))
#   define NV_FASTCALL __attribute__((fastcall))
#else
#   define NV_CDECL
#   define NV_STDCALL
#   define NV_FASTCALL
#endif

#define NV_FORCEINLINE __attribute__((always_inline))

#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))
#define NV_CONST __attribute__((const))
#else
#define NV_PURE
#define NV_CONST
#endif

// Define __FUNC__ properly.
#if __STDC_VERSION__ < 199901L
#   if __GNUC__ >= 2
#       define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
#   else
#       define __FUNC__ "<unknown>"
#   endif
#else
#   define __FUNC__ __PRETTY_FUNCTION__
#endif

#define restrict __restrict__

// Type definitions (fixed width types come from <stdint.h>)
typedef uint8_t uint8;
typedef int8_t int8;

typedef uint16_t uint16;
typedef int16_t int16;

typedef uint32_t uint32;
typedef int32_t int32;

typedef uint64_t uint64;
typedef int64_t int64;

// Aliases
typedef uint32 uint;

View File

@ -0,0 +1,63 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif

// Function linkage
#define DLL_IMPORT
#if __GNUC__ >= 4
#   define DLL_EXPORT __attribute__((visibility("default")))
#   define DLL_EXPORT_CLASS DLL_EXPORT
#else
#   define DLL_EXPORT
#   define DLL_EXPORT_CLASS
#endif

// Function calling modes
// The calling convention attributes only exist on x86; on other processors
// they are left empty (fastcall used to be emitted unconditionally, unlike
// cdecl and stdcall).
#if NV_CPU_X86
#   define NV_CDECL __attribute__((cdecl))
#   define NV_STDCALL __attribute__((stdcall))
#   define NV_FASTCALL __attribute__((fastcall))
#else
#   define NV_CDECL
#   define NV_STDCALL
#   define NV_FASTCALL
#endif

#define NV_FORCEINLINE __attribute__((always_inline))

#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))
#define NV_CONST __attribute__((const))
#else
#define NV_PURE
#define NV_CONST
#endif

// Define __FUNC__ properly.
#if __STDC_VERSION__ < 199901L
#   if __GNUC__ >= 2
#       define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
#   else
#       define __FUNC__ "<unknown>"
#   endif
#else
#   define __FUNC__ __PRETTY_FUNCTION__
#endif

#define restrict __restrict__

// Type definitions
typedef unsigned char uint8;
typedef signed char int8;

typedef unsigned short uint16;
typedef signed short int16;

typedef unsigned int uint32;
typedef signed int int32;

typedef unsigned long long uint64;
typedef signed long long int64;

// Aliases
typedef uint32 uint;

View File

@ -0,0 +1,58 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif

// Function linkage
#define DLL_IMPORT __declspec(dllimport)
#define DLL_EXPORT __declspec(dllexport)
#define DLL_EXPORT_CLASS DLL_EXPORT

// Function calling modes
// The calling convention attributes only exist on x86; on other processors
// they are left empty (fastcall used to be emitted unconditionally, unlike
// cdecl and stdcall).
#if NV_CPU_X86
#   define NV_CDECL __attribute__((cdecl))
#   define NV_STDCALL __attribute__((stdcall))
#   define NV_FASTCALL __attribute__((fastcall))
#else
#   define NV_CDECL
#   define NV_STDCALL
#   define NV_FASTCALL
#endif

#define NV_FORCEINLINE __attribute__((always_inline))

#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))
#define NV_CONST __attribute__((const))
#else
#define NV_PURE
#define NV_CONST
#endif

// Define __FUNC__ properly.
#if __STDC_VERSION__ < 199901L
#   if __GNUC__ >= 2
#       define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
#   else
#       define __FUNC__ "<unknown>"
#   endif
#else
#   define __FUNC__ __PRETTY_FUNCTION__
#endif

#define restrict __restrict__

// Type definitions
typedef unsigned char uint8;
typedef signed char int8;

typedef unsigned short uint16;
typedef signed short int16;

typedef unsigned int uint32;
typedef signed int int32;

typedef unsigned long long uint64;
typedef signed long long int64;

// Aliases
typedef uint32 uint;

94
src/nvcore/DefsVcWin32.h Normal file
View File

@ -0,0 +1,94 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif

// Function linkage
#define DLL_IMPORT __declspec(dllimport)
#define DLL_EXPORT __declspec(dllexport)
#define DLL_EXPORT_CLASS DLL_EXPORT

// Function calling modes
#define NV_CDECL __cdecl
#define NV_STDCALL __stdcall
#define NV_FASTCALL __fastcall
#define NV_FORCEINLINE __forceinline
#define NV_PURE
#define NV_CONST

// Set standard function names.
// The remaps are limited to the compiler versions that need them: newer
// runtimes provide these names themselves, and their <stdio.h> rejects a
// macro called snprintf.
#if _MSC_VER < 1900
#   define snprintf _snprintf
#endif
#define vsnprintf _vsnprintf
#if _MSC_VER < 1800
#   define vsscanf _vsscanf
#endif
#define chdir _chdir
#define getcwd _getcwd

// va_copy is provided by the compiler from _MSC_VER 1800 on. The fallback is
// fully parenthesised so that it expands safely inside larger expressions.
#if _MSC_VER < 1800
#   define va_copy(a, b) ((a) = (b))
#endif

#if !defined restrict
#define restrict
#endif

// GCC style attributes are not understood by this compiler: drop them.
#if !defined __attribute__
#define __attribute__(X)
#endif

#if !defined __FUNC__
#define __FUNC__ __FUNCTION__
#endif

// Type definitions
typedef unsigned char uint8;
typedef signed char int8;

typedef unsigned short uint16;
typedef signed short int16;

typedef unsigned int uint32;
typedef signed int int32;

typedef unsigned __int64 uint64;
typedef signed __int64 int64;

// Aliases
typedef uint32 uint;

// Unwanted VC++ warnings to disable.
#pragma warning(disable : 4244) // conversion to float, possible loss of data
#pragma warning(disable : 4245) // conversion from 'enum ' to 'unsigned long', signed/unsigned mismatch
#pragma warning(disable : 4100) // unreferenced formal parameter
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#pragma warning(disable : 4710) // inline function not expanded
#pragma warning(disable : 4127) // Conditional expression is constant
#pragma warning(disable : 4305) // truncation from 'const double' to 'float'
#pragma warning(disable : 4505) // unreferenced local function has been removed
//#pragma warning(disable : 4699) // creating precompiled header
//#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
#pragma warning(disable : 4702) // unreachable code in inline expanded function
#pragma warning(disable : 4711) // function selected for automatic inlining
#pragma warning(disable : 4725) // Pentium fdiv bug
//#pragma warning(disable : 4512) // assignment operator could not be generated
//#pragma warning(disable : 4530) // C++ exception handler used, but unwind semantics are not enabled
//#pragma warning(disable : 4238) // nonstandard extension used : class rvalue used as lvalue
//#pragma warning(disable : 4251) // needs to have dll-interface to be used by clients of class 'ULinker'
//#pragma warning(disable : 4275) // non dll-interface class used as base for dll-interface class
//#pragma warning(disable : 4511) // copy constructor could not be generated
//#pragma warning(disable : 4284) // return type is not a UDT or reference to a UDT
//#pragma warning(disable : 4355) // this used in base initializer list
//#pragma warning(disable : 4097) // typedef-name '' used as synonym for class-name ''
//#pragma warning(disable : 4291) // no matching operator delete found; memory will not be freed if initialization throws an exception
#pragma warning(disable : 4345) // behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized
#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged.
//#pragma warning(disable : 4996) // function was declared deprecated.
//#pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup

34
src/nvcore/Memory.cpp Normal file
View File

@ -0,0 +1,34 @@
#include "Memory.h"
#include "Debug.h"
//#if HAVE_MALLOC_H
//#include <malloc.h>
//#endif
#include <stdlib.h>
using namespace nv;
/// Allocate @a size bytes with the C runtime allocator.
/// Returns whatever ::malloc returns for that size.
void * nv::mem::malloc(size_t size)
{
    void * const block = ::malloc(size);
    return block;
}
/// Allocate @a size bytes with the C runtime allocator.
/// @a file and @a line identify the call site; they are currently not used.
void * nv::mem::malloc(size_t size, const char * file, int line)
{
    (void)file;
    (void)line;
    void * const block = ::malloc(size);
    return block;
}
/// Release a block with the C runtime allocator.
/// The pointer is taken as const for the caller's convenience and the
/// qualifier is dropped before the block is handed to ::free.
void nv::mem::free(const void * ptr)
{
    void * const block = const_cast<void *>(ptr);
    ::free(block);
}
/// Resize a block with the C runtime allocator.
/// Debug builds reject the combination of a NULL pointer and a zero size.
void * nv::mem::realloc(void * ptr, size_t size)
{
    nvDebugCheck(ptr != NULL || size != 0); // undefined realloc behavior.
    void * const block = ::realloc(ptr, size);
    return block;
}

186
src/nvcore/Memory.h Normal file
View File

@ -0,0 +1,186 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_MEMORY_H
#define NV_CORE_MEMORY_H
#include <nvcore/nvcore.h>
#include <stdlib.h> // malloc(), realloc() and free()
#include <stddef.h> // size_t
#include <new> // new and delete
// Custom memory allocator
namespace nv
{
    namespace mem
    {
        /// Allocate size bytes.
        NVCORE_API void * malloc(size_t size);

        /// Allocate size bytes; file and line identify the call site.
        NVCORE_API void * malloc(size_t size, const char * file, int line);

        /// Release a block obtained from this allocator.
        NVCORE_API void free(const void * ptr);

        /// Resize a block obtained from this allocator.
        NVCORE_API void * realloc(void * ptr, size_t size);

    } // mem namespace

} // nv namespace
// Override new/delete
// The global allocation operators are routed through nv::mem so that every
// translation unit that includes this header uses the custom allocator.
// All four are declared with an empty exception specification: a failed
// allocation is reported by the return value of nv::mem::malloc, not by an
// exception.
// NOTE(review): the C++ standard asks that replacement allocation functions
// are not declared inline - confirm that this works on all target compilers.

// Scalar new: forwards the requested size to nv::mem::malloc.
inline void * operator new (size_t size) throw()
{
return nv::mem::malloc(size);
}
// Scalar delete: forwards the pointer to nv::mem::free.
inline void operator delete (void *p) throw()
{
nv::mem::free(p);
}
// Array new: same allocation path as the scalar form.
inline void * operator new [] (size_t size) throw()
{
return nv::mem::malloc(size);
}
// Array delete: same release path as the scalar form.
inline void operator delete [] (void * p) throw()
{
nv::mem::free(p);
}
/*
#ifdef _DEBUG
#define new new(__FILE__, __LINE__)
#define malloc(i) malloc(i, __FILE__, __LINE__)
#endif
*/
#if 0
/*
File: main.cpp
Version: 1.0
Abstract: Overrides the C++ 'operator new' and 'operator delete'.
Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Computer, Inc.
("Apple") in consideration of your agreement to the following terms, and your
use, installation, modification or redistribution of this Apple software
constitutes acceptance of these terms. If you do not agree with these terms,
please do not use, install, modify or redistribute this Apple software.
In consideration of your agreement to abide by the following terms, and subject
to these terms, Apple grants you a personal, non-exclusive license, under Apples
copyrights in this original Apple software (the "Apple Software"), to use,
reproduce, modify and redistribute the Apple Software, with or without
modifications, in source and/or binary forms; provided that if you redistribute
the Apple Software in its entirety and without modifications, you must retain
this notice and the following text and disclaimers in all such redistributions of
the Apple Software. Neither the name, trademarks, service marks or logos of
Apple Computer, Inc. may be used to endorse or promote products derived from the
Apple Software without specific prior written permission from Apple. Except as
expressly stated in this notice, no other rights or licenses, express or implied,
are granted by Apple herein, including but not limited to any patent rights that
may be infringed by your derivative works or by other works in which the Apple
Software may be incorporated.
The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION ALONE OR IN
COMBINATION WITH YOUR PRODUCTS.
IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION
OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, TORT
(INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Copyright © 2006 Apple Computer, Inc., All Rights Reserved
*/
/* This sample shows how to override the C++ global 'new' and 'delete' operators. */
#include <new>
#include <iostream>
#include <cstdlib>
#include <stdexcept>
#include <locale>
/* Some variables and code to make the example do something. */
namespace {
unsigned long long gNewCounter; // number of times 'new' was called
unsigned long long gDeleteCounter; // number of times 'delete' was called
void printCounters() // print the counters above
{
std::cout << "new was called " << gNewCounter << " times and delete was called " << gDeleteCounter << " times\n";
}
}
/* These are the overridden new and delete routines.
Most applications will want to override at least these four versions of new/delete if they override any of them.
In Mac OS, it's not necessary to override the array versions of operator new and delete if all
they would do is call the non-array versions; the C++ standard library, as an extension
to the C++ standard, does this for you.
Developers should consult the section [lib.support.dynamic] in the C++ standard to see the requirements
on the generic operators new and delete; the system may expect that your overridden operators meet all these
requirements.
Your operators may be called by the system, even early in start-up before constructors have been executed. */
void* operator new(std::size_t sz) throw (std::bad_alloc)
{
	// A zero-byte request is served as a one-byte request.
	const std::size_t bytes = (sz != 0) ? sz : 1;
	void * const block = std::malloc(bytes);
	if (block != NULL) {
		// Only successful allocations are counted.
		++gNewCounter;
		return block;
	}
	throw std::bad_alloc();
}
void operator delete(void* p) throw()
{
	// Deleting a null pointer does nothing and is not counted.
	if (p != NULL) {
		std::free(p);
		++gDeleteCounter;
	}
}
/* These are the 'nothrow' versions of the above operators.
The system version will try to call a std::new_handler if they
fail, but your overriding versions are not required to do this. */
void* operator new(std::size_t sz, const std::nothrow_t&) throw()
{
	// Delegate to the throwing form and translate its failure into a NULL result.
	void * block = NULL;
	try {
		block = ::operator new(sz);
	}
	catch (std::bad_alloc &) {
		block = NULL;
	}
	return block;
}
void operator delete(void* p, const std::nothrow_t&) throw()
{
	// Simply forwards to the single-argument ::operator delete.
	::operator delete(p);
}
/* Bug 4067110 is that if your program has no weak symbols at all, the linker will not set the
WEAK_DEFINES bit in the Mach-O header and as a result the new and delete operators above won't
be seen by system libraries. This is mostly a problem for test programs and small examples,
since almost all real C++ programs complicated enough to override new and delete will have at
least one weak symbol. However, this is a small example, so: */
// Intentionally empty: the function exists only so that the program contains one weak,
// default-visibility symbol.
void __attribute__((weak, visibility("default"))) workaroundFor4067110 () { }
/* This is a simple test program that causes the runtime library to call new and delete. */
int main()
{
	// Print the allocation tallies when the process exits.
	atexit(printCounters);

	// Build a locale from a name that is not expected to exist; a resulting
	// std::runtime_error is swallowed on purpose.
	try {
		std::locale example("does_not_exist");
	}
	catch (std::runtime_error &) {
		// Nothing to do.
	}

	return 0;
}
#endif // 0
#endif // NV_CORE_MEMORY_H

24
src/nvcore/Prefetch.h Normal file
View File

@ -0,0 +1,24 @@
// nvPrefetch
//
// nvPrefetch(ptr) hints that the memory at 'ptr' is about to be accessed.
// It is defined for every compiler/CPU combination:
//  - GCC-compatible compilers: maps to __builtin_prefetch.
//  - MSVC on x86: inline assembly issuing prefetcht0.
//  - anything else (including MSVC on other CPUs): expands to nothing.
#if NV_CC_GNUC

#define nvPrefetch(ptr)	__builtin_prefetch(ptr)

#elif NV_CC_MSVC && NV_CPU_X86

__forceinline void nvPrefetch(const void * mem)
{
	__asm mov ecx, mem
	__asm prefetcht0 [ecx];
//	__asm prefetchnta [ecx];
}

#else

// No prefetch primitive known for this configuration: do nothing.
#define nvPrefetch(ptr)

#endif

420
src/nvcore/Ptr.h Normal file
View File

@ -0,0 +1,420 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_PTR_H
#define NV_CORE_PTR_H
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
#include <stdio.h> // NULL
namespace nv
{
/** Simple auto pointer template class.
 *
 * This is very similar to the standard auto_ptr class, but with some
 * additional limitations to make its use less error prone:
 * - Copy constructor and assignment operator are disabled.
 * - reset method is removed.
 *
 * The semantics of the standard auto_ptr are not clear and change depending
 * on the std implementation. For a discussion of the problems of auto_ptr read:
 * http://www.awprofessional.com/content/images/020163371X/autoptrupdate\auto_ptr_update.html
 */
template <class T>
class AutoPtr
{
	NV_FORBID_COPY(AutoPtr);
	NV_FORBID_HEAPALLOC();
public:

	/** Ctor. Takes ownership of @a p, which may be NULL. */
	explicit AutoPtr( T * p ) : m_ptr(p) { }

	/** Dtor. Deletes owned pointer. */
	~AutoPtr() {
		delete m_ptr;
		m_ptr = NULL;
	}

	/** Delete owned pointer and assign new one.
	 *
	 * Assigning the pointer that is already owned is a no-op; without this
	 * guard the object would be deleted and a dangling pointer kept.
	 */
	void operator=( T * p ) {
		if (p != m_ptr) {
			delete m_ptr;
			m_ptr = p;
		}
	}

	/** Member access. The owned pointer must not be NULL (checked in debug builds). */
	T * operator -> () const {
		nvDebugCheck(m_ptr != NULL);
		return m_ptr;
	}

	/** Get reference. The owned pointer must not be NULL (checked in debug builds). */
	T & operator*() const {
		nvDebugCheck(m_ptr != NULL);
		return *m_ptr;
	}

	/** Get pointer. Ownership is retained; the result may be NULL. */
	T * ptr() const { return m_ptr; }

	/** Relinquish ownership of the underlying pointer and returns that pointer. */
	T * release() {
		T * tmp = m_ptr;
		m_ptr = NULL;
		return tmp;
	}

	/** Const pointer equal comparison. */
	bool operator == (const T * const p) const {
		return (m_ptr == p);
	}

	/** Const pointer not-equal comparison. */
	bool operator != (const T * const p) const {
		return (m_ptr != p);
	}

private:
	T * m_ptr;	///< Owned object, or NULL.
};
#if 0
/** Reference counted base class to be used with Pointer.
 *
 * The only requirement of the Pointer class is that the RefCounted class implements the
 * addRef and release methods.
 *
 * NOTE(review): this class is currently compiled out (#if 0) and refers to a WeakProxy
 * type that is not declared in this header; both need attention before re-enabling it.
 */
class RefCounted
{
	NV_FORBID_COPY(RefCounted);
public:

	/// Ctor. Starts with a zero reference count and no weak proxy.
	RefCounted() : m_count(0), m_weak_proxy(NULL)
	{
		s_total_obj_count++;
	}

	/// Virtual dtor. The object must not be referenced anymore.
	virtual ~RefCounted()
	{
		nvCheck( m_count == 0 );
		nvCheck( s_total_obj_count > 0 );
		s_total_obj_count--;
	}

	/// Increase reference count. Returns the new count.
	uint addRef() const
	{
		s_total_ref_count++;
		m_count++;
		return m_count;
	}

	/// Decrease reference count and remove when 0. Returns the new count.
	uint release() const
	{
		nvCheck( m_count > 0 );

		s_total_ref_count--;
		m_count--;
		if( m_count == 0 ) {
			// Last reference gone: detach the weak proxy first, then self-destruct.
			releaseWeakProxy();
			delete this;
			return 0;
		}
		return m_count;
	}

	/// Get weak proxy. It is created on first use.
	WeakProxy * getWeakProxy() const
	{
		if (m_weak_proxy == NULL) {
			m_weak_proxy = new WeakProxy;
			m_weak_proxy->AddRef();
		}
		return m_weak_proxy;
	}

	/// Release the weak proxy, notifying it that this object died.
	void releaseWeakProxy() const
	{
		if (m_weak_proxy != NULL) {
			m_weak_proxy->NotifyObjectDied();
			m_weak_proxy->Release();
			m_weak_proxy = NULL;
		}
	}

	/** @name Debug methods: */
	//@{
	/// Get reference count.
	int refCount() const
	{
		return m_count;
	}

	/// Get total number of objects.
	static int totalObjectCount()
	{
		return s_total_obj_count;
	}

	/// Get total number of references.
	static int totalReferenceCount()
	{
		return s_total_ref_count;
	}
	//@}

private:

	NVCORE_API static int s_total_ref_count;
	NVCORE_API static int s_total_obj_count;

	mutable int m_count;
	// Was declared as 'weak_proxy', which did not match any of its uses above.
	mutable WeakProxy * m_weak_proxy;
};
#endif
/// Smart pointer template class.
///
/// Holds a BaseClass pointer and keeps the pointee's reference count up to date:
/// addRef() is called on every object the pointer starts referring to and
/// release() on every object it stops referring to. A NULL pointer is allowed.
template <class BaseClass>
class Pointer {
public:

	// BaseClass must implement addRef() and release().
	typedef Pointer<BaseClass> ThisType;

	/// Default ctor. Initializes the pointer to NULL.
	Pointer() : m_ptr(NULL)
	{
	}

	/** Other type copy ctor. The other pointer is converted with static_cast. */
	template <class OtherBase>
	Pointer( const Pointer<OtherBase> & tc )
	{
		m_ptr = static_cast<BaseClass *>( tc.ptr() );
		if( m_ptr ) {
			m_ptr->addRef();
		}
	}

	/** Copy ctor. */
	Pointer( const ThisType & bc )
	{
		m_ptr = bc.ptr();
		if( m_ptr ) {
			m_ptr->addRef();
		}
	}

	/** Copy cast ctor. Pointer(NULL) is valid. */
	explicit Pointer( BaseClass * bc )
	{
		m_ptr = bc;
		if( m_ptr ) {
			m_ptr->addRef();
		}
	}

	/** Dtor. Drops the reference held on the pointee, if any. */
	~Pointer()
	{
		set(NULL);
	}


	/** @name Accessors: */
	//@{
	/** -> operator. The pointer must not be NULL (checked in debug builds). */
	BaseClass * operator -> () const
	{
		nvDebugCheck( m_ptr != NULL );
		return m_ptr;
	}

	/** * operator. The pointer must not be NULL (checked in debug builds). */
	BaseClass & operator*() const
	{
		nvDebugCheck( m_ptr != NULL );
		return *m_ptr;
	}

	/** Get pointer. */
	BaseClass * ptr() const
	{
		return m_ptr;
	}
	//@}


	/** @name Mutators: */
	//@{
	/** Other type assignment. */
	template <class OtherBase>
	void operator = ( const Pointer<OtherBase> & tc )
	{
		set( static_cast<BaseClass *>(tc.ptr()) );
	}

	/** This type assignment. */
	void operator = ( const ThisType & bc )
	{
		set( bc.ptr() );
	}

	/** Pointer assignment. */
	void operator = ( BaseClass * bc )
	{
		set( bc );
	}
	//@}


	/** @name Comparators: */
	//@{
	/** Other type equal comparison. */
	template <class OtherBase>
	bool operator == ( const Pointer<OtherBase> & other ) const
	{
		return m_ptr == other.ptr();
	}

	/** This type equal comparison. */
	bool operator == ( const ThisType & bc ) const
	{
		return m_ptr == bc.ptr();
	}

	/** Const pointer equal comparison. */
	bool operator == ( const BaseClass * const bc ) const
	{
		return m_ptr == bc;
	}

	/** Other type not equal comparison. */
	template <class OtherBase>
	bool operator != ( const Pointer<OtherBase> & other ) const
	{
		return m_ptr != other.ptr();
	}

	/** This type not equal comparison. */
	bool operator != ( const ThisType & bc ) const
	{
		return m_ptr != bc.ptr();
	}

	/** Const pointer not equal comparison. */
	bool operator != (const BaseClass * const bc) const
	{
		return m_ptr != bc;
	}

	/** This type lower than comparison. Compares the raw pointer values. */
	bool operator < (const ThisType & p) const
	{
		return m_ptr < p.ptr();
	}
	//@}

private:

	/** Set this pointer.
	 *
	 * The new object is referenced before the old one is released, and the
	 * member is updated before the release call. This way releasing the old
	 * object can never destroy the new one (e.g. when the old object holds the
	 * only other reference to it), and the pointer is already consistent if
	 * the release triggers code that looks at it.
	 */
	void set( BaseClass * p )
	{
		if( m_ptr != p ) {
			if( p ) p->addRef();
			BaseClass * const old = m_ptr;
			m_ptr = p;
			if( old ) old->release();
		}
	}

private:

	BaseClass * m_ptr;	///< Referenced object, or NULL.

};
/*
template <class T>
class QSharedDataPointer
{
public:
inline void detach() { if (d && d->ref != 1) detach_helper(); }
inline T &operator*() { detach(); return *d; }
inline const T &operator*() const { return *d; }
inline T *operator->() { detach(); return d; }
inline const T *operator->() const { return d; }
inline operator T *() { detach(); return d; }
inline operator const T *() const { return d; }
inline T *data() { detach(); return d; }
inline const T *data() const { return d; }
inline const T *constData() const { return d; }
inline bool operator==(const QSharedDataPointer<T> &other) const { return d == other.d; }
inline bool operator!=(const QSharedDataPointer<T> &other) const { return d != other.d; }
inline QSharedDataPointer() { d = 0; }
inline ~QSharedDataPointer() { if (d && !d->ref.deref()) delete d; }
explicit QSharedDataPointer(T *data);
inline QSharedDataPointer(const QSharedDataPointer<T> &o) : d(o.d) { if (d) d->ref.ref(); }
inline QSharedDataPointer<T> & operator=(const QSharedDataPointer<T> &o) {
if (o.d != d) {
T *x = o.d;
if (x) x->ref.ref();
x = qAtomicSetPtr(&d, x);
if (x && !x->ref.deref())
delete x;
}
return *this;
}
inline QSharedDataPointer &operator=(T *o) {
if (o != d) {
T *x = o;
if (x) x->ref.ref();
x = qAtomicSetPtr(&d, x);
if (x && !x->ref.deref())
delete x;
}
return *this;
}
inline bool operator!() const { return !d; }
private:
void detach_helper();
T *d;
};
template <class T>
Q_INLINE_TEMPLATE QSharedDataPointer<T>::QSharedDataPointer(T *adata) : d(adata)
{ if (d) d->ref.ref(); }
template <class T>
Q_OUTOFLINE_TEMPLATE void QSharedDataPointer<T>::detach_helper()
{
T *x = new T(*d);
x->ref.ref();
x = qAtomicSetPtr(&d, x);
if (!x->ref.deref())
delete x;
}
*/
} // nv namespace
#endif // NV_CORE_PTR_H

429
src/nvcore/Radix.cpp Normal file
View File

@ -0,0 +1,429 @@
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Contains source code from the article "Radix Sort Revisited".
* \file Radix.cpp
* \author Pierre Terdiman
* \date April, 4, 2000
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Revisited Radix Sort.
* This is my new radix routine:
* - it uses indices and doesn't recopy the values anymore, hence wasting less ram
* - it creates all the histograms in one run instead of four
* - it sorts words faster than dwords and bytes faster than words
* - it correctly sorts negative floating-point values by patching the offsets
* - it automatically takes advantage of temporal coherence
* - multiple keys support is a side effect of temporal coherence
* - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway]
*
* History:
* - 08.15.98: very first version
* - 04.04.00: recoded for the radix article
* - 12.xx.00: code lifting
* - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here)
* - 10.11.01: added local ram support
* - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting......
*
* \class RadixSort
* \author Pierre Terdiman
* \version 1.3
* \date August, 15, 1998
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/*
To do:
- add an offset parameter between two input values (avoid some data recopy sometimes)
- unroll ? asm ?
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Header
#include <nvcore/Radix.h>
#include <string.h> // memset
//using namespace IceCore;
// Releases an array allocated with new[] and sets the pointer back to NULL.
#define DELETEARRAY(a)	do { delete [] a; a = NULL; } while(0)
// Allocation check hook; expands to nothing here.
#define CHECKALLOC(a)
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Constructor.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort::RadixSort()
	: mCurrentSize(0)
	, mPreviousSize(0)
	, mIndices(NULL)
	, mIndices2(NULL)
	, mTotalCalls(0)
	, mNbHits(0)
{
#ifndef RADIX_LOCAL_RAM
	// Without RADIX_LOCAL_RAM the histogram and offset tables are members
	// allocated once here; their size does not depend on the input.
	mHistogram = new uint32[256*4];
	mOffset = new uint32[256];
#endif

	// mCurrentSize is still zero, so there is nothing to write yet.
	resetIndices();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Destructor.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort::~RadixSort()
{
#ifndef RADIX_LOCAL_RAM
	// Tables allocated by the constructor.
	delete [] mOffset;
	mOffset = NULL;
	delete [] mHistogram;
	mHistogram = NULL;
#endif

	// Index lists allocated by resize(); both may still be NULL.
	delete [] mIndices2;
	mIndices2 = NULL;
	delete [] mIndices;
	mIndices = NULL;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Resizes the inner lists.
* \param nb [in] new size (number of dwords)
* \return true if success
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
bool RadixSort::resize(uint32 nb)
{
	// Throw away the old lists; their contents are not carried over.
	delete [] mIndices2;
	mIndices2 = NULL;
	delete [] mIndices;
	mIndices = NULL;

	// Allocate two fresh lists of nb entries each.
	mIndices = new uint32[nb];
	mIndices2 = new uint32[nb];
	mCurrentSize = nb;

	// Start from identity order so the input is first read sequentially.
	resetIndices();

	return true;
}
// Prepares the index lists for a sort of n values.
// Nothing happens when n equals the size used by the previous call, so the ordering left
// by that call is kept. Otherwise the lists are reallocated through resize() when they are
// too small, or reset to identity order through resetIndices() when they are large enough,
// and n is stored in mPreviousSize.
// The macro expands to a bare if-statement that uses RadixSort members directly, so it is
// only usable inside RadixSort member functions.
#define CHECK_RESIZE(n) \
if(n!=mPreviousSize) \
{ \
if(n>mCurrentSize) resize(n); \
else resetIndices(); \
mPreviousSize = n; \
}
// Fills the four 256-entry byte histograms of mHistogram (one per byte of each 32-bit key)
// in a single sweep over the input, and at the same time checks whether the keys, read
// through the current mIndices order and compared as 'type', are already in non-decreasing
// order.
//   type   - element type used for the ordering check (uint32, int32 or float in this file).
//   buffer - the keys viewed as 'type'; the histograms are always built from the raw bytes
//            of the variable named 'input'.
// Requirements on the expansion site: 'input', 'nb', mHistogram and mIndices must be in
// scope and nb must be non-zero (mIndices[0] is read unconditionally).
// WARNING: the macro contains a hidden 'return *this;' taken when the input turns out to be
// already sorted (mNbHits is incremented in that case). It also declares the locals PrevVal,
// AlreadySorted, Indices, p, pe and h0..h3, so every expansion needs its own scope.
// The byte at offset 0 of each key goes to the histogram labelled LSB below, i.e. the keys
// are assumed to be stored least significant byte first.
#define CREATE_HISTOGRAMS(type, buffer) \
/* Clear counters */ \
memset(mHistogram, 0, 256*4*sizeof(uint32)); \
\
/* Prepare for temporal coherence */ \
type PrevVal = (type)buffer[mIndices[0]]; \
bool AlreadySorted = true; /* Optimism... */ \
uint32* Indices = mIndices; \
\
/* Prepare to count */ \
uint8* p = (uint8*)input; \
uint8* pe = &p[nb*4]; \
uint32* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \
uint32* h1= &mHistogram[256]; /* Histogram for second pass */ \
uint32* h2= &mHistogram[512]; /* Histogram for third pass */ \
uint32* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \
\
while(p!=pe) \
{ \
/* Read input buffer in previous sorted order */ \
type Val = (type)buffer[*Indices++]; \
/* Check whether already sorted or not */ \
if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */ \
/* Update for next iteration */ \
PrevVal = Val; \
\
/* Create histograms */ \
h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \
} \
\
/* If all input values are already sorted, we just have to return and leave the */ \
/* previous list unchanged. That way the routine may take advantage of temporal */ \
/* coherence, for example when used to sort transparent faces. */ \
if(AlreadySorted) { mNbHits++; return *this; } \
\
/* Else there has been an early out and we must finish computing the histograms */ \
while(p!=pe) \
{ \
/* Create histograms without the previous overhead */ \
h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \
}
// Decides whether radix pass number 'pass' (0..3) has to be performed.
// Declares in the current scope:
//   CurCount    - pointer to the 256 counters of mHistogram that belong to this pass,
//   PerformPass - true by default, false when all 'nb' keys share the same value for this byte,
//   UniqueVal   - byte number 'pass' of the first key of 'input'.
// Requires 'input', 'nb' and mHistogram to be in scope. Because it declares locals, it can be
// expanded only once per scope.
#define CHECK_PASS_VALIDITY(pass) \
/* Shortcut to current counters */ \
uint32* CurCount = &mHistogram[pass<<8]; \
\
/* Reset flag. The sorting pass is supposed to be performed. (default) */ \
bool PerformPass = true; \
\
/* Check pass validity */ \
\
/* If all values have the same byte, sorting is useless. */ \
/* It may happen when sorting bytes or words instead of dwords. */ \
/* This routine actually sorts words faster than dwords, and bytes */ \
/* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \
/* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
\
/* Get first byte */ \
uint8 UniqueVal = *(((uint8*)input)+pass); \
\
/* Check that byte's counter */ \
if(CurCount[UniqueVal]==nb) PerformPass=false;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Main sort routine.
* This one is for integer values. After the call, mIndices contains a list of indices in sorted order, i.e. in the order you may process your data.
* \param input [in] a list of integer values to sort
* \param nb [in] number of values to sort
* \param signedvalues [in] true to handle negative values, false if you know your input buffer only contains positive values
* \return Self-Reference
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort& RadixSort::sort(const uint32* input, uint32 nb, bool signedvalues)
{
// Overview: up to four byte-wise passes (least significant byte first) reorder the indices
// in mIndices; the input values themselves are never moved. A null input or nb == 0 leaves
// the sorter untouched.
uint32 i, j;
// Checkings
if(!input || !nb) return *this;
// Stats
mTotalCalls++;
// Resize lists if needed
CHECK_RESIZE(nb);
#ifdef RADIX_LOCAL_RAM
// Allocate histograms & offsets on the stack
uint32 mHistogram[256*4];
uint32 mOffset[256];
#endif
// Create histograms (counters). Counters for all passes are created in one run.
// Pros: read input buffer once instead of four times
// Cons: mHistogram is 4Kb instead of 1Kb
// We must take care of signed/unsigned values for temporal coherence.... I just
// have 2 code paths even if just a single opcode changes. Self-modifying code, someone?
// NOTE: CREATE_HISTOGRAMS returns from this function when the input is already sorted.
if(!signedvalues) { CREATE_HISTOGRAMS(uint32, input); }
else { CREATE_HISTOGRAMS(int32, input); }
// Compute #negative values involved if needed
uint32 NbNegativeValues = 0;
if(signedvalues)
{
// An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
// last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
// responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
uint32* h3= &mHistogram[768];
for( i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
}
// Radix sort, j is the pass number (0=LSB, 3=MSB)
for( j=0;j<4;j++)
{
// Declares CurCount, UniqueVal and PerformPass for this iteration.
CHECK_PASS_VALIDITY(j);
// Sometimes the fourth (negative) pass is skipped because all numbers are negative and the MSB is 0xFF (for example). This is
// not a problem, numbers are correctly sorted anyway.
if(PerformPass)
{
// Should we care about negative values?
if(j!=3 || !signedvalues)
{
// Here we deal with positive values only
// Create offsets
mOffset[0] = 0;
for(i=1;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
}
else
{
// This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place.
// Create biased offsets, in order for negative numbers to be sorted as well
mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones
for(i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
// Fixing the wrong place for negative values
mOffset[128] = 0;
for(i=129;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
}
// Perform Radix Sort
uint8* InputBytes = (uint8*)input;
uint32* Indices = mIndices;
uint32* IndicesEnd = &mIndices[nb];
// After this offset, InputBytes[id<<2] is byte number j of the value with index id.
InputBytes += j;
while(Indices!=IndicesEnd)
{
uint32 id = *Indices++;
mIndices2[mOffset[InputBytes[id<<2]]++] = id;
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp;
}
}
return *this;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Main sort routine.
* This one is for floating-point values. After the call, mIndices contains a list of indices in sorted order, i.e. in the order you may process your data.
* \param input [in] a list of floating-point values to sort
* \param nb [in] number of values to sort
* \return Self-Reference
* \warning only sorts IEEE floating-point values
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort& RadixSort::sort(const float* input2, uint32 nb)
{
// Overview: same index-based scheme as the integer version. The first three passes treat
// the raw bytes like unsigned data; the last pass (most significant byte, holding the sign)
// places negative values first and reverses their order. A null input or nb == 0 leaves the
// sorter untouched.
uint32 i, j;
// Checkings
if(!input2 || !nb) return *this;
// Stats
mTotalCalls++;
// Same buffer viewed as raw 32-bit patterns; used by the macros and the byte-wise passes below.
uint32* input = (uint32*)input2;
// Resize lists if needed
CHECK_RESIZE(nb);
#ifdef RADIX_LOCAL_RAM
// Allocate histograms & offsets on the stack
uint32 mHistogram[256*4];
uint32 mOffset[256];
#endif
// Create histograms (counters). Counters for all passes are created in one run.
// Pros: read input buffer once instead of four times
// Cons: mHistogram is 4Kb instead of 1Kb
// Floating-point values are always supposed to be signed values, so there's only one code path there.
// Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
// is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
// generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
// wouldn't work with mixed positive/negative values....
// NOTE: CREATE_HISTOGRAMS returns from this function when the input is already sorted.
{ CREATE_HISTOGRAMS(float, input2); }
// Compute #negative values involved if needed
uint32 NbNegativeValues = 0;
// An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
// last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
// responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
uint32* h3= &mHistogram[768];
for( i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
// Radix sort, j is the pass number (0=LSB, 3=MSB)
for( j=0;j<4;j++)
{
// Should we care about negative values?
if(j!=3)
{
// Here we deal with positive values only
// Declares CurCount, UniqueVal and PerformPass for this branch.
CHECK_PASS_VALIDITY(j);
if(PerformPass)
{
// Create offsets
mOffset[0] = 0;
for( i=1;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
// Perform Radix Sort
uint8* InputBytes = (uint8*)input;
uint32* Indices = mIndices;
uint32* IndicesEnd = &mIndices[nb];
// After this offset, InputBytes[id<<2] is byte number j of the value with index id.
InputBytes += j;
while(Indices!=IndicesEnd)
{
uint32 id = *Indices++;
mIndices2[mOffset[InputBytes[id<<2]]++] = id;
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp;
}
}
else
{
// This is a special case to correctly handle negative values
// Declares CurCount, UniqueVal and PerformPass for this branch.
CHECK_PASS_VALIDITY(j);
if(PerformPass)
{
// Create biased offsets, in order for negative numbers to be sorted as well
mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones
for(i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
// We must reverse the sorting order for negative numbers!
mOffset[255] = 0;
for(i=0;i<127;i++) mOffset[254-i] = mOffset[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
for(i=128;i<256;i++) mOffset[i] += CurCount[i]; // Fixing the wrong place for negative values
// Perform Radix Sort
for(i=0;i<nb;i++)
{
uint32 Radix = input[mIndices[i]]>>24; // Radix byte, same as above. AND is useless here (uint32).
// ### cmp to be killed. Not good. Later.
if(Radix<128) mIndices2[mOffset[Radix]++] = mIndices[i]; // Number is positive, same as above
else mIndices2[--mOffset[Radix]] = mIndices[i]; // Number is negative, flip the sorting order
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp;
}
else
{
// The pass is useless, yet we still have to reverse the order of current list if all values are negative.
if(UniqueVal>=128)
{
for(i=0;i<nb;i++) mIndices2[i] = mIndices[nb-i-1];
// Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp;
}
}
}
}
return *this;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Resets the inner indices. After the call, mIndices is reset.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void RadixSort::resetIndices()
{
for(uint32 i=0;i<mCurrentSize;i++) mIndices[i] = i;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Gets the ram used.
* \return memory used in bytes
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
uint32 RadixSort::usedRam() const
{
	// The object itself.
	uint32 total = sizeof(RadixSort);

#ifndef RADIX_LOCAL_RAM
	// Heap tables owned by the object when they are not kept on the stack.
	total += 256*4*sizeof(uint32);	// Histograms
	total += 256*sizeof(uint32);	// Offsets
#endif

	// The two index lists.
	total += 2*mCurrentSize*sizeof(uint32);

	return total;
}

69
src/nvcore/Radix.h Normal file
View File

@ -0,0 +1,69 @@
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Contains source code from the article "Radix Sort Revisited".
* \file Radix.h
* \author Pierre Terdiman
* \date April, 4, 2000
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Include Guard
#ifndef NV_CORE_RADIXSORT_H
#define NV_CORE_RADIXSORT_H
#include <nvcore/nvcore.h>
#define RADIX_LOCAL_RAM
//! Index-based radix sorter for 32-bit integer and float keys.
//! The sort methods do not move the input; they produce a list of indices, see indices().
class NVCORE_API RadixSort {
	NV_FORBID_COPY(RadixSort);
public:
	// Construction / destruction
	RadixSort();
	~RadixSort();

	// Sorting methods. Both return a reference to the sorter itself.
	RadixSort & sort(const uint32* input, uint32 nb, bool signedvalues=true);
	RadixSort & sort(const float* input, uint32 nb);

	//! Result of the last sort: indices into the input, listed in sorted order.
	uint32 * indices() const { return mIndices; }

	//! Secondary index list. It is overwritten by the sort routines, but may be reused freely in between.
	uint32 * recyclable() const { return mIndices2; }

	// Stats

	//! Memory used by the sorter, in bytes.
	uint32 usedRam() const;

	//! Total number of calls to the sort methods.
	uint32 totalCalls() const { return mTotalCalls; }

	//! Number of calls that returned early because the input was already sorted.
	uint32 hits() const { return mNbHits; }

private:
#ifndef RADIX_LOCAL_RAM
	uint32* mHistogram;		//!< Counters for each byte
	uint32* mOffset;		//!< Offsets (nearly a cumulative distribution function)
#endif
	uint32 mCurrentSize;	//!< Number of entries allocated in each index list
	uint32 mPreviousSize;	//!< Number of values sorted by the previous call
	uint32* mIndices;		//!< First index list; the two lists are swapped after each pass
	uint32* mIndices2;		//!< Second index list

	// Stats
	uint32 mTotalCalls;
	uint32 mNbHits;

	// Internal methods
	bool resize(uint32 nb);
	void resetIndices();
};
#endif // NV_CORE_RADIXSORT_H

336
src/nvcore/StdStream.h Normal file
View File

@ -0,0 +1,336 @@
#ifndef NV_STDSTREAM_H
#define NV_STDSTREAM_H
#include <nvcore/Stream.h>
#include <stdio.h> // fopen
#include <string.h> // memcpy
#include <exception> // std::exception
namespace nv
{
// Portable version of fopen.
// Returns the opened file, or NULL when the file could not be opened.
inline FILE * fileOpen(const char * fileName, const char * mode)
{
#if NV_CC_MSVC && _MSC_VER >= 1400
	// fopen_s reports success with a zero return value.
	FILE * fp = NULL;
	const bool opened = (fopen_s(&fp, fileName, mode) == 0);
	return opened ? fp : NULL;
#else
	return fopen(fileName, mode);
#endif
}
/// Base stdio stream.
///
/// Implements the positioning and state queries of Stream on top of a FILE
/// handle. Derived classes provide serialize().
class StdStream : public Stream
{
public:

	/// Ctor. Wraps @a fp, which may be NULL (isError() is then true).
	/// When @a autoclose is set, the file is closed by the destructor.
	StdStream( FILE * fp, bool autoclose=true ) :
		m_fp(fp), m_autoclose(autoclose) { }

	/// Dtor. Closes the file if autoclose was requested.
	virtual ~StdStream()
	{
		if( m_fp != NULL && m_autoclose ) {
			fclose( m_fp );
		}
	}


	/** @name Stream implementation. */
	//@{
	/// Move to the absolute position @a pos.
	virtual void seek( int pos )
	{
		nvDebugCheck(m_fp != NULL);
		fseek(m_fp, pos, SEEK_SET);
	}

	/// Get the current position.
	virtual int tell() const
	{
		nvDebugCheck(m_fp != NULL);
		return ftell(m_fp);
	}

	/// Get the file size. The current position is saved and restored.
	virtual int size() const
	{
		// Same precondition as the other accessors; it was missing here only.
		nvDebugCheck(m_fp != NULL);
		int pos = ftell(m_fp);
		fseek(m_fp, 0, SEEK_END);
		int end = ftell(m_fp);
		fseek(m_fp, pos, SEEK_SET);
		return end;
	}

	/// True when there is no file or its error indicator is set.
	virtual bool isError() const
	{
		return m_fp == NULL || ferror( m_fp ) != 0;
	}

	/// True when the end-of-file indicator of the file is set.
	virtual bool isAtEnd() const
	{
		nvDebugCheck(m_fp != NULL);
		return feof( m_fp ) != 0;
	}

	/// Always true.
	virtual bool isSeekable() const { return true; }
	//@}

protected:

	FILE * m_fp;		///< Wrapped file handle, may be NULL.
	bool m_autoclose;	///< Close m_fp in the destructor.

};
/// Standard output stream.
///
/// StdStream that writes: serialize() sends the given bytes to the file.
class StdOutputStream : public StdStream
{
public:

	/// Construct stream by file name. The file is opened with mode "wb".
	StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"))
	{
	}

	/// Construct stream by file handle.
	StdOutputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
	{
	}


	/** @name Stream implementation. */
	//@{
	/// Write @a len bytes starting at @a data.
	virtual void serialize( void * data, int len )
	{
		nvDebugCheck(data != NULL);
		nvDebugCheck(m_fp != NULL);
		// The block is written as a single item of len bytes.
		fwrite(data, len, 1, m_fp);
	}

	/// Always false.
	virtual bool isLoading() const { return false; }

	/// Always true.
	virtual bool isSaving() const { return true; }
	//@}

};
/// Standard input stream.
///
/// StdStream that reads: serialize() fills the given buffer from the file.
class StdInputStream : public StdStream
{
public:

	/// Construct stream by file name. The file is opened with mode "rb".
	StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"))
	{
	}

	/// Construct stream by file handle.
	StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
	{
	}


	/** @name Stream implementation. */
	//@{
	/// Read @a len bytes into @a data.
	/// NOTE(review): the result of fread is discarded, so a read that stops short is
	/// not reported by this call; callers have to query isError() / isAtEnd().
	virtual void serialize( void * data, int len )
	{
		nvDebugCheck(data != NULL);
		nvDebugCheck(m_fp != NULL);
		// The block is read as a single item of len bytes.
		fread(data, len, 1, m_fp);
	}

	/// Always true.
	virtual bool isLoading() const { return true; }

	/// Always false.
	virtual bool isSaving() const { return false; }
	//@}

};
/// Memory input stream.
///
/// Reads from a caller-provided block of memory. The block is neither copied
/// nor freed by the stream.
class MemoryInputStream : public Stream
{
public:

	/// Ctor. @a mem points to @a size bytes of readable memory.
	MemoryInputStream( const uint8 * mem, int size ) :
		m_mem(mem), m_ptr(mem), m_size(size) { }


	/** @name Stream implementation. */
	//@{
	/// Read data.
	///
	/// Copies at most the bytes that are left in the buffer, so a request that
	/// runs past the end no longer reads foreign memory. The read position is
	/// still advanced by the full @a len, exactly as before: an over-long
	/// request therefore leaves the stream in the error state (see isError()).
	virtual void serialize( void * data, int len )
	{
		nvDebugCheck(data != NULL);
		nvDebugCheck(!isError());

		if( !isError() && len > 0 ) {
			const int left = m_size - tell();
			memcpy( data, m_ptr, (len < left) ? len : left );
		}
		m_ptr += len;
	}

	/// Move to the absolute position @a pos.
	virtual void seek( int pos )
	{
		nvDebugCheck(!isError());
		m_ptr = m_mem + pos;
		nvDebugCheck(!isError());
	}

	/// Get the current position, in bytes from the start of the buffer.
	virtual int tell() const
	{
		return m_ptr - m_mem;
	}

	/// Get the size of the buffer.
	virtual int size() const
	{
		return m_size;
	}

	/// True when there is no buffer or the read position lies outside of it.
	virtual bool isError() const
	{
		return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem;
	}

	/// True when the read position is exactly at the end of the buffer.
	virtual bool isAtEnd() const
	{
		return m_ptr == m_mem + m_size;
	}

	/// Always true.
	virtual bool isSeekable() const
	{
		return true;
	}

	/// Always true.
	virtual bool isLoading() const
	{
		return true;
	}

	/// Always false.
	virtual bool isSaving() const
	{
		return false;
	}
	//@}


private:

	const uint8 * m_mem;	///< Start of the buffer.
	const uint8 * m_ptr;	///< Current read position.
	int m_size;				///< Size of the buffer in bytes.

};
/// Protected stream.
///
/// Wraps another stream and turns its error state into exceptions: serialize()
/// and seek() forward to the wrapped stream and throw std::exception when it
/// reports an error afterwards. All other calls are forwarded unchanged.
class ProtectedStream : public Stream
{
public:

	/// Ctor. Wraps a stream that stays owned by the caller.
	ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false) { }

	/// Ctor. Wraps @a s, which must not be NULL. With @a autodelete set, the
	/// wrapped stream is deleted by the destructor.
	ProtectedStream( Stream * s, bool autodelete = true ) : m_s(s), m_autodelete(autodelete)
	{
		nvDebugCheck(m_s != NULL);
	}

	/// Dtor. Deletes the wrapped stream if requested at construction.
	virtual ~ProtectedStream()
	{
		if( !m_autodelete ) return;
		delete m_s;
	}


	/** @name Stream implementation. */
	//@{
	/// Forward the transfer of @a len bytes; throws if the wrapped stream fails.
	virtual void serialize( void * data, int len )
	{
		nvDebugCheck(data != NULL);
		m_s->serialize( data, len );
		throwOnError();
	}

	/// Forward the seek; throws if the wrapped stream fails.
	virtual void seek( int pos )
	{
		m_s->seek( pos );
		throwOnError();
	}

	virtual int tell() const { return m_s->tell(); }

	virtual int size() const { return m_s->size(); }

	virtual bool isError() const { return m_s->isError(); }

	virtual bool isAtEnd() const { return m_s->isAtEnd(); }

	virtual bool isSeekable() const { return m_s->isSeekable(); }

	virtual bool isLoading() const { return m_s->isLoading(); }

	virtual bool isSaving() const { return m_s->isSaving(); }
	//@}


private:

	/// Throws std::exception when the wrapped stream is in the error state.
	void throwOnError() const
	{
		if( m_s->isError() ) {
			throw std::exception();
		}
	}

	Stream * m_s;		///< Wrapped stream.
	bool m_autodelete;	///< Delete m_s in the destructor.

};
} // nv namespace
#endif // NV_STDSTREAM_H

632
src/nvcore/StrLib.cpp Normal file
View File

@ -0,0 +1,632 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/StrLib.h>
#include <math.h> // log
#include <stdio.h> // vsnprintf
#if NV_CC_MSVC
#include <stdarg.h> // vsnprintf
#endif
#if NV_OS_WIN32
#define NV_PATH_SEPARATOR '\\'
#else
#define NV_PATH_SEPARATOR '/'
#endif
using namespace nv;
namespace
{
    // Allocate a raw character buffer of `size` bytes through nv::mem.
    static char * strAlloc(uint size)
    {
        void * block = mem::malloc(size);
        return static_cast<char *>(block);
    }

    // Resize a buffer obtained from strAlloc/strReAlloc through nv::mem.
    static char * strReAlloc(char * str, uint size)
    {
        void * block = mem::realloc(str, size);
        return static_cast<char *>(block);
    }

    // Release a buffer obtained from strAlloc/strReAlloc.
    static void strFree(const char * str)
    {
        mem::free(const_cast<char *>(str));
    }

    /*static char * strDup( const char * str )
    {
        nvDebugCheck( str != NULL );
        uint len = uint(strlen( str ) + 1);
        char * dup = strAlloc( len );
        memcpy( dup, str, len );
        return dup;
    }*/

    // Helper for integer to string conversion: writes the digits of `i` in
    // radix `r` starting at `a` (most significant first) and returns the
    // position just past the last digit. No terminator is written.
    static char * i2a( uint i, char *a, uint r )
    {
        const uint rest = i / r;
        if( rest > 0 ) {
            a = i2a( rest, a, r );
        }
        *a++ = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % r];
        return a;
    }

    // Locale independent functions.

    static inline char toUpper( char c ) {
        if( c >= 'a' && c <= 'z' ) {
            return c+'A'-'a';
        }
        return c;
    }

    static inline char toLower( char c ) {
        if( c >= 'A' && c <= 'Z' ) {
            return c+'a'-'A';
        }
        return c;
    }

    static inline bool isAlpha( char c ) {
        const bool lower = (c>='a' && c<='z');
        const bool upper = (c>='A' && c<='Z');
        return lower || upper;
    }

    static inline bool isDigit( char c ) {
        return !(c<'0' || c>'9');
    }

    static inline bool isAlnum( char c ) {
        return isAlpha(c) || isDigit(c);
    }
}
/// Case-sensitive comparison; asserts (debug) that both strings are non-NULL
/// and returns the strcmp() result.
int nv::strCmp(const char * s1, const char * s2)
{
nvDebugCheck(s1 != NULL);
nvDebugCheck(s2 != NULL);
return strcmp(s1, s2);
}
/// Case-insensitive comparison.
/// Asserts (debug) that both strings are non-NULL and returns the result of
/// the platform routine: _stricmp on MSVC, strcasecmp elsewhere.
int nv::strCaseCmp(const char * s1, const char * s2)
{
    nvDebugCheck(s1 != NULL);
    nvDebugCheck(s2 != NULL);   // was checking s1 twice
#if NV_CC_MSVC
    return _stricmp(s1, s2);
#else
    return strcasecmp(s1, s2);
#endif
}
/// Copy @a src into @a dst, where @a dst has room for @a size bytes.
/// On MSVC 8+ this is strcpy_s. Elsewhere the copy is bounded by hand: at
/// most size-1 characters are copied and the result is always terminated,
/// instead of ignoring @a size and overflowing @a dst.
void nv::strCpy(char * dst, int size, const char * src)
{
    nvDebugCheck(dst != NULL);
    nvDebugCheck(src != NULL);
#if NV_CC_MSVC && _MSC_VER >= 1400
    strcpy_s(dst, size, src);
#else
    if( size <= 0 ) {
        return; // no room, not even for the terminator
    }
    int i = 0;
    while( i < size - 1 && src[i] != '\0' ) {
        dst[i] = src[i];
        i++;
    }
    dst[i] = '\0';
#endif
}
/// Append @a src to @a dst, where @a dst has room for @a size bytes in total.
/// On MSVC 8+ this is strcat_s. Elsewhere the append is bounded with
/// strncat so that the result, including its terminator, never exceeds
/// @a size bytes, instead of ignoring @a size.
void nv::strCat(char * dst, int size, const char * src)
{
    nvDebugCheck(dst != NULL);
    nvDebugCheck(src != NULL);
#if NV_CC_MSVC && _MSC_VER >= 1400
    strcat_s(dst, size, src);
#else
    if( size <= 0 ) {
        return;
    }
    const size_t used = strlen(dst);
    // Room left for characters once the terminator is accounted for.
    if( used + 1 < size_t(size) ) {
        strncat(dst, src, size_t(size) - 1 - used);
    }
#endif
}
/**
 * Pattern matching routine (origin of the algorithm is not recorded).
 *
 * Returns true when the whole of @a str matches @a pat. Pattern syntax as
 * implemented below:
 *  - '*' matches any run of characters, including none.
 *  - '?' matches any single character.
 *  - '[...]' matches one character that is listed or falls inside an
 *    'a-b' range (the range bounds may be given in either order).
 *  - NV_PATH_SEPARATOR is skipped and the pattern character after it is
 *    compared literally.
 *    NOTE(review): this looks like an escape character; confirm that using
 *    the path separator for it is intended on every platform.
 *  - Any other character must match exactly.
 */
bool nv::strMatch(const char * str, const char * pat)
{
nvDebugCheck(str != NULL);
nvDebugCheck(pat != NULL);
char c2;
while (true) {
// End of pattern: success only if the string is exhausted too.
if (*pat==0) {
if (*str==0) return true;
else return false;
}
// String exhausted: only a '*' can still match.
if ((*str==0) && (*pat!='*')) return false;
if (*pat=='*') {
pat++;
// A trailing '*' matches whatever is left.
if (*pat==0) return true;
// Try the rest of the pattern at every remaining position of str.
while (true) {
if (strMatch(str, pat)) return true;
if (*str==0) return false;
str++;
}
}
if (*pat=='?') goto match;
if (*pat=='[') {
pat++;
// Scan the set until an entry matches *str; an unterminated or
// exhausted set is a mismatch.
while (true) {
if ((*pat==']') || (*pat==0)) return false;
if (*pat==*str) break;
if (pat[1] == '-') {
c2 = pat[2];
if (c2==0) return false;
// Accept the range in either order.
if ((*pat<=*str) && (c2>=*str)) break;
if ((*pat>=*str) && (c2<=*str)) break;
pat+=2;
}
pat++;
}
// Skip to the closing ']' (or stop just before the terminator).
while (*pat!=']') {
if (*pat==0) {
pat--;
break;
}
pat++;
}
goto match;
}
// Skip the separator and compare the following character literally.
if (*pat == NV_PATH_SEPARATOR) {
pat++;
if (*pat==0) return false;
}
if (*pat!=*str) return false;
match:
// Current characters matched: advance both cursors.
pat++;
str++;
}
}
/** Empty string: no buffer is allocated (m_str is NULL, m_size is 0). */
StringBuilder::StringBuilder() : m_size(0), m_str(NULL)
{
}
/** Preallocate space: allocates size_hint bytes and stores an empty string. */
StringBuilder::StringBuilder( int size_hint ) : m_size(size_hint)
{
nvDebugCheck(m_size > 0);
m_str = strAlloc(m_size);
*m_str = '\0';
}
/** Copy ctor: starts empty and delegates to copy(). */
StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
{
copy(s);
}
/** Allocate and copy string: allocates size_hint bytes, then delegates to
 * copy(), which grows the buffer if s needs more room. */
StringBuilder::StringBuilder( int size_hint, const StringBuilder & s) : m_size(size_hint), m_str(NULL)
{
nvDebugCheck(m_size > 0);
m_str = strAlloc(m_size);
copy(s);
}
/** Allocate and format string: printf-style construction. Starts empty and
 * hands the captured argument list to format(). */
StringBuilder::StringBuilder( const char * fmt, ... ) : m_size(0), m_str(NULL)
{
nvDebugCheck(fmt != NULL);
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/** Allocate and format string: allocates size_hint bytes first, then hands
 * the captured argument list to format(). */
StringBuilder::StringBuilder( int size_hint, const char * fmt, ... ) : m_size(size_hint), m_str(NULL)
{
nvDebugCheck(m_size > 0);
nvDebugCheck(fmt != NULL);
m_str = strAlloc(m_size);
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/** Delete the string: frees the buffer and clears both members. */
StringBuilder::~StringBuilder()
{
m_size = 0;
strFree(m_str);
m_str = NULL;
}
/** Format a string safely. Variadic front end: captures the arguments in a
 * va_list, passes it to format(), and returns *this for chaining. */
StringBuilder & StringBuilder::format( const char * fmt, ... )
{
nvDebugCheck(fmt != NULL);
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
return *this;
}
/**
 * Format a string safely.
 * Replaces the current contents with the formatted text, growing the buffer
 * until the output fits. A fresh copy of @a arg is taken for every attempt
 * because a va_list cannot be reused after it has been consumed.
 */
StringBuilder & StringBuilder::format( const char * fmt, va_list arg )
{
    nvCheck(fmt != NULL);
    nvCheck(m_size >= 0);

    // Start with a small buffer when none has been allocated yet.
    if( m_size == 0 ) {
        m_size = 64;
        m_str = strAlloc( m_size );
    }

    int n;
    for(;;) {
        va_list tmp;
        va_copy(tmp, arg);
#if NV_CC_MSVC && _MSC_VER >= 1400
        n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp);
#else
        n = vsnprintf(m_str, m_size, fmt, tmp);
#endif
        va_end(tmp);

        // Done once the output, terminator included, fits in the buffer.
        if( n >= 0 && n < int(m_size) ) {
            break;
        }

        // A non-negative result is the required length; a negative one
        // gives no hint, so the buffer is doubled.
        if( n > -1 ) {
            m_size = n + 1;
        }
        else {
            m_size *= 2;
        }
        m_str = strReAlloc(m_str, m_size);
    }

    nvDebugCheck(n < int(m_size));

    // Make sure it's null terminated.
    nvDebugCheck(m_str[n] == '\0');
    //str[n] = '\0';

    return *this;
}
/**
 * Append a string.
 * Allocates an exact-size buffer when the builder is empty, otherwise grows
 * the existing buffer if needed and concatenates. Returns *this.
 */
StringBuilder & StringBuilder::append( const char * s )
{
    nvCheck(s != NULL);
    nvCheck(m_size >= 0);

    const uint slen = uint(strlen( s ));

    if( m_str != NULL ) {
        // Existing contents: make sure both strings and the terminator fit.
        const uint len = uint(strlen( m_str ));
        const uint required = len + slen + 1;
        if( m_size < required ) {
            m_size = required;
            m_str = strReAlloc(m_str, m_size);
        }
        strCat( m_str, m_size, s );
        return *this;
    }

    // Nothing allocated yet: the appended string becomes the contents.
    m_size = slen + 1;
    m_str = strAlloc(m_size);
    strCpy( m_str, m_size, s );
    return *this;
}
/** Append a formatted string. Variadic front end: captures the arguments in
 * a va_list, passes it to appendFormat(), and returns *this. */
StringBuilder & StringBuilder::appendFormat( const char * format, ... )
{
nvDebugCheck( format != NULL );
va_list arg;
va_start( arg, format );
appendFormat( format, arg );
va_end( arg );
return *this;
}
/** Append a formatted string. Formats into a temporary StringBuilder and
 * appends the result to this one. */
StringBuilder & StringBuilder::appendFormat( const char * format, va_list arg )
{
nvDebugCheck( format != NULL );
// Work on a copy so the caller's va_list is left untouched.
va_list tmp;
va_copy(tmp, arg);
StringBuilder tmp_str;
tmp_str.format( format, tmp );
append( tmp_str );
va_end(tmp);
return *this;
}
/**
 * Convert number to string in the given base.
 * Replaces the contents with the textual form of @a i; negative values get a
 * leading '-'. @a base must be in [2, 36]. Returns *this.
 *
 * The buffer size is computed by counting digits. The previous
 * floating-point log() estimate was undefined for zero and negative values.
 */
StringBuilder & StringBuilder::number( int i, int base )
{
    nvCheck( base >= 2 );
    nvCheck( base <= 36 );

    // Work on the magnitude as unsigned: 0u - uint(i) is well defined even
    // for the most negative int, unlike -i.
    const bool negative = (i < 0);
    const uint magnitude = negative ? (0u - uint(i)) : uint(i);

    // Terminator, optional sign, then one slot per digit (at least one).
    uint len = negative ? 2 : 1;
    uint rest = magnitude;
    do {
        len++;
        rest /= uint(base);
    } while( rest != 0 );

    reserve(len);

    char * out = m_str;
    if( negative ) {
        *out++ = '-';
    }
    *i2a(magnitude, out, base) = 0;

    return *this;
}
/**
 * Convert number to string in the given base.
 * Replaces the contents with the textual form of @a i. @a base must be in
 * [2, 36]. Returns *this.
 *
 * The buffer size is computed by counting digits. The previous
 * floating-point estimate left no room for the terminator (5 in base 10
 * reserved a single byte) and was undefined for zero.
 */
StringBuilder & StringBuilder::number( uint i, int base )
{
    nvCheck( base >= 2 );
    nvCheck( base <= 36 );

    // Terminator plus one slot per digit (at least one digit).
    uint len = 1;
    uint rest = i;
    do {
        len++;
        rest /= uint(base);
    } while( rest != 0 );

    reserve(len);
    *i2a(i, m_str, base) = 0;

    return *this;
}
/** Resize the string preserving the contents. Only grows: the buffer is
 * reallocated when size_hint exceeds the current size, otherwise nothing
 * changes. Returns *this. */
StringBuilder & StringBuilder::reserve( uint size_hint )
{
nvCheck(size_hint != 0);
if( size_hint > m_size ) {
m_str = strReAlloc(m_str, size_hint);
m_size = size_hint;
}
return *this;
}
/** Copy a string safely: reserves room for s and its terminator, then
 * copies it over the current contents. Returns *this. */
StringBuilder & StringBuilder::copy( const char * s )
{
nvCheck( s != NULL );
uint str_size = uint(strlen( s )) + 1;
reserve(str_size);
strCpy( m_str, str_size, s );
return *this;
}
/**
 * Copy a StringBuilder.
 * An empty source (NULL buffer) resets this builder. Otherwise the buffer is
 * grown to the source's capacity and the text is copied. Copying an object
 * onto itself is a no-op; it used to run a string copy with identical source
 * and destination. Returns *this.
 */
StringBuilder & StringBuilder::copy( const StringBuilder & s )
{
    if( this == &s ) {
        return *this;
    }

    if( s.m_str == NULL ) {
        nvCheck( s.m_size == 0 );
        m_size = 0;
        strFree( m_str );
        m_str = NULL;
    }
    else {
        reserve( s.m_size );
        strCpy( m_str, s.m_size, s.m_str );
    }
    return *this;
}
/** Reset the string: frees the buffer and returns to the empty state. */
void StringBuilder::reset()
{
m_size = 0;
strFree( m_str );
m_str = NULL;
}
/// Construct a path from a printf-style format string.
Path::Path(const char * fmt, ...)
{
nvDebugCheck( fmt != NULL );
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/// Construct a path from a printf-style format string, preallocating
/// size_hint bytes through the StringBuilder base.
Path::Path(int size_hint, const char * fmt, ...) : StringBuilder(size_hint)
{
nvDebugCheck( fmt != NULL );
va_list arg;
va_start( arg, fmt );
format( fmt, arg );
va_end( arg );
}
/// Get the file name from a path.
/// Forwards this path's buffer to the static fileName(); the result points
/// into that buffer.
const char * Path::fileName() const
{
return fileName(m_str);
}
/// Get the extension from a file path.
/// Forwards this path's buffer to the static extension(); the result points
/// into that buffer.
const char * Path::extension() const
{
return extension(m_str);
}
/// Converts foreign path separators to the native one, in place
/// (i.e. '\\' becomes '/' where '/' is native, and the reverse otherwise).
void Path::translatePath()
{
    nvCheck( m_str != NULL );

    for( char * p = m_str; *p != '\0'; ++p ) {
#if NV_PATH_SEPARATOR == '/'
        if( *p == '\\' ) *p = NV_PATH_SEPARATOR;
#else
        if( *p == '/' ) *p = NV_PATH_SEPARATOR;
#endif
    }
}
/**
 * Strip the file name from a path, in place.
 * Everything after the last '/' or '\\' is removed and the separator itself
 * is kept. When no separator is found past the first character the path
 * becomes empty.
 * @warning path cannot end with '/' or '\\', can it?
 */
void Path::stripFileName()
{
    nvCheck( m_str != NULL );

    // Walk back from the last character to the closest separator. Index 0
    // is never tested, so a separator there counts as "not found".
    int last = (int)strlen(m_str) - 1;
    while( last > 0 ) {
        const char c = m_str[last];
        if( c == '/' || c == '\\' ) {
            break;
        }
        last--;
    }

    // Cut right after the separator, or clear the whole string.
    const int cut = (last != 0) ? last + 1 : 0;
    m_str[cut] = 0;
}
/// Strip the extension from a path name, in place.
/// The string is cut at the last '.' that follows the last
/// NV_PATH_SEPARATOR. Nothing changes when there is no such dot or when the
/// only dot is the first character. An empty path is left alone; previously
/// it caused a write before the start of the buffer.
void Path::stripExtension()
{
    nvCheck( m_str != NULL );

    int length = (int)strlen(m_str) - 1;
    while( length > 0 && m_str[length] != '.' ) {
        length--;
        if( m_str[length] == NV_PATH_SEPARATOR ) {
            return; // no extension
        }
    }

    // length is -1 for an empty string and 0 when no dot was found.
    if( length > 0 ) {
        m_str[length] = 0;
    }
}
/// Get the path separator.
/// Returns NV_PATH_SEPARATOR, which this file defines as '\\' when
/// NV_OS_WIN32 is set and '/' otherwise.
// static
char Path::separator()
{
return NV_PATH_SEPARATOR;
}
// static
/// Return a pointer to the file name part of @a str: the text after the last
/// native separator, or the whole string when there is none. The result
/// points into @a str.
const char * Path::fileName(const char * str)
{
    nvCheck( str != NULL );

    // Step back from the terminator until the previous character is a
    // separator or the start of the string is reached.
    const char * name = str + strlen(str);
    while( name != str && name[-1] != separator() ) {
        name--;
    }
    return name;
}
// static
/// Return a pointer to the extension of @a str, including the leading '.'.
/// When there is no extension the result points at the string terminator.
/// A dot in the very first position is not treated as an extension.
const char * Path::extension(const char * str)
{
    nvCheck( str != NULL );

    const char * const end = str + strlen( str );

    for( const char * p = end; p > str; ) {
        if( *p == '.' ) {
            return p;
        }
        --p;
        if( *p == separator() ) {
            return end; // no extension
        }
    }

    return end;
}
// static
String String::s_null(String::null);
/// Clone this string.
/// Builds a new String from the raw character data through the
/// const char * constructor, which allocates a separate buffer instead of
/// sharing this one.
String String::clone() const
{
String str(data);
return str;
}
/// Point this string at new contents and take a reference on them.
/// A NULL input shares the buffer of the static null string; anything else
/// is copied into a freshly allocated buffer.
void String::setString(const char * str)
{
if( str == NULL ) {
data = s_null.data;
}
else {
allocString( str );
}
addRef();
}
/// Allocate a new buffer through allocString(str, length) and take a
/// reference on it. `str` must not be NULL.
void String::setString(const char * str, int length)
{
nvDebugCheck(str != NULL);
allocString(str, length);
addRef();
}
/// Point this string at a copy of a StringBuilder's contents and take a
/// reference. A builder with no buffer shares the static null string.
void String::setString(const StringBuilder & str)
{
if( str.str() == NULL ) {
data = s_null.data;
}
else {
allocString(str);
}
addRef();
}

348
src/nvcore/StrLib.h Normal file
View File

@ -0,0 +1,348 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_STRING_H
#define NV_CORE_STRING_H
#include <nvcore/nvcore.h>
#include <nvcore/Containers.h> // swap
#include <string.h> // strlen, strcmp, etc.
namespace nv
{
uint strHash(const char * str, uint h) NV_PURE;
/// String hash based on Bernstein's hash.
/// Folds every character of the null-terminated string @a data into @a h
/// with h = (33 * h) ^ c and returns the result. The default seed is 5381,
/// so an empty string hashes to the seed itself.
inline uint strHash(const char * data, uint h = 5381)
{
    // The index must start at zero; it was previously left uninitialized.
    for( uint i = 0; data[i] != 0; i++ ) {
        h = (33 * h) ^ uint(data[i]);
    }
    return h;
}
/// hash<> specialization for C strings: hashes the characters with
/// strHash() using its default seed.
template <> struct hash<const char *> {
uint operator()(const char * str) const { return strHash(str); }
};
NVCORE_API int strCaseCmp(const char * s1, const char * s2) NV_PURE;
NVCORE_API int strCmp(const char * s1, const char * s2) NV_PURE;
NVCORE_API void strCpy(char * dst, int size, const char * src);
NVCORE_API void strCat(char * dst, int size, const char * src);
NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
/// String builder.
/// Owns a growable, null-terminated character buffer. m_str is NULL and
/// m_size is 0 while nothing has been allocated.
class StringBuilder
{
public:

    NVCORE_API StringBuilder();
    NVCORE_API explicit StringBuilder( int size_hint );
    NVCORE_API StringBuilder( const StringBuilder & );
    NVCORE_API StringBuilder( int size_hint, const StringBuilder & );
    NVCORE_API StringBuilder( const char * format, ... ) __attribute__((format (printf, 2, 3)));
    NVCORE_API StringBuilder( int size_hint, const char * format, ... ) __attribute__((format (printf, 3, 4)));

    NVCORE_API ~StringBuilder();

    NVCORE_API StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
    NVCORE_API StringBuilder & format( const char * format, va_list arg );

    NVCORE_API StringBuilder & append( const char * str );
    NVCORE_API StringBuilder & appendFormat( const char * format, ... ) __attribute__((format (printf, 2, 3)));
    NVCORE_API StringBuilder & appendFormat( const char * format, va_list arg );

    NVCORE_API StringBuilder & number( int i, int base = 10 );
    NVCORE_API StringBuilder & number( uint i, int base = 10 );

    NVCORE_API StringBuilder & reserve( uint size_hint );
    NVCORE_API StringBuilder & copy( const char * str );
    NVCORE_API StringBuilder & copy( const StringBuilder & str );

    NVCORE_API StringBuilder & toLower();
    NVCORE_API StringBuilder & toUpper();

    NVCORE_API void reset();

    /// True while no buffer has been allocated (capacity is zero).
    NVCORE_API bool empty() const { return m_size == 0; }

    // const char * accessors
    operator const char * () const { return m_str; }
    operator char * () { return m_str; }
    const char * str() const { return m_str; }
    char * str() { return m_str; }

    /// Implement value semantics.
    StringBuilder & operator=( const StringBuilder & s ) {
        return copy(s);
    }

    /// Equal operator.
    /// True when both builders hold the same text. The comparison used to be
    /// inverted (it returned true for strings that differ).
    bool operator==( const StringBuilder & s ) const {
        nvCheck(m_str != NULL);
        nvCheck(s.m_str != NULL);
        return strcmp(s.m_str, m_str) == 0;
    }

    /// Return the exact length.
    uint length() const { nvCheck(m_str != NULL); return uint(strlen(m_str)); }

    /// Return the size of the string container.
    uint capacity() const { nvCheck(m_str != NULL); return m_size; }

    /// Return the hash of the string.
    uint hash() const { nvCheck(m_str != NULL); return strHash(m_str); }

    /// Swap strings.
    friend void swap(StringBuilder & a, StringBuilder & b) {
        nv::swap(a.m_size, b.m_size);
        nv::swap(a.m_str, b.m_str);
    }

    static char separator();

protected:

    /// Size of the string container.
    uint m_size;

    /// String.
    char * m_str;

};
/// Path string.
/// A StringBuilder with helpers for querying and editing file paths.
class Path : public StringBuilder
{
public:
Path() : StringBuilder() {}
explicit Path(int size_hint) : StringBuilder(size_hint) {}
Path(const StringBuilder & str) : StringBuilder(str) {}
Path(int size_hint, const StringBuilder & str) : StringBuilder(size_hint, str) {}
// printf-style constructors.
NVCORE_API Path(const char * format, ...) __attribute__((format (printf, 2, 3)));
NVCORE_API Path(int size_hint, const char * format, ...) __attribute__((format (printf, 3, 4)));
// Queries; per the member comments in StrLib.cpp these return the file
// name and the extension of this path.
NVCORE_API const char * fileName() const;
NVCORE_API const char * extension() const;
// In-place edits of this path.
NVCORE_API void translatePath();
NVCORE_API void stripFileName();
NVCORE_API void stripExtension();
// statics
NVCORE_API static char separator();
NVCORE_API static const char * fileName(const char *);
NVCORE_API static const char * extension(const char *);
};
/// String class.
/// Reference-counted string. `data` points at the characters; a 16-bit
/// reference count is stored in the two bytes just before them, in the same
/// allocation. Copies share the buffer and bump the count.
class String
{
public:

    /// Constructs a null string. @sa isNull()
    String()
    {
        data = s_null.data;
        addRef();
    }

    /// Constructs a shared copy of str.
    String(const String & str)
    {
        data = str.data;
        addRef();
    }

    /// Constructs a shared string from a standard string.
    String(const char * str)
    {
        setString(str);
    }

    /// Constructs a shared string from the first @a length characters of a
    /// standard string.
    String(const char * str, int length)
    {
        setString(str, length);
    }

    /// Constructs a shared string from a StringBuilder.
    String(const StringBuilder & str)
    {
        setString(str);
    }

    /// Dtor.
    ~String()
    {
        nvDebugCheck(data != NULL);
        release();
    }

    NVCORE_API String clone() const;

    /// Release the current string and allocate a new one.
    /// The new contents are built first and the old buffer is dropped last,
    /// so assigning from a pointer into this string's own buffer is safe.
    const String & operator=( const char * str )
    {
        String tmp( str );
        const char * old = data;
        data = tmp.data;
        tmp.data = old;     // released when tmp goes out of scope
        return *this;
    }

    /// Release the current string and allocate a new one.
    const String & operator=( const StringBuilder & str )
    {
        release();
        setString( str );
        return *this;
    }

    /// Implement value semantics.
    /// Assigning a string that already shares this buffer (including
    /// self-assignment) is a no-op; releasing first could free the buffer
    /// that is about to be adopted.
    String & operator=( const String & str )
    {
        if( str.data != data ) {
            release();
            data = str.data;
            addRef();
        }
        return *this;
    }

    /// Equal operator.
    bool operator==( const String & str ) const
    {
        nvDebugCheck(data != NULL);
        nvDebugCheck(str.data != NULL);
        if( str.data == data ) {
            return true;
        }
        return strcmp(data, str.data) == 0;
    }

    /// Equal operator.
    bool operator==( const char * str ) const
    {
        nvDebugCheck(data != NULL);
        nvCheck(str != NULL);   // Use isNull!
        return strcmp(data, str) == 0;
    }

    /// Not equal operator.
    bool operator!=( const String & str ) const
    {
        nvDebugCheck(data != NULL);
        nvDebugCheck(str.data != NULL);
        if( str.data == data ) {
            return false;
        }
        return strcmp(data, str.data) != 0;
    }

    /// Not equal operator.
    bool operator!=( const char * str ) const
    {
        nvDebugCheck(data != NULL);
        nvCheck(str != NULL);   // Use isNull!
        return strcmp(data, str) != 0;
    }

    /// Returns true if this string is the null string.
    bool isNull() const { nvDebugCheck(data != NULL); return data == s_null.data; }

    /// Return the exact length.
    uint length() const { nvDebugCheck(data != NULL); return uint(strlen(data)); }

    /// Return the hash of the string.
    uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }

    /// const char * cast operator.
    operator const char * () const { nvDebugCheck(data != NULL); return data; }

    /// Get string pointer.
    const char * str() const { nvDebugCheck(data != NULL); return data; }


private:

    enum null_t { null };

    // Private constructor for null string.
    String(null_t) {
        setString("");
    }

    // Add reference count.
    void addRef() {
        nvDebugCheck(data != NULL);
        setRefCount(getRefCount() + 1);
    }

    // Decrease reference count and free the buffer when it reaches zero.
    void release() {
        nvDebugCheck(data != NULL);

        const uint16 count = getRefCount();
        setRefCount(count - 1);
        if( count - 1 == 0 ) {
            mem::free(data - 2);
            data = NULL;
        }
    }

    // The count lives in the two bytes in front of the characters.
    uint16 getRefCount() const {
        return *reinterpret_cast<const uint16 *>(data - 2);
    }

    void setRefCount(uint16 count) {
        nvCheck(count < 0xFFFF);
        *reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
    }

    // Skip the two-byte count header of a freshly allocated block.
    void setData(const char * str) {
        data = str + 2;
    }

    void allocString(const char * str)
    {
        allocString(str, (int)strlen(str));
    }

    // Allocate header + len characters + terminator and copy at most len
    // characters of str. The source may be longer than len (for example a
    // token inside a larger line), so the copy is bounded and the result is
    // terminated explicitly.
    void allocString(const char * str, int len)
    {
        nvDebugCheck(len >= 0);

        const char * ptr = static_cast<const char *>(mem::malloc(2 + len + 1));

        setData( ptr );
        setRefCount( 0 );

        // Copy string.
        char * dst = const_cast<char *>(data);
        strncpy(dst, str, len);
        dst[len] = '\0';
    }

    NVCORE_API void setString(const char * str);
    NVCORE_API void setString(const char * str, int length);
    NVCORE_API void setString(const StringBuilder & str);

    /// Swap strings.
    friend void swap(String & a, String & b) {
        nv::swap(a.data, b.data);
    }

private:

    NVCORE_API static String s_null;

    const char * data;

};
} // nv namespace
#endif // NV_CORE_STRING_H

165
src/nvcore/Stream.h Normal file
View File

@ -0,0 +1,165 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NVCORE_STREAM_H
#define NVCORE_STREAM_H
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
namespace nv
{
/** Base stream class. */
class Stream {
public:
enum ByteOrder {
LittleEndian = false,
BigEndian = true,
};
/// Get the byte order of the system.
static ByteOrder getSystemByteOrder() {
# if NV_LITTLE_ENDIAN
return LittleEndian;
# else
return BigEndian;
# endif
}
/// Ctor.
Stream() : m_byteOrder(LittleEndian) { }
/// Virtual destructor.
virtual ~Stream() {}
/// Set byte order.
void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
/// Get byte order.
ByteOrder byteOrder() const { return m_byteOrder; }
/// Serialize the given data.
virtual void serialize( void * data, int len ) = 0;
/// Move to the given position in the archive.
virtual void seek( int pos ) = 0;
/// Return the current position in the archive.
virtual int tell() const = 0;
/// Return the current size of the archive.
virtual int size() const = 0;
/// Determine if there has been any error.
virtual bool isError() const = 0;
/// Return true if the stream is at the end.
virtual bool isAtEnd() const = 0;
/// Return true if the stream is seekable.
virtual bool isSeekable() const = 0;
/// Return true if this is an input stream.
virtual bool isLoading() const = 0;
/// Return true if this is an output stream.
virtual bool isSaving() const = 0;
// friends
friend Stream & operator<<( Stream & s, bool & c ) {
# if NV_OS_DARWIN
nvStaticCheck(sizeof(bool) == 4);
uint8 b = c ? 1 : 0;
s.serialize( &b, 1 );
c = (b == 1);
# else
nvStaticCheck(sizeof(bool) == 1);
s.serialize( &c, 1 );
# endif
return s;
}
friend Stream & operator<<( Stream & s, char & c ) {
nvStaticCheck(sizeof(char) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, uint8 & c ) {
nvStaticCheck(sizeof(uint8) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, int8 & c ) {
nvStaticCheck(sizeof(int8) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, uint16 & c ) {
nvStaticCheck(sizeof(uint16) == 2);
s.byteOrderSerialize( &c, 2 );
return s;
}
friend Stream & operator<<( Stream & s, int16 & c ) {
nvStaticCheck(sizeof(int16) == 2);
s.byteOrderSerialize( &c, 2 );
return s;
}
friend Stream & operator<<( Stream & s, uint32 & c ) {
nvStaticCheck(sizeof(uint32) == 4);
s.byteOrderSerialize( &c, 4 );
return s;
}
friend Stream & operator<<( Stream & s, int32 & c ) {
nvStaticCheck(sizeof(int32) == 4);
s.byteOrderSerialize( &c, 4 );
return s;
}
friend Stream & operator<<( Stream & s, uint64 & c ) {
nvStaticCheck(sizeof(uint64) == 8);
s.byteOrderSerialize( &c, 8 );
return s;
}
friend Stream & operator<<( Stream & s, int64 & c ) {
nvStaticCheck(sizeof(int64) == 8);
s.byteOrderSerialize( &c, 8 );
return s;
}
friend Stream & operator<<( Stream & s, float & c ) {
nvStaticCheck(sizeof(float) == 4);
s.byteOrderSerialize( &c, 4 );
return s;
}
friend Stream & operator<<( Stream & s, double & c ) {
nvStaticCheck(sizeof(double) == 8);
s.byteOrderSerialize( &c, 8 );
return s;
}
protected:
/** Serialize in the stream byte order. */
Stream & byteOrderSerialize( void * v, int len ) {
if( m_byteOrder == getSystemByteOrder() ) {
serialize( v, len );
}
else {
for( int i=len-1; i>=0; i-- ) {
serialize( (uint8 *)v + i, 1 );
}
}
return *this;
}
private:
ByteOrder m_byteOrder;
};
} // nv namespace
#endif // NVCORE_STREAM_H

85
src/nvcore/TextReader.cpp Normal file
View File

@ -0,0 +1,85 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/TextReader.h>
using namespace nv;
/// Peek next character.
char TextReader::peek()
{
nvDebugCheck(m_stream != NULL);
nvDebugCheck(m_stream->isSeekable());
if (m_stream->isAtEnd()) {
return 0;
}
uint pos = m_stream->tell();
char c;
m_stream->serialize(&c, 1);
m_stream->seek(pos);
return c;
}
/// Read a single char.
/// Consumes and returns the next character, or returns 0 without reading
/// when the stream is already at its end.
char TextReader::read()
{
nvDebugCheck(m_stream != NULL);
if( m_stream->isAtEnd() ) {
return 0;
}
char c;
m_stream->serialize(&c, 1);
return c;
}
/// Read from the current location to the end of the stream.
/// Returns a null-terminated buffer owned by the reader; it stays valid
/// until the next read call. Only the bytes that remain after the current
/// position are read. Previously the full stream size was requested even
/// when part of the stream had already been consumed.
const char * TextReader::readToEnd()
{
    nvDebugCheck(m_stream != NULL);

    int size = m_stream->size() - m_stream->tell();
    if( size < 0 ) {
        size = 0;   // position reported past the end: nothing left to read
    }

    m_text.clear();

    m_text.reserve(size + 1);
    m_text.resize(size);

    m_stream->serialize(m_text.unsecureBuffer(), size);
    m_text.pushBack('\0');

    return m_text.buffer();
}
/// Read from the current location to the end of the line.
/// Returns NULL when the stream is already at its end. Otherwise returns the
/// line without its terminator, in a buffer owned by the reader that stays
/// valid until the next read call. "\n", "\r" and "\r\n" all end a line.
const char * TextReader::readLine()
{
    m_text.clear();

    if( m_stream->isAtEnd() ) {
        return NULL;
    }

    for( char c = read(); c != 0 && c != '\n'; c = read() ) {
        if( c == '\r' ) {
            // Swallow the '\n' of a "\r\n" pair.
            if( peek() == '\n' ) {
                read();
            }
            break;
        }
        m_text.pushBack(c);
    }

    m_text.pushBack('\0');
    return m_text.buffer();
}

38
src/nvcore/TextReader.h Normal file
View File

@ -0,0 +1,38 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NVCORE_TEXTREADER_H
#define NVCORE_TEXTREADER_H
#include <nvcore/nvcore.h>
#include <nvcore/Stream.h>
#include <nvcore/Containers.h>
namespace nv
{
/// Text reader.
/// Reads characters and lines from a loading Stream. The stream is held by
/// pointer; this class has no destructor, so it is not deleted here.
class NVCORE_CLASS TextReader {
public:
/// Ctor. The stream must be non-NULL and report isLoading().
TextReader(Stream * stream) : m_stream(stream), m_text(512) {
nvCheck(stream != NULL);
nvCheck(stream->isLoading());
}
// Return the next character without consuming it.
char peek();
// Consume and return the next character.
char read();
// Read everything up to the end of the stream. Returns a temporary string.
const char *readToEnd();
// Returns a temporary string.
const char * readLine();
private:
Stream * m_stream;
// Scratch buffer that backs the strings returned by the read functions.
Array<char> m_text;
};
} // nv namespace
#endif // NVCORE_TEXTREADER_H

44
src/nvcore/TextWriter.h Normal file
View File

@ -0,0 +1,44 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NVCORE_TEXTWRITER_H
#define NVCORE_TEXTWRITER_H
#include <nvcore/nvcore.h>
#include <nvcore/Stream.h>
#include <nvcore/StrLib.h>
// @@ NOT IMPLEMENTED !!!
namespace nv
{
/// Text writer.
/// Writes raw and printf-style formatted text to a saving Stream. The stream
/// is held by pointer; this class has no destructor, so it is not deleted
/// here.
class NVCORE_CLASS TextWriter
{
public:

    /// Ctor. The stream must be non-NULL and report isSaving().
    TextWriter(Stream * s) : s(s), str(1024) {
        nvDebugCheck(s != NULL);
        nvCheck(s->isSaving());     // Stream declares isSaving(), not IsSaving()
    }

    void write( const char * str, uint len );
    void write( const char * format, ... ) __attribute__((format (printf, 2, 3)));
    void write( const char * format, va_list arg );

private:

    Stream * s;

    // Temporary string.
    StringBuilder str;

};
} // nv namespace
#endif // NVCORE_TEXTWRITER_H

245
src/nvcore/Tokenizer.cpp Normal file
View File

@ -0,0 +1,245 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Tokenizer.h>
#include <nvcore/StrLib.h>
#include <stdio.h> // vsscanf
#include <stdarg.h> // va_list
#include <stdlib.h> // atof, atoi
#if NV_CC_MSVC
/* vsscanf for Win32
* Written 5/2003 by <mgix@mgix.com>
* This code is in the Public Domain
*/
#include <malloc.h> // alloca
//#include <string.h>
// vsscanf replacement for MSVC builds. It builds a fake argument stack
// holding (buffer, format, pointers...) and calls sscanf on it.
// The inline assembly manipulates esp/eax directly, so it only applies to
// 32-bit x86 builds.
// NOTE(review): when built against sscanf_s, conversions such as %s and %c
// expect an extra size argument; confirm that callers account for that.
static int vsscanf(const char * buffer, const char * format, va_list argPtr)
{
// Get an upper bound for the # of args
// ('%*' and '%%' do not consume an argument and are not counted.)
size_t count = 0;
const char *p = format;
while(1) {
char c = *(p++);
if(c==0) break;
if(c=='%' && (p[0]!='*' && p[0]!='%')) ++count;
}
// Make a local stack
// Two extra slots hold the buffer and format pointers.
size_t stackSize = (2+count)*sizeof(void*);
void **newStack = (void**)alloca(stackSize);
// Fill local stack the way sscanf likes it
newStack[0] = (void*)buffer;
newStack[1] = (void*)format;
memcpy(newStack+2, argPtr, count*sizeof(void*));
// @@ Use: CALL DWORD PTR [sscanf]
// Warp into system sscanf with new stack
int result;
void *savedESP;
__asm
{
mov savedESP, esp
mov esp, newStack
#if _MSC_VER >= 1400
call DWORD PTR [sscanf_s]
#else
call DWORD PTR [sscanf]
#endif
mov esp, savedESP
mov result, eax
}
return result;
}
/*
int hacky_vsscanf(const char *str, int count, const char *format, va_list ap) {
nvCheck(count < 8)
if (count == 0) {
}
void * arg0 = va_arg(ap, void *);
void * arg1 = va_arg(ap, void *);
void * arg2 = va_arg(ap, void *);
void * arg3 = va_arg(ap, void *);
void * arg4 = va_arg(ap, void *);
void * arg5 = va_arg(ap, void *);
void * arg6 = va_arg(ap, void *);
void * arg7 = va_arg(ap, void *);
return sscanf(str, format, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
}
*/
#endif
using namespace nv;
/// Default token: zero length, pointing at an empty string literal.
Token::Token() :
m_str(""), m_len(0)
{
}
/// Copy ctor: copies the pointer and length; the characters are not copied.
Token::Token(const Token & token) :
m_str(token.m_str), m_len(token.m_len)
{
}
/// Token over the `len` characters starting at `str`. Only the pointer is
/// stored, so the characters must outlive the token.
Token::Token(const char * str, int len) :
m_str(str), m_len(len)
{
}
/// True when the token text is exactly @a str.
/// The token is m_len characters long and not null-terminated, so the first
/// m_len characters are compared and @a str must end right after them.
/// Without that second check a token matched any string it was a prefix of
/// (token "v" compared equal to "vn").
bool Token::operator==(const char * str) const
{
    return strncmp(m_str, str, m_len) == 0 && str[m_len] == '\0';
}
/// True when the token text differs from @a str.
/// The token is m_len characters long and not null-terminated: it differs
/// when the first m_len characters differ or when @a str continues past
/// them. Previously a token that was a prefix of @a str was reported as not
/// different.
bool Token::operator!=(const char * str) const
{
    return strncmp(m_str, str, m_len) != 0 || str[m_len] != '\0';
}
/// True when the token is empty (zero length), as a default-constructed
/// token is. The test used to be inverted and reported every non-empty token
/// as null.
bool Token::isNull()
{
    return m_len == 0;
}
/// Parse the token as a float with atof().
/// NOTE(review): m_len is not used here; atof() stops at the first character
/// that cannot be part of a number. Confirm the text after the token can
/// never extend the number.
float Token::toFloat() const
{
return float(atof(m_str));
}
/// Parse the token as an int with atoi().
/// NOTE(review): m_len is not used here; atoi() stops at the first
/// non-digit character.
int Token::toInt() const
{
return atoi(m_str);
}
/// Parse the token as an unsigned decimal integer.
/// Uses strtoul() so that values above INT_MAX are parsed correctly; the
/// previous atoi() call is undefined for such input. Parsing stops at the
/// first non-digit character, as before.
uint Token::toUnsignedInt() const
{
    return uint(strtoul(m_str, NULL, 10));
}
/// Build a String from the token's pointer and length.
String Token::toString() const
{
return String(m_str, m_len);
}
/// Scan the token text with a scanf-style format.
/// `count` is the number of conversions the caller expects; the variadic
/// arguments are the scanf destinations. Returns true when vsscanf reports
/// exactly `count` conversions.
bool Token::parse(const char * format, int count, ...) const
{
va_list arg;
va_start(arg, count);
int readCount = vsscanf(m_str, format, arg);
va_end(arg);
return readCount == count;
}
/// Create a tokenizer reading from @a stream.
/// Default delimiters are "{}()=" and default spaces are blank and tab.
/// The current line starts as an empty string, so asking for a token before
/// the first nextLine() returns false. m_line used to be left uninitialized.
Tokenizer::Tokenizer(Stream * stream) :
    m_reader(stream),
    m_line(""),
    m_lineNumber(0),
    m_columnNumber(0),
    m_delimiters("{}()="),
    m_spaces(" \t")
{
}
/// Advance to the next line and read its first token.
/// With @a skipEmptyLines set, lines that yield no token are skipped.
/// Returns false once the input is exhausted.
bool Tokenizer::nextLine(bool skipEmptyLines /*= true*/)
{
    for(;;) {
        if( !readLine() ) {
            return false;
        }
        // Stop on the first line with a token, or on any line when empty
        // lines are not being skipped.
        if( readToken() || !skipEmptyLines ) {
            return true;
        }
    }
}
/// Read the next token of the current line.
/// At the end of the line, returns false unless @a skipEndOfLine is set, in
/// which case reading continues with the next non-empty line.
bool Tokenizer::nextToken(bool skipEndOfLine /*= false*/)
{
    if( readToken() ) {
        return true;
    }
    return skipEndOfLine ? nextLine(true) : false;
}
/// Extract the next token from the current line into m_token.
/// A delimiter character forms a one-character token by itself; any other
/// token runs up to the next delimiter, space or end of line. Returns false
/// when only the end of the line is left.
bool Tokenizer::readToken()
{
    skipSpaces();

    const char * const start = m_line + m_columnNumber;
    if( *start == '\0' ) {
        return false;
    }

    char c = readChar();
    if( isDelimiter(c) ) {
        m_token = Token(start, 1);
        return true;
    }

    // @@ Add support for quoted tokens "", ''

    // Consume characters until the one just read ends the token.
    int count = 0;
    for( ; !isDelimiter(c) && !isSpace(c) && c != '\0'; count++ ) {
        c = readChar();
    }

    // The terminating character was consumed too: give it back.
    m_columnNumber--;

    m_token = Token(start, count);
    return true;
}
/// Return the character at the current column and advance the column.
/// No end-of-line check is made here.
char Tokenizer::readChar()
{
return m_line[m_columnNumber++];
}
bool Tokenizer::readLine()
{
m_lineNumber++;
m_columnNumber = 0;
m_line = m_reader.readLine();
return m_line != NULL;
}
/// Advance the column past any run of space characters.
void Tokenizer::skipSpaces()
{
while (isSpace(readChar())) {}
// The loop consumed the first non-space character as well: give it back.
m_columnNumber--;
}
/// True when @a c is one of the configured space characters. The
/// terminating '\0' of the set never matches.
bool Tokenizer::isSpace(char c)
{
    for( const char * p = m_spaces; *p != '\0'; ++p ) {
        if( *p == c ) {
            return true;
        }
    }
    return false;
}
/// True when @a c is one of the configured delimiter characters. The
/// terminating '\0' of the set never matches.
bool Tokenizer::isDelimiter(char c)
{
    for( const char * p = m_delimiters; *p != '\0'; ++p ) {
        if( *p == c ) {
            return true;
        }
    }
    return false;
}

95
src/nvcore/Tokenizer.h Normal file
View File

@ -0,0 +1,95 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_TOKENIZER_H
#define NV_CORE_TOKENIZER_H
#include <nvcore/nvcore.h>
#include <nvcore/Stream.h>
#include <nvcore/TextReader.h>
#include <nvcore/StrLib.h>
namespace nv
{
/// A token produced by the Tokenizer.
/// Holds a pointer and a length; the characters themselves are not owned.
class NVCORE_CLASS Token
{
public:
Token();
Token(const Token & token);
Token(const char * str, int len);
// Comparison against a null-terminated string.
bool operator==(const char * str) const;
bool operator!=(const char * str) const;
bool isNull();
// Conversions of the token text.
float toFloat() const;
int toInt() const;
uint toUnsignedInt() const;
String toString() const;
// scanf-style parsing; `count` is the number of conversions expected.
bool parse(const char * format, int count, ...) const __attribute__((format (scanf, 2, 4)));
private:
// Start of the token text.
const char * m_str;
// Number of characters in the token.
int m_len;
};
/// Exception thrown by the tokenizer.
/// Carries the line and column passed in at construction.
class TokenizerException
{
public:
TokenizerException(int line, int column) : m_line(line), m_column(column) {}
/// Line passed in at construction.
int line() const { return m_line; }
/// Column passed in at construction.
int column() const { return m_column; }
private:
int m_line;
int m_column;
};
/// A simple stream tokenizer.
/// Reads a stream line by line through a TextReader and splits each line
/// into tokens, using configurable delimiter and space character sets.
class NVCORE_CLASS Tokenizer
{
public:
Tokenizer(Stream * stream);
// Advance to the next line / next token; both return false when nothing
// more could be read.
bool nextLine(bool skipEmptyLines = true);
bool nextToken(bool skipEndOfLine = false);
// Most recently read token.
const Token & token() const { return m_token; }
int lineNumber() const { return m_lineNumber; }
int columnNumber() const { return m_columnNumber; }
// The character sets are stored by pointer and not copied, so the strings
// passed in must outlive the tokenizer.
void setDelimiters(const char * str) { m_delimiters = str; }
const char * delimiters() const { return m_delimiters; }
void setSpaces(const char * str) { m_spaces = str; }
const char * spaces() const { return m_spaces; }
private:
char readChar();
bool readLine();
bool readToken();
void skipSpaces();
bool isSpace(char c);
bool isDelimiter(char c);
private:
TextReader m_reader;
// Current line, as returned by the reader.
const char * m_line;
Token m_token;
int m_lineNumber;
int m_columnNumber;
const char * m_delimiters;
const char * m_spaces;
};
} // nv namespace
#endif // NV_CORE_TOKENIZER_H

172
src/nvcore/nvcore.h Normal file
View File

@ -0,0 +1,172 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_H
#define NV_CORE_H
// cmake config
#include <nvconfig.h>
// Function linkage.
// NVCORE_API / NVCORE_CLASS expand to the DLL export or import decoration when
// NVCORE_SHARED is non-zero, and to nothing otherwise.
#if NVCORE_SHARED && defined(NVCORE_EXPORTS)
    // Shared build with NVCORE_EXPORTS defined: export.
#   define NVCORE_API       DLL_EXPORT
#   define NVCORE_CLASS     DLL_EXPORT_CLASS
#elif NVCORE_SHARED
    // Shared build without NVCORE_EXPORTS: import.
#   define NVCORE_API       DLL_IMPORT
#   define NVCORE_CLASS     DLL_IMPORT
#else
    // Not a shared build: no decoration.
#   define NVCORE_API
#   define NVCORE_CLASS
#endif // NVCORE_SHARED
// Platform definitions
#include "poshlib/posh.h"
// OS:
// One or more of the following are defined to 1, derived from the POSH_OS_* symbols:
//   NV_OS_WIN32
//   NV_OS_WIN64
//   NV_OS_MINGW
//   NV_OS_CYGWIN
//   NV_OS_LINUX
//   NV_OS_UNIX
//   NV_OS_DARWIN
#define NV_OS_STRING    POSH_OS_STRING

// The first matching branch wins, so the order of the tests matters.
#if defined(POSH_OS_LINUX)
#   define NV_OS_LINUX 1
#   define NV_OS_UNIX 1
#elif defined(POSH_OS_CYGWIN32)
#   define NV_OS_CYGWIN 1
#elif defined(POSH_OS_MINGW)
#   define NV_OS_MINGW 1
#   define NV_OS_WIN32 1
#elif defined(POSH_OS_OSX)
#   define NV_OS_DARWIN 1
#   define NV_OS_UNIX 1
#elif defined(POSH_OS_UNIX)
#   define NV_OS_UNIX 1
#elif defined(POSH_OS_WIN32)
#   define NV_OS_WIN32 1
#elif defined(POSH_OS_WIN64)
#   define NV_OS_WIN64 1
#else
#   error "Unsupported OS"
#endif
// CPUs:
// Exactly one of the following is defined to 1, derived from the POSH_CPU_* symbols:
//   NV_CPU_X86
//   NV_CPU_X86_64
//   NV_CPU_PPC
#define NV_CPU_STRING   POSH_CPU_STRING

// POSH_CPU_X86_64 is tested before POSH_CPU_X86 so it takes precedence when both are set.
#if defined(POSH_CPU_X86_64)
#   define NV_CPU_X86_64 1
#elif defined(POSH_CPU_X86)
#   define NV_CPU_X86 1
#elif defined(POSH_CPU_PPC)
#   define NV_CPU_PPC 1
#else
#   error "Unsupported CPU"
#endif
// Compiler:
// Exactly one of the following is defined to 1, together with NV_CC_STRING:
//   NV_CC_GNUC
//   NV_CC_MSVC
// Not provided yet:
//   @@ NV_CC_MSVC6
//   @@ NV_CC_MSVC7
//   @@ NV_CC_MSVC8
#if defined(POSH_COMPILER_GCC)
#   define NV_CC_GNUC   1
#   define NV_CC_STRING "gcc"
#elif defined(POSH_COMPILER_MSVC)
#   define NV_CC_MSVC   1
#   define NV_CC_STRING "msvc"
#else
#   error "Unsupported compiler"
#endif
// Endianness: forwarded from the POSH definitions.
#define NV_LITTLE_ENDIAN    POSH_LITTLE_ENDIAN
#define NV_BIG_ENDIAN       POSH_BIG_ENDIAN
#define NV_ENDIAN_STRING    POSH_ENDIAN_STRING

// Version string: "<os>/<compiler>/<cpu>/<endian>-endian - <date>-<time>",
// built by adjacent string literal concatenation.
#define NV_VERSION_STRING \
    NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING "/" \
    NV_ENDIAN_STRING "-endian - " __DATE__ "-" __TIME__
/// Declare (without defining) a private copy assignment operator and copy
/// constructor for class C, so that copying it is rejected.
/// Note that the class is left in a `private:` section after the macro.
/// @hideinitializer
#define NV_FORBID_COPY(C) \
    private: \
        C &operator=( const C & ); \
        C( const C & );
/// Declare private, undefined `operator new` and `operator new[]` so that
/// objects of the class cannot be allocated on the heap.
/// See Prohibiting Heap-Based Objects in More Effective C++.
/// Note that the class is left in a `private:` section after the macro.
/// @hideinitializer
#define NV_FORBID_HEAPALLOC() \
    private: \
        static void *operator new[](size_t size); \
        static void *operator new(size_t size);
// Token pasting macros.
// The NV_DO_* forms perform the actual ## paste; the public forms add one level
// of indirection so that macro arguments (e.g. __LINE__) are expanded first.
#define NV_DO_STRING_JOIN2(arg1, arg2)          arg1 ## arg2
#define NV_DO_STRING_JOIN3(arg1, arg2, arg3)    arg1 ## arg2 ## arg3
#define NV_STRING_JOIN2(arg1, arg2)             NV_DO_STRING_JOIN2(arg1, arg2)
#define NV_STRING_JOIN3(arg1, arg2, arg3)       NV_DO_STRING_JOIN3(arg1, arg2, arg3)
// Startup initialization macro.
// Expands to a struct in an unnamed namespace, named after the current line,
// whose constructor executes some_code, plus one static instance of it; the
// code therefore runs during static initialization of the translation unit.
// Must be used at namespace scope. Two uses on the same line number within one
// translation unit would collide.
// The namespace is closed with a bare '}' (no ';'): a trailing semicolon there
// is an empty declaration that pre-C++11 / pedantic builds diagnose.
#define NV_AT_STARTUP(some_code) \
    namespace { \
        static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \
            NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \
        } \
        NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
    }
/// Tell the compiler that a parameter or variable is intentionally unused, to
/// suppress "unused" warnings.
/// Implemented as a cast to void: it evaluates the name without modifying it,
/// so it also works on const objects and never invokes an assignment operator.
/// @hideinitializer
#define NV_UNUSED(a) ((void)(a))
/// Null index: all bits set, converted to `uint`.
/// @@ Move this somewhere else... This could have collisions with other definitions!
#define NIL uint(~0)
/// Null pointer. Only defined here when no earlier header has provided NULL.
#ifndef NULL
#define NULL 0
#endif
// Platform includes
// Select the compiler/OS specific definitions header based on the NV_CC_* and
// NV_OS_* symbols defined above. Unsupported combinations stop the build.
#if NV_CC_MSVC
// MSVC: only the Win32 target has a definitions header.
# if NV_OS_WIN32
# include "DefsVcWin32.h"
# else
# error "MSVC: Platform not supported"
# endif
#elif NV_CC_GNUC
// GCC: Linux, Darwin and MinGW are handled; Cygwin is explicitly rejected.
# if NV_OS_LINUX
# include "DefsGnucLinux.h"
# elif NV_OS_DARWIN
# include "DefsGnucDarwin.h"
# elif NV_OS_MINGW
# include "DefsGnucWin32.h"
# elif NV_OS_CYGWIN
# error "GCC: Cygwin not supported"
# else
# error "GCC: Platform not supported"
# endif
#endif
#endif // NV_CORE_H

View File

@ -0,0 +1,14 @@
# Sources of the POSH static library.
set(POSHLIB_SRCS posh.c posh.h)
add_library(posh STATIC ${POSHLIB_SRCS})

# Architecture self-test executable, linked against posh.
add_executable(archtest tests/arch/archtest.c)
target_link_libraries(archtest posh)

# Link test (disabled):
#add_executable(linktest tests/linktest/linktest.cpp tests/linktest/testlib.cpp)
#target_link_libraries(linktest posh)

# Register archtest with CTest under the name POSHTEST.
add_test(POSHTEST archtest)

926
src/nvcore/poshlib/posh.c Normal file
View File

@ -0,0 +1,926 @@
/*
LICENSE:
Copyright (c) 2004, Brian Hook
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* The names of this package's contributors may not
be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
@file posh.c
@author Brian Hook
@date 2002
@brief Portable Open Source Harness primary source file
*/
#include "posh.h"
#if !defined FORCE_DOXYGEN
/* Feature description strings used when building the architecture summary. */
#if defined( POSH_NO_FLOAT )
#  define POSH_FLOAT_STRING "disabled"
#else
#  define POSH_FLOAT_STRING "enabled"
#endif

#if !defined( POSH_64BIT_INTEGER )
#  define POSH_64BIT_INTEGER_STRING "no"
#else
#  define POSH_64BIT_INTEGER_STRING "yes"
#endif

#if !defined( POSH_64BIT_POINTER )
#  define POSH_POINTER_STRING "32-bits"
#else
#  define POSH_POINTER_STRING "64-bits"
#endif

/* Map NATIVE and FOREIGN conversions onto the little/big endian converters,
   depending on the compile-time endianness. Anything that is not declared
   little endian is treated as big endian. */
#if !defined( POSH_LITTLE_ENDIAN )
#  define IS_BIG_ENDIAN 1
#  define NATIVE16  POSH_BigU16
#  define NATIVE32  POSH_BigU32
#  define NATIVE64  POSH_BigU64
#  define FOREIGN16 POSH_LittleU16
#  define FOREIGN32 POSH_LittleU32
#  define FOREIGN64 POSH_LittleU64
#else
#  define IS_BIG_ENDIAN 0
#  define NATIVE16  POSH_LittleU16
#  define NATIVE32  POSH_LittleU32
#  define NATIVE64  POSH_LittleU64
#  define FOREIGN16 POSH_BigU16
#  define FOREIGN32 POSH_BigU32
#  define FOREIGN64 POSH_BigU64
#endif /* POSH_LITTLE_ENDIAN */
/* Run-time endianness probe.
   Stores 1 into a 32-bit word and looks at the byte at the lowest address:
   returns 0 if that byte holds the 1 (little endian), 1 otherwise. */
static int
s_testBigEndian( void )
{
    union
    {
        posh_byte_t c[ 4 ];
        posh_u32_t  i;
    } probe;

    probe.i = 1;

    return ( probe.c[ 0 ] == 1 ) ? 0 : 1;
}
/* Round-trips reference values through the in-memory write/read routines for
   every supported width and byte order.
   Returns 0 on success, or an "*ERROR: ..." message naming the first failure. */
static const char *
s_testSerialization( void )
{
    posh_byte_t serbuf[ 8 ];
    posh_u16_t  tmp16;
    posh_u32_t  tmp32;

    /* 16-bit, little endian then big endian */
    POSH_WriteU16ToLittle( serbuf, 0xABCD );
    tmp16 = POSH_ReadU16FromLittle( serbuf );
    if ( tmp16 != 0xABCD )
    {
        return "*ERROR: failed little-endian 16-bit serialization test";
    }

    POSH_WriteU16ToBig( serbuf, 0xABCD );
    tmp16 = POSH_ReadU16FromBig( serbuf );
    if ( tmp16 != 0xABCD )
    {
        return "*ERROR: failed big-endian 16-bit serialization test";
    }

    /* 32-bit, little endian then big endian */
    POSH_WriteU32ToLittle( serbuf, 0xABCD1234L );
    tmp32 = POSH_ReadU32FromLittle( serbuf );
    if ( tmp32 != 0xABCD1234 )
    {
        return "*ERROR: failed little-endian 32-bit serialization test";
    }

    POSH_WriteU32ToBig( serbuf, 0xABCD1234L );
    tmp32 = POSH_ReadU32FromBig( serbuf );
    if ( tmp32 != 0xABCD1234 )
    {
        return "*ERROR: failed big-endian 32-bit serialization test";
    }

#if defined POSH_64BIT_INTEGER
    /* 64-bit, only when the platform provides a 64-bit integer type */
    {
#define REF64 POSH_U64(0xFEDCBA9876543210)
        posh_u64_t tmp64;

        POSH_WriteU64ToLittle( serbuf, REF64 );
        tmp64 = POSH_ReadU64FromLittle( serbuf );
        if ( tmp64 != REF64 )
        {
            return "*ERROR: failed little-endian 64-bit serialization test";
        }

        POSH_WriteU64ToBig( serbuf, REF64 );
        tmp64 = POSH_ReadU64FromBig( serbuf );
        if ( tmp64 != REF64 )
        {
            return "*ERROR: failed big-endian 64-bit serialization test";
        }
    }
#endif

    return 0;
}
#if !defined POSH_NO_FLOAT
/* Round-trips a float through the little- and big-endian bit conversions and a
   double through the byte serialization, comparing each result bit-for-bit
   with the reference value.
   Returns 0 on success, or an "*ERROR: ..." message naming the first failure. */
static const char *
s_testFloatingPoint( void )
{
    float       refF = 10.0f/30.0f;
    double      refD = 10.0/30.0;
    posh_byte_t bytes[ 8 ];
    float       gotF;
    double      gotD;

    /* float <-> little-endian bits */
    gotF = POSH_FloatFromLittleBits( POSH_LittleFloatBits( refF ) );
    if ( gotF != refF )
    {
        return "*ERROR: POSH little endian floating point conversion failed.  Please report this to poshlib@poshlib.org!\n";
    }

    /* float <-> big-endian bits */
    gotF = POSH_FloatFromBigBits( POSH_BigFloatBits( refF ) );
    if ( gotF != refF )
    {
        return "*ERROR: POSH big endian floating point conversion failed.  Please report this to poshlib@poshlib.org!\n";
    }

    /* double <-> serialized bytes */
    POSH_DoubleBits( refD, bytes );
    gotD = POSH_DoubleFromBits( bytes );
    if ( gotD != refD )
    {
        return "*ERROR: POSH double precision floating point serialization failed.  Please report this to poshlib@poshlib.org!\n";
    }

    return 0;
}
#endif /* !defined POSH_NO_FLOAT */
/* Verifies that the compile-time endianness matches the run-time probe and
   that the NATIVE and FOREIGN conversion macros behave as identity and byte swap.
   Returns 0 on success, or an "*ERROR: ..." message on failure. */
static const char *
s_testEndianess( void )
{
    int swapsOk;

    /* compile-time choice must agree with what the hardware reports */
    if ( s_testBigEndian() != IS_BIG_ENDIAN )
    {
        return "*ERROR: POSH compile time endianess does not match run-time endianess verification.  Please report this to poshlib@poshlib.org!\n";
    }

    /* native conversions must be no-ops, foreign conversions must swap */
    swapsOk = ( NATIVE32( 0x11223344L )  == 0x11223344L ) &&
              ( FOREIGN32( 0x11223344L ) == 0x44332211L ) &&
              ( NATIVE16( 0x1234 )       == 0x1234 )      &&
              ( FOREIGN16( 0x1234 )      == 0x3412 );
    if ( !swapsOk )
    {
        return "*ERROR: POSH endianess macro selection failed.  Please report this to poshlib@poshlib.org!\n";
    }

    return 0;
}
#endif /* !defined FORCE_DOXYGEN */
/**
Returns a string describing this platform's basic attributes.
POSH_GetArchString() reports on an architecture's statically determined
attributes. In addition, it will perform run-time verification checks
to make sure the various platform specific functions work. If an error
occurs, please contact me at poshlib@poshlib.org so we can try to resolve
what the specific failure case is.
@returns a string describing this platform on success, or a string in the
form "*ERROR: [text]" on failure. You can simply check to see if
the first character returned is '*' to verify an error condition.
*/
const char *
POSH_GetArchString( void )
{
const char *err;
const char *s = "OS:.............."POSH_OS_STRING"\n"
"CPU:............."POSH_CPU_STRING"\n"
"endian:.........."POSH_ENDIAN_STRING"\n"
"ptr size:........"POSH_POINTER_STRING"\n"
"64-bit ints......"POSH_64BIT_INTEGER_STRING"\n"
"floating point..."POSH_FLOAT_STRING"\n"
"compiler........."POSH_COMPILER_STRING"\n";
/* test endianess */
err = s_testEndianess();
if ( err != 0 )
{
return err;
}
/* test serialization */
err = s_testSerialization();
if ( err != 0 )
{
return err;
}
#if !defined POSH_NO_FLOAT
/* check that our floating point support is correct */
err = s_testFloatingPoint();
if ( err != 0 )
{
return err;
}
#endif
return s;
}
/* ---------------------------------------------------------------------------*/
/* BYTE SWAPPING SUPPORT */
/* ---------------------------------------------------------------------------*/
/**
 * Exchanges the two bytes of a 16-bit unsigned value
 *
    @param v [in] unsigned 16-bit input value to swap
    @returns v with its low and high bytes exchanged
*/
posh_u16_t
POSH_SwapU16( posh_u16_t v )
{
    /* the conversion back to posh_u16_t drops the bits shifted past bit 15 */
    return ( posh_u16_t ) ( ( v << 8 ) | ( v >> 8 ) );
}
/**
 * Exchanges the two bytes of a 16-bit signed value
 *
    @param v [in] signed 16-bit input value to swap
    @returns v with its low and high bytes exchanged
    @remarks Forwards to POSH_SwapU16(), since byte swapping does not depend
             on sign. The signed entry point exists so callers avoid
             signed/unsigned mismatch compiler warnings.
*/
posh_i16_t
POSH_SwapI16( posh_i16_t v )
{
    posh_u16_t raw = ( posh_u16_t ) v;

    return ( posh_i16_t ) POSH_SwapU16( raw );
}
/**
 * Reverses the byte order of a 32-bit unsigned value
 *
    @param v [in] unsigned 32-bit input value to swap
    @returns v with its four bytes in reverse order
*/
posh_u32_t
POSH_SwapU32( posh_u32_t v )
{
    posh_u32_t b0 = ( v & 0xFF ) << 24;      /* lowest byte  -> highest */
    posh_u32_t b1 = ( v & 0xFF00 ) << 8;     /* second byte  -> third   */
    posh_u32_t b2 = ( v >> 8 ) & 0xFF00;     /* third byte   -> second  */
    posh_u32_t b3 = ( v >> 24 );             /* highest byte -> lowest  */

    return b0 | b1 | b2 | b3;
}
/**
 * Reverses the byte order of a 32-bit signed value
 *
    @param v [in] signed 32-bit input value to swap
    @returns v with its four bytes in reverse order
    @remarks Forwards to POSH_SwapU32(), since byte swapping does not depend
             on sign. The signed entry point exists so callers avoid
             signed/unsigned mismatch compiler warnings.
*/
posh_i32_t
POSH_SwapI32( posh_i32_t v )
{
    posh_u32_t raw     = ( posh_u32_t ) v;
    posh_u32_t swapped = POSH_SwapU32( raw );

    return ( posh_i32_t ) swapped;
}
#if defined POSH_64BIT_INTEGER
/**
 * Reverses the byte order of a 64-bit unsigned value
    @param v [in] a 64-bit input value to swap
    @ingroup SixtyFourBit
    @returns v with its eight bytes in reverse order
*/
posh_u64_t
POSH_SwapU64( posh_u64_t v )
{
    union {
        posh_byte_t bytes[ 8 ];
        posh_u64_t  u64;
    } u;
    int lo;
    int hi;

    u.u64 = v;

    /* exchange the outermost pair of bytes and work towards the middle */
    for ( lo = 0, hi = 7; lo < hi; lo++, hi-- )
    {
        posh_byte_t tmp = u.bytes[ lo ];
        u.bytes[ lo ]   = u.bytes[ hi ];
        u.bytes[ hi ]   = tmp;
    }

    return u.u64;
}
/**
 * Reverses the byte order of a 64-bit signed value
    @param v [in] a 64-bit input value to swap
    @ingroup SixtyFourBit
    @returns v with its eight bytes in reverse order
    @remarks Forwards to POSH_SwapU64() with the appropriate casts.
*/
posh_i64_t
POSH_SwapI64( posh_i64_t v )
{
    posh_u64_t raw     = ( posh_u64_t ) v;
    posh_u64_t swapped = POSH_SwapU64( raw );

    return ( posh_i64_t ) swapped;
}
#endif /* defined POSH_64BIT_INTEGER */
/* ---------------------------------------------------------------------------*/
/* IN-MEMORY SERIALIZATION */
/* ---------------------------------------------------------------------------*/
/**
 * Writes an unsigned 16-bit value to a little endian buffer
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian unsigned 16-bit value
    @returns a pointer to the location two bytes after dst
    @remarks does no validation of the inputs. The value is stored one byte at
             a time, so dst does not need to be aligned for a 16-bit access.
*/
posh_u16_t *
POSH_WriteU16ToLittle( void *dst, posh_u16_t value )
{
    unsigned char *p = ( unsigned char * ) dst;

    /* least significant byte first */
    p[ 0 ] = ( unsigned char ) ( value & 0xFF );
    p[ 1 ] = ( unsigned char ) ( ( value >> 8 ) & 0xFF );

    return ( ( posh_u16_t * ) dst ) + 1;
}
/**
 * Writes a signed 16-bit value to a little endian buffer
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian signed 16-bit value
    @returns a pointer to the location two bytes after dst
    @remarks does no validation of the inputs. Forwards to
             POSH_WriteU16ToLittle() with the appropriate casts.
*/
posh_i16_t *
POSH_WriteI16ToLittle( void *dst, posh_i16_t value )
{
    posh_u16_t  raw  = ( posh_u16_t ) value;
    posh_u16_t *next = POSH_WriteU16ToLittle( dst, raw );

    return ( posh_i16_t * ) next;
}
/**
 * Writes an unsigned 32-bit value to a little endian buffer
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian unsigned 32-bit value
    @returns a pointer to the location four bytes after dst
    @remarks does no validation of the inputs. The value is stored one byte at
             a time, so dst does not need to be aligned for a 32-bit access.
*/
posh_u32_t *
POSH_WriteU32ToLittle( void *dst, posh_u32_t value )
{
    unsigned char *p = ( unsigned char * ) dst;

    /* least significant byte first */
    p[ 0 ] = ( unsigned char ) ( value & 0xFF );
    p[ 1 ] = ( unsigned char ) ( ( value >> 8 ) & 0xFF );
    p[ 2 ] = ( unsigned char ) ( ( value >> 16 ) & 0xFF );
    p[ 3 ] = ( unsigned char ) ( ( value >> 24 ) & 0xFF );

    return ( ( posh_u32_t * ) dst ) + 1;
}
/**
 * Writes a signed 32-bit value to a little endian buffer
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian signed 32-bit value
    @returns a pointer to the location four bytes after dst
    @remarks does no validation of the inputs. Forwards to
             POSH_WriteU32ToLittle() with the appropriate casts.
*/
posh_i32_t *
POSH_WriteI32ToLittle( void *dst, posh_i32_t value )
{
    posh_u32_t  raw  = ( posh_u32_t ) value;
    posh_u32_t *next = POSH_WriteU32ToLittle( dst, raw );

    return ( posh_i32_t * ) next;
}
/**
 * Writes an unsigned 16-bit value to a big endian buffer
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian unsigned 16-bit value
    @returns a pointer to the location two bytes after dst
    @remarks does no validation of the inputs. The value is stored one byte at
             a time, so dst does not need to be aligned for a 16-bit access.
*/
posh_u16_t *
POSH_WriteU16ToBig( void *dst, posh_u16_t value )
{
    unsigned char *p = ( unsigned char * ) dst;

    /* most significant byte first */
    p[ 0 ] = ( unsigned char ) ( ( value >> 8 ) & 0xFF );
    p[ 1 ] = ( unsigned char ) ( value & 0xFF );

    return ( ( posh_u16_t * ) dst ) + 1;
}
/**
 * Writes a signed 16-bit value to a big endian buffer
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian signed 16-bit value
    @returns a pointer to the location two bytes after dst
    @remarks does no validation of the inputs. Forwards to
             POSH_WriteU16ToBig() with the appropriate casts.
*/
posh_i16_t *
POSH_WriteI16ToBig( void *dst, posh_i16_t value )
{
    posh_u16_t  raw  = ( posh_u16_t ) value;
    posh_u16_t *next = POSH_WriteU16ToBig( dst, raw );

    return ( posh_i16_t * ) next;
}
/**
 * Writes an unsigned 32-bit value to a big endian buffer
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian unsigned 32-bit value
    @returns a pointer to the location four bytes after dst
    @remarks does no validation of the inputs. The value is stored one byte at
             a time, so dst does not need to be aligned for a 32-bit access.
*/
posh_u32_t *
POSH_WriteU32ToBig( void *dst, posh_u32_t value )
{
    unsigned char *p = ( unsigned char * ) dst;

    /* most significant byte first */
    p[ 0 ] = ( unsigned char ) ( ( value >> 24 ) & 0xFF );
    p[ 1 ] = ( unsigned char ) ( ( value >> 16 ) & 0xFF );
    p[ 2 ] = ( unsigned char ) ( ( value >> 8 ) & 0xFF );
    p[ 3 ] = ( unsigned char ) ( value & 0xFF );

    return ( ( posh_u32_t * ) dst ) + 1;
}
/**
 * Writes a signed 32-bit value to a big endian buffer
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian signed 32-bit value
    @returns a pointer to the location four bytes after dst
    @remarks does no validation of the inputs. Forwards to
             POSH_WriteU32ToBig() with the appropriate casts.
*/
posh_i32_t *
POSH_WriteI32ToBig( void *dst, posh_i32_t value )
{
    posh_u32_t  raw  = ( posh_u32_t ) value;
    posh_u32_t *next = POSH_WriteU32ToBig( dst, raw );

    return ( posh_i32_t * ) next;
}
#if defined POSH_64BIT_INTEGER
/**
 * Writes an unsigned 64-bit value to a little-endian buffer
    @ingroup SixtyFourBit
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian unsigned 64-bit value
    @returns a pointer to the location eight bytes after dst
    @remarks does no validation of the inputs. The value is stored one byte at
             a time, so dst does not need to be aligned for a 64-bit access.
*/
posh_u64_t *
POSH_WriteU64ToLittle( void *dst, posh_u64_t value )
{
    unsigned char *p = ( unsigned char * ) dst;
    int            i;

    /* least significant byte first */
    for ( i = 0; i < 8; i++ )
    {
        p[ i ] = ( unsigned char ) ( ( value >> ( 8 * i ) ) & 0xFF );
    }

    return ( ( posh_u64_t * ) dst ) + 1;
}
/**
 * Writes a signed 64-bit value to a little-endian buffer
    @ingroup SixtyFourBit
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian signed 64-bit value
    @returns a pointer to the location eight bytes after dst
    @remarks does no validation of the inputs. Forwards to
             POSH_WriteU64ToLittle() with the appropriate casts.
*/
posh_i64_t *
POSH_WriteI64ToLittle( void *dst, posh_i64_t value )
{
    posh_u64_t  raw  = ( posh_u64_t ) value;
    posh_u64_t *next = POSH_WriteU64ToLittle( dst, raw );

    return ( posh_i64_t * ) next;
}
/**
 * Writes an unsigned 64-bit value to a big-endian buffer
    @ingroup SixtyFourBit
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian unsigned 64-bit value
    @returns a pointer to the location eight bytes after dst
    @remarks does no validation of the inputs. The value is stored one byte at
             a time, so dst does not need to be aligned for a 64-bit access.
*/
posh_u64_t *
POSH_WriteU64ToBig( void *dst, posh_u64_t value )
{
    unsigned char *p = ( unsigned char * ) dst;
    int            i;

    /* most significant byte first */
    for ( i = 0; i < 8; i++ )
    {
        p[ 7 - i ] = ( unsigned char ) ( ( value >> ( 8 * i ) ) & 0xFF );
    }

    /* advance by ONE 64-bit element (eight bytes); adding 8 to a
       posh_u64_t pointer would skip 64 bytes */
    return ( ( posh_u64_t * ) dst ) + 1;
}
/**
 * Writes a signed 64-bit value to a big-endian buffer
    @ingroup SixtyFourBit
    @param dst [out] pointer to the destination buffer, may not be NULL
    @param value [in] host-endian signed 64-bit value
    @returns the pointer returned by POSH_WriteU64ToBig(), cast to a signed
             64-bit pointer
    @remarks does no validation of the inputs. Forwards to
             POSH_WriteU64ToBig() with the appropriate casts.
*/
posh_i64_t *
POSH_WriteI64ToBig( void *dst, posh_i64_t value )
{
    posh_u64_t  raw  = ( posh_u64_t ) value;
    posh_u64_t *next = POSH_WriteU64ToBig( dst, raw );

    return ( posh_i64_t * ) next;
}
#endif /* POSH_64BIT_INTEGER */
/* ---------------------------------------------------------------------------*/
/* IN-MEMORY DESERIALIZATION */
/* ---------------------------------------------------------------------------*/
/**
* Reads an unsigned 16-bit value from a little-endian buffer
@param src [in] source buffer
@returns host-endian unsigned 16-bit value
*/
posh_u16_t
POSH_ReadU16FromLittle( const void *src )
{
return POSH_LittleU16( (*(const posh_u16_t*)src) );
}
/**
 * Reads a signed 16-bit value from a little-endian buffer
    @param src [in] source buffer
    @returns host-endian signed 16-bit value
*/
posh_i16_t
POSH_ReadI16FromLittle( const void *src )
{
    const posh_i16_t *p16 = ( const posh_i16_t * ) src;

    return POSH_LittleI16( ( *p16 ) );
}
/**
* Reads an unsigned 32-bit value from a little-endian buffer
@param src [in] source buffer
@returns host-endian unsigned 32-bit value
*/
posh_u32_t
POSH_ReadU32FromLittle( const void *src )
{
return POSH_LittleU32( (*(const posh_u32_t*)src) );
}
/**
 * Reads a signed 32-bit value from a little-endian buffer
    @param src [in] source buffer
    @returns host-endian signed 32-bit value
*/
posh_i32_t
POSH_ReadI32FromLittle( const void *src )
{
    const posh_i32_t *p32 = ( const posh_i32_t * ) src;

    return POSH_LittleI32( ( *p32 ) );
}
/**
* Reads an unsigned 16-bit value from a big-endian buffer
@param src [in] source buffer
@returns host-endian unsigned 16-bit value
*/
posh_u16_t
POSH_ReadU16FromBig( const void *src )
{
return POSH_BigU16( (*(const posh_u16_t*)src) );
}
/**
 * Reads a signed 16-bit value from a big-endian buffer
    @param src [in] source buffer
    @returns host-endian signed 16-bit value
*/
posh_i16_t
POSH_ReadI16FromBig( const void *src )
{
    const posh_i16_t *p16 = ( const posh_i16_t * ) src;

    return POSH_BigI16( ( *p16 ) );
}
/**
* Reads an unsigned 32-bit value from a big-endian buffer
@param src [in] source buffer
@returns host-endian unsigned 32-bit value
*/
posh_u32_t
POSH_ReadU32FromBig( const void *src )
{
return POSH_BigU32( (*(const posh_u32_t*)src) );
}
/**
 * Reads a signed 32-bit value from a big-endian buffer
    @param src [in] source buffer
    @returns host-endian signed 32-bit value
*/
posh_i32_t
POSH_ReadI32FromBig( const void *src )
{
    const posh_i32_t *p32 = ( const posh_i32_t * ) src;

    return POSH_BigI32( ( *p32 ) );
}
#if defined POSH_64BIT_INTEGER
/**
 * Reads an unsigned 64-bit value from a little-endian buffer
    @param src [in] source buffer
    @returns host-endian unsigned 64-bit value
    @remarks The value is assembled one byte at a time, so src does not need
             to be aligned for a 64-bit access.
*/
posh_u64_t
POSH_ReadU64FromLittle( const void *src )
{
    const unsigned char *p = ( const unsigned char * ) src;
    posh_u64_t           v = 0;
    int                  i;

    /* p[7] is the most significant byte: shift it in first */
    for ( i = 7; i >= 0; i-- )
    {
        v = ( v << 8 ) | p[ i ];
    }

    return v;
}
/**
 * Reads a signed 64-bit value from a little-endian buffer
    @param src [in] source buffer
    @returns host-endian signed 64-bit value
*/
posh_i64_t
POSH_ReadI64FromLittle( const void *src )
{
    const posh_i64_t *p64 = ( const posh_i64_t * ) src;

    return POSH_LittleI64( ( *p64 ) );
}
/**
 * Reads an unsigned 64-bit value from a big-endian buffer
    @param src [in] source buffer
    @returns host-endian unsigned 64-bit value
    @remarks The value is assembled one byte at a time, so src does not need
             to be aligned for a 64-bit access.
*/
posh_u64_t
POSH_ReadU64FromBig( const void *src )
{
    const unsigned char *p = ( const unsigned char * ) src;
    posh_u64_t           v = 0;
    int                  i;

    /* p[0] is the most significant byte: shift it in first */
    for ( i = 0; i < 8; i++ )
    {
        v = ( v << 8 ) | p[ i ];
    }

    return v;
}
/**
 * Reads a signed 64-bit value from a big-endian buffer
    @param src [in] source buffer
    @returns host-endian signed 64-bit value
*/
posh_i64_t
POSH_ReadI64FromBig( const void *src )
{
    const posh_i64_t *p64 = ( const posh_i64_t * ) src;

    return POSH_BigI64( ( *p64 ) );
}
#endif /* POSH_64BIT_INTEGER */
/* ---------------------------------------------------------------------------*/
/* FLOATING POINT SUPPORT */
/* ---------------------------------------------------------------------------*/
#if !defined POSH_NO_FLOAT
/**
 * Extracts raw little-endian bits from a 32-bit floating point value
 *
    @ingroup FloatingPoint
    @param f [in] floating point value
    @returns a little-endian bit representation of f
*/
posh_u32_t
POSH_LittleFloatBits( float f )
{
    union
    {
        float      f32;
        posh_u32_t u32;
    } pun;

    /* reinterpret the float's storage as a 32-bit integer */
    pun.f32 = f;

#if !defined POSH_LITTLE_ENDIAN
    /* host is not little endian: reverse the bytes */
    pun.u32 = POSH_SwapU32( pun.u32 );
#endif

    return pun.u32;
}
/**
 * Extracts raw big-endian bits from a 32-bit floating point value
 *
    @ingroup FloatingPoint
    @param f [in] floating point value
    @returns a big-endian bit representation of f
*/
posh_u32_t
POSH_BigFloatBits( float f )
{
    union
    {
        float      f32;
        posh_u32_t u32;
    } pun;

    /* reinterpret the float's storage as a 32-bit integer */
    pun.f32 = f;

#if defined POSH_LITTLE_ENDIAN
    /* host is little endian: reverse the bytes */
    pun.u32 = POSH_SwapU32( pun.u32 );
#endif

    return pun.u32;
}
/**
 * Extracts raw, little-endian bit representation from a 64-bit double.
 *
    @param d [in] 64-bit double precision value
    @param dst [out] 8-byte storage buffer that receives the raw bits used to
                     represent 'd', in the form dst[0]=LSB
    @ingroup FloatingPoint
*/
void
POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] )
{
    union
    {
        double      d64;
        posh_byte_t bytes[ 8 ];
    } pun;
    int i;

    pun.d64 = d;

    for ( i = 0; i < 8; i++ )
    {
#if defined POSH_LITTLE_ENDIAN
        /* host order already matches the output order */
        dst[ i ] = pun.bytes[ i ];
#else
        /* otherwise copy the bytes in reverse */
        dst[ i ] = pun.bytes[ 7 - i ];
#endif
    }
}
/**
 * Creates a double-precision, 64-bit floating point value from a set of raw,
 * little-endian bits
    @ingroup FloatingPoint
    @param src [in] little-endian byte representation of 64-bit double precision
                  floating point value
    @returns double precision floating point representation of the raw bits
    @remarks No error checking is performed, so there are no guarantees that the
            result is a valid number, nor is there any check to ensure that src is
            non-NULL.  BE CAREFUL USING THIS.
*/
double
POSH_DoubleFromBits( const posh_byte_t src[ 8 ] )
{
    union
    {
        double      d64;
        posh_byte_t bytes[ 8 ];
    } pun;
    int i;

    for ( i = 0; i < 8; i++ )
    {
#if defined POSH_LITTLE_ENDIAN
        /* input order already matches host order */
        pun.bytes[ i ] = src[ i ];
#else
        /* otherwise copy the bytes in reverse */
        pun.bytes[ i ] = src[ 7 - i ];
#endif
    }

    return pun.d64;
}
/**
 * Builds a floating point number from little endian bits
 *
    @ingroup FloatingPoint
    @param bits [in] raw floating point bits in little-endian form
    @returns a floating point number based on the given bit representation
    @remarks No error checking is performed, so there are no guarantees that the
            result is a valid number.  BE CAREFUL USING THIS.
*/
float
POSH_FloatFromLittleBits( posh_u32_t bits )
{
    union
    {
        float      f32;
        posh_u32_t u32;
    } pun;

#if defined POSH_BIG_ENDIAN
    /* big-endian host: reverse the bytes into host order first */
    pun.u32 = POSH_SwapU32( bits );
#else
    pun.u32 = bits;
#endif

    return pun.f32;
}
/**
 * Builds a floating point number from big-endian bits
 *
    @ingroup FloatingPoint
    @param bits [in] raw floating point bits in big-endian form
    @returns a floating point number based on the given bit representation
    @remarks No error checking is performed, so there are no guarantees that the
            result is a valid number.  BE CAREFUL USING THIS.
*/
float
POSH_FloatFromBigBits( posh_u32_t bits )
{
    union
    {
        float      f32;
        posh_u32_t u32;
    } pun;

#if defined POSH_LITTLE_ENDIAN
    /* little-endian host: reverse the bytes into host order first */
    pun.u32 = POSH_SwapU32( bits );
#else
    pun.u32 = bits;
#endif

    return pun.f32;
}
#endif /* !defined POSH_NO_FLOAT */

989
src/nvcore/poshlib/posh.h Normal file
View File

@ -0,0 +1,989 @@
/**
@file posh.h
@author Brian Hook
Header file for POSH, the Portable Open Source Harness project.
NOTE: This header is protected by the HAVE_POSH_H include guard
(see the @#ifndef/@#define pair below), so including it multiple
times is harmless.
POSH relies on environment specified preprocessor symbols in order
to infer as much as possible about the target OS/architecture and
the host compiler capabilities.
NOTE: POSH is simple and focused. It attempts to provide basic
functionality and information, but it does NOT attempt to emulate
missing functionality. I am also not willing to make POSH dirty
and hackish to support truly ancient and/or outmoded and/or bizarre
technologies such as non-ANSI compilers, systems with non-IEEE
floating point formats, segmented 16-bit operating systems, etc.
Please refer to the accompanying HTML documentation or visit
http://www.poshlib.org for more information on how to use POSH.
LICENSE:
Copyright (c) 2004, Brian Hook
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* The names of this package's contributors may not
be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
I have yet to find an authoritative reference on preprocessor
symbols, but so far this is what I've gleaned:
GNU GCC/G++:
- __GNUC__: GNU C version
- __GNUG__: GNU C++ compiler
- __sun__ : on Sun platforms
- __svr4__: on Solaris and other SysV R4 platforms
- __mips__: on MIPS processor platforms
- __sparc_v9__: on Sparc 64-bit CPUs
- __sparcv9: 64-bit Solaris
- __MIPSEL__: mips processor, compiled for little endian
- __MIPSEB__: mips processor, compiled for big endian
- _R5900: MIPS/Sony/Toshiba R5900 (PS2)
- mc68000: 68K
- m68000: 68K
- m68k: 68K
- __palmos__: PalmOS
Intel C/C++ Compiler:
- __ECC : compiler version, IA64 only
- __EDG__
- __ELF__
- __GXX_ABI_VERSION
- __i386 : IA-32 only
- __i386__ : IA-32 only
- i386 : IA-32 only
- __ia64 : IA-64 only
- __ia64__ : IA-64 only
- ia64 : IA-64 only
- __ICC : IA-32 only
- __INTEL_COMPILER : IA-32 or IA-64, newer versions only
Apple's C/C++ Compiler for OS X:
- __APPLE_CC__
- __APPLE__
- __BIG_ENDIAN__
- __APPLE__
- __ppc__
- __MACH__
DJGPP:
- __MSDOS__
- __unix__
- __unix
- __GNUC__
- __GO32
- DJGPP
- __i386__, __i386, i386
Cray's C compiler:
- _ADDR64: if 64-bit pointers
- _UNICOS:
- __unix:
SGI's CC compiler predefines the following (and more) with -ansi:
- __sgi
- __unix
- __host_mips
- _SYSTYPE_SVR4
- __mips
- _MIPSEB
- anyone know if there is a predefined symbol for the compiler?!
MinGW:
- as GnuC but also defines _WIN32, __WIN32, WIN32, _X86_, __i386, __i386__, and several others
- __MINGW32__
Cygwin:
- as Gnu C, but also
- __unix__
- __CYGWIN32__
Microsoft Visual Studio predefines the following:
- _MSC_VER
- _WIN32: on Win32
- _M_IX86 (on x86 systems)
- _M_ALPHA (on DEC AXP systems)
- _SH3: WinCE, Hitachi SH-3
- _MIPS: WinCE, MIPS
- _ARM: WinCE, ARM
Sun's C Compiler:
- sun and _sun
- unix and _unix
- sparc and _sparc (SPARC systems only)
- i386 and _i386 (x86 systems only)
- __SVR4 (Solaris only)
- __sparcv9: 64-bit solaris
- __SUNPRO_C
- _LP64: defined in 64-bit LP64 mode, but only if <sys/types.h> is included
Borland C/C++ predefines the following:
- __BORLANDC__:
DEC/Compaq C/C++ on Alpha:
- __alpha
- __arch64__
- __unix__ (on Tru64 Unix)
- __osf__
- __DECC
- __DECCXX (C++ compilation)
- __DECC_VER
- __DECCXX_VER
IBM's AIX compiler:
- __64BIT__ if 64-bit mode
- _AIX
- __IBMC__: C compiler version
- __IBMCPP__: C++ compiler version
- _LONG_LONG: compiler allows long long
Watcom:
- __WATCOMC__
- __DOS__ : if targeting DOS
- __386__ : if 32-bit support
- __WIN32__ : if targeting 32-bit Windows
HP-UX C/C++ Compiler:
- __hpux
- __unix
- __hppa (on PA-RISC)
- __LP64__: if compiled in 64-bit mode
Metrowerks:
- __MWERKS__
- __powerpc__
- _powerc
- __MC68K__
- macintosh when compiling for MacOS
- __INTEL__ for x86 targets
- __POWERPC__
*/
#ifndef HAVE_POSH_H
#define HAVE_POSH_H
/*
** ----------------------------------------------------------------------------
** Include <limits.h> optionally
** ----------------------------------------------------------------------------
*/
#ifdef POSH_USE_LIMITS_H
# include <limits.h>
#endif
/*
** ----------------------------------------------------------------------------
** Determine compilation environment
** ----------------------------------------------------------------------------
*/
/* Each recognised compiler defines its POSH_COMPILER_* flag and, except for
   Apple's compiler, a descriptive POSH_COMPILER_STRING. */
#if defined( __ECC ) || defined( __ICC ) || defined( __INTEL_COMPILER )
#  define POSH_COMPILER_INTEL 1
#  define POSH_COMPILER_STRING "Intel C/C++"
#endif

#if ( defined( __host_mips ) || defined( __sgi ) ) && !defined( __GNUC__ )
#  define POSH_COMPILER_MIPSPRO 1
#  define POSH_COMPILER_STRING "MIPSpro C/C++"
#endif

#if defined( __hpux ) && !defined( __GNUC__ )
#  define POSH_COMPILER_HPCC 1
#  define POSH_COMPILER_STRING "HP-UX CC"
#endif

#if defined( __GNUC__ )
#  define POSH_COMPILER_GCC 1
#  define POSH_COMPILER_STRING "Gnu GCC"
#endif

#if defined( __APPLE_CC__ )
   /* we don't define the compiler string here, let it be GNU */
#  define POSH_COMPILER_APPLECC 1
#endif

#if defined( __IBMC__ ) || defined( __IBMCPP__ )
#  define POSH_COMPILER_IBM 1
#  define POSH_COMPILER_STRING "IBM C/C++"
#endif

#if defined( _MSC_VER )
#  define POSH_COMPILER_MSVC 1
#  define POSH_COMPILER_STRING "Microsoft Visual C++"
#endif

#if defined( __SUNPRO_C )
#  define POSH_COMPILER_SUN 1
#  define POSH_COMPILER_STRING "Sun Pro"
#endif

#if defined( __BORLANDC__ )
#  define POSH_COMPILER_BORLAND 1
#  define POSH_COMPILER_STRING "Borland C/C++"
#endif

#if defined( __MWERKS__ )
#  define POSH_COMPILER_METROWERKS 1
#  define POSH_COMPILER_STRING "MetroWerks CodeWarrior"
#endif

#if defined( __DECC ) || defined( __DECCXX )
#  define POSH_COMPILER_DEC 1
#  define POSH_COMPILER_STRING "Compaq/DEC C/C++"
#endif

#if defined( __WATCOMC__ )
#  define POSH_COMPILER_WATCOM 1
#  define POSH_COMPILER_STRING "Watcom C/C++"
#endif

/* Fallback when none of the tests above matched. */
#if !defined( POSH_COMPILER_STRING )
#  define POSH_COMPILER_STRING "Unknown compiler"
#endif
/*
** ----------------------------------------------------------------------------
** Determine target operating system
** ----------------------------------------------------------------------------
*/
#if defined linux || defined __linux__
# define POSH_OS_LINUX 1
# define POSH_OS_STRING "Linux"
#endif
#if defined __CYGWIN32__
# define POSH_OS_CYGWIN32 1
# define POSH_OS_STRING "Cygwin"
#endif
#if defined __MINGW32__
# define POSH_OS_MINGW 1
# define POSH_OS_STRING "MinGW"
#endif
#if defined GO32 && defined DJGPP && defined __MSDOS__
# define POSH_OS_GO32 1
# define POSH_OS_STRING "GO32/MS-DOS"
#endif
/* NOTE: make sure you use /bt=DOS if compiling for 32-bit DOS,
otherwise Watcom assumes host=target */
#if defined __WATCOMC__ && defined __386__ && defined __DOS__
# define POSH_OS_DOS32 1
# define POSH_OS_STRING "DOS/32-bit"
#endif
#if defined _UNICOS
# define POSH_OS_UNICOS 1
# define POSH_OS_STRING "UNICOS"
#endif
#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
# define POSH_OS_OSX 1
# define POSH_OS_STRING "MacOS X"
#endif
#if defined __sun__ || defined sun || defined __sun || defined __solaris__
# if defined __SVR4 || defined __svr4__ || defined __solaris__
# define POSH_OS_STRING "Solaris"
# define POSH_OS_SOLARIS 1
# endif
# if !defined POSH_OS_STRING
# define POSH_OS_STRING "SunOS"
# define POSH_OS_SUNOS 1
# endif
#endif
#if defined __sgi__ || defined sgi || defined __sgi
# define POSH_OS_IRIX 1
# define POSH_OS_STRING "Irix"
#endif
#if defined __hpux__ || defined __hpux
# define POSH_OS_HPUX 1
# define POSH_OS_STRING "HP-UX"
#endif
#if defined _AIX
# define POSH_OS_AIX 1
# define POSH_OS_STRING "AIX"
#endif
#if ( defined __alpha && defined __osf__ )
# define POSH_OS_TRU64 1
# define POSH_OS_STRING "Tru64"
#endif
#if defined __BEOS__ || defined __beos__
# define POSH_OS_BEOS 1
# define POSH_OS_STRING "BeOS"
#endif
#if defined amiga || defined amigados || defined AMIGA || defined _AMIGA
# define POSH_OS_AMIGA 1
# define POSH_OS_STRING "Amiga"
#endif
#if defined __unix__
# define POSH_OS_UNIX 1
# if !defined POSH_OS_STRING
# define POSH_OS_STRING "Unix-like(generic)"
# endif
#endif
#if defined _WIN32_WCE
# define POSH_OS_WINCE 1
# define POSH_OS_STRING "Windows CE"
#endif
/*
** Xbox detection.
**
** The version test must be written as "defined X && X == 200": the
** preprocessor parses "defined _XBOX_VER == 200" as
** "(defined _XBOX_VER) == 200", i.e. 0 == 200 or 1 == 200, which is never
** true, so the Xbox 360 branch could not be selected.
*/
#if defined _XBOX_VER && ( _XBOX_VER == 200 )
#  define POSH_OS_XBOX360 1
#  define POSH_OS_STRING "XBOX-360"
#elif defined _XBOX
#  define POSH_OS_XBOX 1
#  define POSH_OS_STRING "XBOX"
#endif
#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__
# define POSH_OS_WIN32 1
# if !defined POSH_OS_XBOX
# if defined _WIN64
# define POSH_OS_WIN64 1
# define POSH_OS_STRING "Win64"
# else
# if !defined POSH_OS_STRING
# define POSH_OS_STRING "Win32"
# endif
# endif
# endif
#endif
#if defined __palmos__
# define POSH_OS_PALM 1
# define POSH_OS_STRING "PalmOS"
#endif
#if defined THINK_C || defined macintosh
# define POSH_OS_MACOS 1
# define POSH_OS_STRING "MacOS"
#endif
/*
** -----------------------------------------------------------------------------
** Determine target CPU
** -----------------------------------------------------------------------------
*/
#if defined mc68000 || defined m68k || defined __MC68K__ || defined m68000
# define POSH_CPU_68K 1
# define POSH_CPU_STRING "MC68000"
#endif
#if defined __PPC__ || defined __POWERPC__ || defined powerpc || defined _POWER || defined __ppc__ || defined __powerpc__
# define POSH_CPU_PPC 1
# if defined __powerpc64__
# define POSH_CPU_STRING "PowerPC64"
# else
# define POSH_CPU_STRING "PowerPC"
# endif
#endif
#if defined _CRAYT3E || defined _CRAYMPP
# define POSH_CPU_CRAYT3E 1 /* target processor is a DEC Alpha 21164 used in a Cray T3E*/
# define POSH_CPU_STRING "Cray T3E (Alpha 21164)"
#endif
#if defined CRAY || defined _CRAY && !defined _CRAYT3E
# error Non-AXP Cray systems not supported
#endif
#if defined _SH3
# define POSH_CPU_SH3 1
# define POSH_CPU_STRING "Hitachi SH-3"
#endif
#if defined __sh4__ || defined __SH4__
# define POSH_CPU_SH3 1
# define POSH_CPU_SH4 1
# define POSH_CPU_STRING "Hitachi SH-4"
#endif
#if defined __sparc__ || defined __sparc
# if defined __arch64__ || defined __sparcv9 || defined __sparc_v9__
# define POSH_CPU_SPARC64 1
# define POSH_CPU_STRING "Sparc/64"
# else
# define POSH_CPU_STRING "Sparc/32"
# endif
# define POSH_CPU_SPARC 1
#endif
#if defined ARM || defined __arm__ || defined _ARM
# define POSH_CPU_STRONGARM 1
# define POSH_CPU_STRING "ARM"
#endif
#if defined mips || defined __mips__ || defined __MIPS__ || defined _MIPS
# define POSH_CPU_MIPS 1
# if defined _R5900
# define POSH_CPU_STRING "MIPS R5900 (PS2)"
# else
# define POSH_CPU_STRING "MIPS"
# endif
#endif
#if defined __ia64 || defined _M_IA64 || defined __ia64__
# define POSH_CPU_IA64 1
# define POSH_CPU_STRING "IA64"
#endif
#if defined __X86__ || defined __i386__ || defined i386 || defined _M_IX86 || defined __386__ || defined __x86_64__ || defined _M_X64
# define POSH_CPU_X86 1
# if defined __x86_64__ || defined _M_X64
# define POSH_CPU_X86_64 1
# endif
# if defined POSH_CPU_X86_64
# define POSH_CPU_STRING "AMD x86-64"
# else
# define POSH_CPU_STRING "Intel 386+"
# endif
#endif
#if defined __alpha || defined alpha || defined _M_ALPHA || defined __alpha__
# define POSH_CPU_AXP 1
# define POSH_CPU_STRING "AXP"
#endif
#if defined __hppa || defined hppa
# define POSH_CPU_HPPA 1
# define POSH_CPU_STRING "PA-RISC"
#endif
#if !defined POSH_CPU_STRING
# error POSH cannot determine target CPU
# define POSH_CPU_STRING "Unknown" /* this is here for Doxygen's benefit */
#endif
/*
** -----------------------------------------------------------------------------
** Attempt to autodetect building for embedded on Sony PS2
** -----------------------------------------------------------------------------
*/
#if !defined POSH_OS_STRING
# if !defined FORCE_DOXYGEN
# define POSH_OS_EMBEDDED 1
# endif
# if defined _R5900
# define POSH_OS_STRING "Sony PS2(embedded)"
# else
# define POSH_OS_STRING "Embedded/Unknown"
# endif
#endif
/*
** ---------------------------------------------------------------------------
** Handle cdecl, stdcall, fastcall, etc.
** ---------------------------------------------------------------------------
*/
/*
** POSH_CDECL / POSH_STDCALL / POSH_FASTCALL expand to the compiler's
** calling-convention keyword on 32-bit x86 and to nothing everywhere else.
*/
#if defined POSH_CPU_X86 && !defined POSH_CPU_X86_64
#  if defined __GNUC__
#    define POSH_CDECL __attribute__((cdecl))
#    define POSH_STDCALL __attribute__((stdcall))
#    define POSH_FASTCALL __attribute__((fastcall))
#  elif ( defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__ || defined __MWERKS__ )
#    define POSH_CDECL __cdecl
#    define POSH_STDCALL __stdcall
#    define POSH_FASTCALL __fastcall
#  else
/* Unrecognized 32-bit x86 compiler: define the macros as empty so that
   declarations using them still compile with the default convention. */
#    define POSH_CDECL
#    define POSH_STDCALL
#    define POSH_FASTCALL
#  endif
#else
#  define POSH_CDECL
#  define POSH_STDCALL
#  define POSH_FASTCALL
#endif
/*
** ---------------------------------------------------------------------------
** Define POSH_IMPORTEXPORT signature based on POSH_DLL and POSH_BUILDING_LIB
** ---------------------------------------------------------------------------
*/
/*
** We undefine this so that multiple inclusions will work
*/
#if defined POSH_IMPORTEXPORT
# undef POSH_IMPORTEXPORT
#endif
#if defined POSH_DLL
# if defined POSH_OS_WIN32
# if defined _MSC_VER
# if ( _MSC_VER >= 800 )
# if defined POSH_BUILDING_LIB
# define POSH_IMPORTEXPORT __declspec( dllexport )
# else
# define POSH_IMPORTEXPORT __declspec( dllimport )
# endif
# else
# if defined POSH_BUILDING_LIB
# define POSH_IMPORTEXPORT __export
# else
# define POSH_IMPORTEXPORT
# endif
# endif
# endif /* defined _MSC_VER */
# if defined __BORLANDC__
# if ( __BORLANDC__ >= 0x500 )
# if defined POSH_BUILDING_LIB
# define POSH_IMPORTEXPORT __declspec( dllexport )
# else
# define POSH_IMPORTEXPORT __declspec( dllimport )
# endif
# else
# if defined POSH_BUILDING_LIB
# define POSH_IMPORTEXPORT __export
# else
# define POSH_IMPORTEXPORT
# endif
# endif
# endif /* defined __BORLANDC__ */
/* for all other compilers, we're just making a blanket assumption */
# if defined __GNUC__ || defined __WATCOMC__ || defined __MWERKS__
# if defined POSH_BUILDING_LIB
# define POSH_IMPORTEXPORT __declspec( dllexport )
# else
# define POSH_IMPORTEXPORT __declspec( dllimport )
# endif
# endif /* all other compilers */
# if !defined POSH_IMPORTEXPORT
# error Building DLLs not supported on this compiler (poshlib@poshlib.org if you know how)
# endif
# endif /* defined POSH_OS_WIN32 */
#endif
/* On pretty much everything else, we can thankfully just ignore this */
#if !defined POSH_IMPORTEXPORT
# define POSH_IMPORTEXPORT
#endif
#if defined FORCE_DOXYGEN
# define POSH_DLL
# define POSH_BUILDING_LIB
# undef POSH_DLL
# undef POSH_BUILDING_LIB
#endif
/*
** ----------------------------------------------------------------------------
** (Re)define POSH_PUBLIC_API export signature
** ----------------------------------------------------------------------------
*/
#ifdef POSH_PUBLIC_API
# undef POSH_PUBLIC_API
#endif
#if ( ( defined _MSC_VER ) && ( _MSC_VER < 800 ) ) || ( defined __BORLANDC__ && ( __BORLANDC__ < 0x500 ) )
# define POSH_PUBLIC_API(rtype) extern rtype POSH_IMPORTEXPORT
#else
# define POSH_PUBLIC_API(rtype) extern POSH_IMPORTEXPORT rtype
#endif
/*
** ----------------------------------------------------------------------------
** Try to infer endianness. Basically we just go through the CPUs we know are
** little endian, and assume anything that isn't one of those is big endian.
** As a sanity check, we also do this with operating systems we know are
** little endian, such as Windows. Some processors are bi-endian, such as
** the MIPS series, so we have to be careful about those.
** ----------------------------------------------------------------------------
*/
#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__
# define POSH_ENDIAN_STRING "little"
# define POSH_LITTLE_ENDIAN 1
#else
# define POSH_ENDIAN_STRING "big"
# define POSH_BIG_ENDIAN 1
#endif
#if defined FORCE_DOXYGEN
# define POSH_LITTLE_ENDIAN
#endif
/*
** ----------------------------------------------------------------------------
** Cross-platform compile time assertion macro
** ----------------------------------------------------------------------------
*/
#define POSH_COMPILE_TIME_ASSERT(name, x) typedef int _POSH_dummy_ ## name[(x) ? 1 : -1 ]
/*
** ----------------------------------------------------------------------------
** 64-bit Integer
**
** We don't require 64-bit support, nor do we emulate its functionality, we
** simply export it if it's available. Since we can't count on <limits.h>
** for 64-bit support, we ignore the POSH_USE_LIMITS_H directive.
** ----------------------------------------------------------------------------
*/
/*
** Pick the native 64-bit integer type for this compiler/target.
**
** POSH_I64(x) / POSH_U64(x) turn a constant into a posh_i64_t / posh_u64_t.
** The argument is parenthesized in every variant so that the cast applies
** to the whole expression rather than only to its first operand.
** POSH_I64_PRINTF_PREFIX is the printf length modifier for these types.
*/
#if defined ( __LP64__ ) || defined ( __powerpc64__ ) || defined POSH_CPU_SPARC64
#  define POSH_64BIT_INTEGER 1
typedef long posh_i64_t;
typedef unsigned long posh_u64_t;
#  define POSH_I64( x ) ((posh_i64_t)(x))
#  define POSH_U64( x ) ((posh_u64_t)(x))
#  define POSH_I64_PRINTF_PREFIX "l"
#elif defined _MSC_VER || defined __BORLANDC__ || defined __WATCOMC__ || ( defined __alpha && defined __DECC )
#  define POSH_64BIT_INTEGER 1
typedef __int64 posh_i64_t;
typedef unsigned __int64 posh_u64_t;
#  define POSH_I64( x ) ((posh_i64_t)(x))
#  define POSH_U64( x ) ((posh_u64_t)(x))
#  define POSH_I64_PRINTF_PREFIX "I64"
#elif defined __GNUC__ || defined __MWERKS__ || defined __SUNPRO_C || defined __SUNPRO_CC || defined __APPLE_CC__ || defined POSH_OS_IRIX || defined _LONG_LONG || defined _CRAYC
#  define POSH_64BIT_INTEGER 1
typedef long long posh_i64_t;
typedef unsigned long long posh_u64_t;
/* the LL suffix is pasted onto the last token of x */
#  define POSH_U64( x ) ((posh_u64_t)(x##LL))
#  define POSH_I64( x ) ((posh_i64_t)(x##LL))
#  define POSH_I64_PRINTF_PREFIX "ll"
#endif
/* hack */
#ifdef __MINGW32__
# undef POSH_I64
# undef POSH_U64
# undef POSH_I64_PRINTF_PREFIX
# define POSH_U64( x ) ((posh_u64_t)(x##LL))
# define POSH_I64( x ) ((posh_i64_t)(x##LL))
# define POSH_I64_PRINTF_PREFIX "I64"
#endif
#ifdef FORCE_DOXYGEN
typedef long long posh_i64_t;
typedef unsigned long posh_u64_t;
# define POSH_64BIT_INTEGER
# define POSH_I64_PRINTF_PREFIX
# define POSH_I64(x)
# define POSH_U64(x)
#endif
/** Minimum value for a 64-bit signed integer */
#define POSH_I64_MIN POSH_I64(0x8000000000000000)
/** Maximum value for a 64-bit signed integer */
#define POSH_I64_MAX POSH_I64(0x7FFFFFFFFFFFFFFF)
/** Minimum value for a 64-bit unsigned integer */
#define POSH_U64_MIN POSH_U64(0)
/** Maximum value for a 64-bit unsigned integer */
#define POSH_U64_MAX POSH_U64(0xFFFFFFFFFFFFFFFF)
/* ----------------------------------------------------------------------------
** Basic Sized Types
**
** These types are expected to be EXACTLY sized so you can use them for
** serialization.
** ----------------------------------------------------------------------------
*/
#define POSH_FALSE 0
#define POSH_TRUE 1
typedef int posh_bool_t;
typedef unsigned char posh_byte_t;
/* NOTE: These assume that CHAR_BIT is 8!! */
typedef unsigned char posh_u8_t;
typedef signed char posh_i8_t;
/*
** With POSH_USE_LIMITS_H the 16- and 32-bit types are chosen by inspecting
** the <limits.h> constants; otherwise short/int are assumed (long on PalmOS).
*/
#if defined POSH_USE_LIMITS_H
/* the <limits.h> macro is CHAR_BIT; an undefined name would evaluate to 0
   here and silently disable the check */
#  if CHAR_BIT > 8
#    error This machine uses 9-bit characters. This is a warning, you can comment this out now.
#  endif /* CHAR_BIT > 8 */
/* 16-bit */
#  if ( USHRT_MAX == 65535 )
typedef unsigned short posh_u16_t;
typedef short posh_i16_t;
#  else
/* Yes, in theory there could still be a 16-bit character type and shorts are
32-bits in size...if you find such an architecture, let me know =P */
#    error No 16-bit type found
#  endif
/* 32-bit */
#  if ( INT_MAX == 2147483647 )
typedef unsigned posh_u32_t;
typedef int posh_i32_t;
#  elif ( LONG_MAX == 2147483647 )
typedef unsigned long posh_u32_t;
typedef long posh_i32_t;
#  else
#    error No 32-bit type found
#  endif
#else /* POSH_USE_LIMITS_H */
typedef unsigned short posh_u16_t;
typedef short posh_i16_t;
#  if !defined POSH_OS_PALM
typedef unsigned posh_u32_t;
typedef int posh_i32_t;
#  else
typedef unsigned long posh_u32_t;
typedef long posh_i32_t;
#  endif
#endif
/** Minimum value for a byte */
#define POSH_BYTE_MIN 0
/** Maximum value for an 8-bit unsigned value */
#define POSH_BYTE_MAX 255
/** Minimum value for a 16-bit signed value */
#define POSH_I16_MIN ( ( posh_i16_t ) 0x8000 )
/** Maximum value for a 16-bit signed value */
#define POSH_I16_MAX ( ( posh_i16_t ) 0x7FFF )
/** Minimum value for a 16-bit unsigned value */
#define POSH_U16_MIN 0
/** Maximum value for a 16-bit unsigned value */
#define POSH_U16_MAX ( ( posh_u16_t ) 0xFFFF )
/** Minimum value for a 32-bit signed value */
#define POSH_I32_MIN ( ( posh_i32_t ) 0x80000000 )
/** Maximum value for a 32-bit signed value */
#define POSH_I32_MAX ( ( posh_i32_t ) 0x7FFFFFFF )
/** Minimum value for a 32-bit unsigned value */
#define POSH_U32_MIN 0
/** Maximum value for a 32-bit unsigned value */
#define POSH_U32_MAX ( ( posh_u32_t ) 0xFFFFFFFF )
/*
** ----------------------------------------------------------------------------
** Sanity checks on expected sizes
** ----------------------------------------------------------------------------
*/
#if !defined FORCE_DOXYGEN
POSH_COMPILE_TIME_ASSERT(posh_byte_t, sizeof(posh_byte_t) == 1);
POSH_COMPILE_TIME_ASSERT(posh_u8_t, sizeof(posh_u8_t) == 1);
POSH_COMPILE_TIME_ASSERT(posh_i8_t, sizeof(posh_i8_t) == 1);
POSH_COMPILE_TIME_ASSERT(posh_u16_t, sizeof(posh_u16_t) == 2);
POSH_COMPILE_TIME_ASSERT(posh_i16_t, sizeof(posh_i16_t) == 2);
POSH_COMPILE_TIME_ASSERT(posh_u32_t, sizeof(posh_u32_t) == 4);
POSH_COMPILE_TIME_ASSERT(posh_i32_t, sizeof(posh_i32_t) == 4);
#if !defined POSH_NO_FLOAT
POSH_COMPILE_TIME_ASSERT(posh_testfloat_t, sizeof(float)==4 );
POSH_COMPILE_TIME_ASSERT(posh_testdouble_t, sizeof(double)==8);
#endif
#if defined POSH_64BIT_INTEGER
POSH_COMPILE_TIME_ASSERT(posh_u64_t, sizeof(posh_u64_t) == 8);
POSH_COMPILE_TIME_ASSERT(posh_i64_t, sizeof(posh_i64_t) == 8);
#endif
#endif
/*
** ----------------------------------------------------------------------------
** 64-bit pointer support
** ----------------------------------------------------------------------------
*/
#if defined POSH_CPU_AXP && ( defined POSH_OS_TRU64 || defined POSH_OS_LINUX )
# define POSH_64BIT_POINTER 1
#endif
#if defined POSH_CPU_X86_64 && defined POSH_OS_LINUX
# define POSH_64BIT_POINTER 1
#endif
#if defined POSH_CPU_SPARC64 || defined POSH_OS_WIN64 || defined __64BIT__ || defined __LP64 || defined _LP64 || defined __LP64__ || defined _ADDR64 || defined _CRAYC
# define POSH_64BIT_POINTER 1
#endif
#if defined POSH_64BIT_POINTER
POSH_COMPILE_TIME_ASSERT( posh_64bit_pointer, sizeof( void * ) == 8 );
#elif !defined FORCE_DOXYGEN
/* if this assertion is hit then you're on a system that either has 64-bit
addressing and we didn't catch it, or you're on a system with 16-bit
pointers. In the latter case, POSH doesn't actually care, we're just
triggering this assertion to make sure you're aware of the situation,
so feel free to delete it.
If this assertion is triggered on a known 32 or 64-bit platform,
please let us know (poshlib@poshlib.org) */
POSH_COMPILE_TIME_ASSERT( posh_32bit_pointer, sizeof( void * ) == 4 );
#endif
#if defined FORCE_DOXYGEN
# define POSH_64BIT_POINTER
#endif
/*
** ----------------------------------------------------------------------------
** POSH Utility Functions
**
** These are optional POSH utility functions that are not required if you don't
** need anything except static checking of your host and target environment.
**
** These functions are NOT wrapped with POSH_PUBLIC_API because I didn't want
** to enforce their export if your own library is only using them internally.
** ----------------------------------------------------------------------------
*/
#ifdef __cplusplus
extern "C" {
#endif
const char *POSH_GetArchString( void );
#if !defined POSH_NO_FLOAT
posh_u32_t POSH_LittleFloatBits( float f );
posh_u32_t POSH_BigFloatBits( float f );
float POSH_FloatFromLittleBits( posh_u32_t bits );
float POSH_FloatFromBigBits( posh_u32_t bits );
void POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] );
double POSH_DoubleFromBits( const posh_byte_t src[ 8 ] );
/* unimplemented
float *POSH_WriteFloatToLittle( void *dst, float f );
float *POSH_WriteFloatToBig( void *dst, float f );
float POSH_ReadFloatFromLittle( const void *src );
float POSH_ReadFloatFromBig( const void *src );
double *POSH_WriteDoubleToLittle( void *dst, double d );
double *POSH_WriteDoubleToBig( void *dst, double d );
double POSH_ReadDoubleFromLittle( const void *src );
double POSH_ReadDoubleFromBig( const void *src );
*/
#endif /* !defined POSH_NO_FLOAT */
#if defined FORCE_DOXYGEN
# define POSH_NO_FLOAT
# undef POSH_NO_FLOAT
#endif
extern posh_u16_t POSH_SwapU16( posh_u16_t u );
extern posh_i16_t POSH_SwapI16( posh_i16_t u );
extern posh_u32_t POSH_SwapU32( posh_u32_t u );
extern posh_i32_t POSH_SwapI32( posh_i32_t u );
#if defined POSH_64BIT_INTEGER
extern posh_u64_t POSH_SwapU64( posh_u64_t u );
extern posh_i64_t POSH_SwapI64( posh_i64_t u );
#endif /*POSH_64BIT_INTEGER */
extern posh_u16_t *POSH_WriteU16ToLittle( void *dst, posh_u16_t value );
extern posh_i16_t *POSH_WriteI16ToLittle( void *dst, posh_i16_t value );
extern posh_u32_t *POSH_WriteU32ToLittle( void *dst, posh_u32_t value );
extern posh_i32_t *POSH_WriteI32ToLittle( void *dst, posh_i32_t value );
extern posh_u16_t *POSH_WriteU16ToBig( void *dst, posh_u16_t value );
extern posh_i16_t *POSH_WriteI16ToBig( void *dst, posh_i16_t value );
extern posh_u32_t *POSH_WriteU32ToBig( void *dst, posh_u32_t value );
extern posh_i32_t *POSH_WriteI32ToBig( void *dst, posh_i32_t value );
extern posh_u16_t POSH_ReadU16FromLittle( const void *src );
extern posh_i16_t POSH_ReadI16FromLittle( const void *src );
extern posh_u32_t POSH_ReadU32FromLittle( const void *src );
extern posh_i32_t POSH_ReadI32FromLittle( const void *src );
extern posh_u16_t POSH_ReadU16FromBig( const void *src );
extern posh_i16_t POSH_ReadI16FromBig( const void *src );
extern posh_u32_t POSH_ReadU32FromBig( const void *src );
extern posh_i32_t POSH_ReadI32FromBig( const void *src );
#if defined POSH_64BIT_INTEGER
extern posh_u64_t *POSH_WriteU64ToLittle( void *dst, posh_u64_t value );
extern posh_i64_t *POSH_WriteI64ToLittle( void *dst, posh_i64_t value );
extern posh_u64_t *POSH_WriteU64ToBig( void *dst, posh_u64_t value );
extern posh_i64_t *POSH_WriteI64ToBig( void *dst, posh_i64_t value );
extern posh_u64_t POSH_ReadU64FromLittle( const void *src );
extern posh_i64_t POSH_ReadI64FromLittle( const void *src );
extern posh_u64_t POSH_ReadU64FromBig( const void *src );
extern posh_i64_t POSH_ReadI64FromBig( const void *src );
#endif /* POSH_64BIT_INTEGER */
#if defined POSH_LITTLE_ENDIAN
# define POSH_LittleU16(x) (x)
# define POSH_LittleU32(x) (x)
# define POSH_LittleI16(x) (x)
# define POSH_LittleI32(x) (x)
# if defined POSH_64BIT_INTEGER
# define POSH_LittleU64(x) (x)
# define POSH_LittleI64(x) (x)
# endif /* defined POSH_64BIT_INTEGER */
# define POSH_BigU16(x) POSH_SwapU16(x)
# define POSH_BigU32(x) POSH_SwapU32(x)
# define POSH_BigI16(x) POSH_SwapI16(x)
# define POSH_BigI32(x) POSH_SwapI32(x)
# if defined POSH_64BIT_INTEGER
# define POSH_BigU64(x) POSH_SwapU64(x)
# define POSH_BigI64(x) POSH_SwapI64(x)
# endif /* defined POSH_64BIT_INTEGER */
#else
# define POSH_BigU16(x) (x)
# define POSH_BigU32(x) (x)
# define POSH_BigI16(x) (x)
# define POSH_BigI32(x) (x)
# if defined POSH_64BIT_INTEGER
# define POSH_BigU64(x) (x)
# define POSH_BigI64(x) (x)
# endif /* POSH_64BIT_INTEGER */
# define POSH_LittleU16(x) POSH_SwapU16(x)
# define POSH_LittleU32(x) POSH_SwapU32(x)
# define POSH_LittleI16(x) POSH_SwapI16(x)
# define POSH_LittleI32(x) POSH_SwapI32(x)
# if defined POSH_64BIT_INTEGER
# define POSH_LittleU64(x) POSH_SwapU64(x)
# define POSH_LittleI64(x) POSH_SwapI64(x)
# endif /* POSH_64BIT_INTEGER */
#endif
#ifdef __cplusplus
}
#endif
#endif /* HAVE_POSH_H */

View File

@ -0,0 +1,29 @@
#include "../../posh.c"
#include <stdio.h>
int main( void )
{
printf( "archtest:\n" );
printf( "--------\n" );
printf( "%s", POSH_GetArchString() );
printf( "byte min: %d\n", POSH_BYTE_MIN );
printf( "byte max: %d\n", POSH_BYTE_MAX );
printf( "i16 min: %d\n", POSH_I16_MIN );
printf( "i16 max: %d\n", POSH_I16_MAX );
printf( "i32 min: %d\n", POSH_I32_MIN );
printf( "i32 max: %d\n", POSH_I32_MAX );
printf( "u16 min: %u\n", POSH_U16_MIN );
printf( "u16 max: %u\n", POSH_U16_MAX );
printf( "u32 min: %u\n", POSH_U32_MIN );
printf( "u32 max: %u\n", POSH_U32_MAX );
#ifdef POSH_64BIT_INTEGER
printf( "i64 min: %"POSH_I64_PRINTF_PREFIX"d\n", POSH_I64_MIN );
printf( "i64 max: %"POSH_I64_PRINTF_PREFIX"d\n", POSH_I64_MAX );
printf( "u64 min: %"POSH_I64_PRINTF_PREFIX"u\n", POSH_U64_MIN );
printf( "u64 max: %"POSH_I64_PRINTF_PREFIX"u\n", POSH_U64_MAX );
#endif
return 0;
}

View File

@ -0,0 +1,23 @@
#include "testlib.hpp"
#include "testdll.h"
#include <stdio.h>
/*
** linktest: prints a description of what is being verified, reports the POSH
** architecture string, then calls one function from the test library and one
** from the test DLL.
*/
int main( void )
{
    /* Banner and description, emitted as a single concatenated literal. */
    printf( "linktest:\n"
            "---------\n"
            "linktest is a simple verification test that tests:\n"
            " * correct linkage between C and C++\n"
            " * proper handling when multiple libs use posh\n"
            " * correct handling of DLL vs. LIB linkage (Windows)\n"
            "\n\n" );

    printf( "POSH_GetArchString() reporting:\n%s\n\n", POSH_GetArchString() );

    /* Reaching the final message means both calls linked and returned. */
    TestLib_Foo();
    TestDLL_Foo();

    printf( "\n\nlinktest succeeded!\n" );
    return 0;
}

View File

@ -0,0 +1,20 @@
#ifndef TESTDLL_H
#define TESTDLL_H
/*
** Public header of the POSH test DLL. posh.h is included with POSH_DLL
** defined so that POSH_PUBLIC_API picks up the DLL import/export signature.
** Comments use the block form so the header also compiles as C89.
*/
#define POSH_DLL 1 /* define this since poshtestdll is a DLL */
#include "../../posh.h"
#undef POSH_DLL /* undefine so that another include of posh.h doesn't cause problems */
/* Capture the DLL flavour of POSH_PUBLIC_API under a library-specific name. */
#define TESTDLL_PUBLIC_API POSH_PUBLIC_API
#if defined __cplusplus
extern "C" {
#endif
/* Exported entry point called by linktest. */
TESTDLL_PUBLIC_API(void) TestDLL_Foo( void );
#if defined __cplusplus
}
#endif
#endif /* TESTDLL_H */

View File

@ -0,0 +1,11 @@
/* Defined before posh.h is pulled in through testlib.hpp. posh.h only
   consults it when POSH_DLL is defined, and testlib.hpp undefines POSH_DLL
   first, so it does not change the export signature here. */
#define POSH_BUILDING_LIB 1
#include "testlib.hpp"
#include <stdio.h>
/// Prints a confirmation message; linktest calls this to verify linkage.
void TestLib_Foo( void )
{
printf( "...TestLib_Foo called successfully!\n" );
}

View File

@ -0,0 +1,19 @@
#ifndef TESTLIB_HPP
#define TESTLIB_HPP
/* Public header of the POSH static test library: posh.h is included with
   POSH_DLL explicitly undefined so POSH_PUBLIC_API has no DLL decoration. */
#undef POSH_DLL
#include "../../posh.h"
#define TESTLIB_PUBLIC_API POSH_PUBLIC_API
/* NOTE(review): POSH_DLL was undefined above and posh.h does not leave it
   defined, so this extern "C" guard looks never active and TestLib_Foo gets
   C++ linkage in C++ translation units -- confirm that is intended. */
#if defined __cplusplus && defined POSH_DLL
extern "C" {
#endif
TESTLIB_PUBLIC_API(void) TestLib_Foo( void );
#if defined __cplusplus && defined POSH_DLL
}
#endif
#endif /* TESTLIB_HPP */

View File

@ -0,0 +1,54 @@
# Build script for the nvimage library.
PROJECT(nvimage)
# NOTE(review): SUBDIRS() is the deprecated form of ADD_SUBDIRECTORY() and is
# documented to process the subdirectory after this file finishes, so nvtt
# presumably inherits the include directories and -DNVIMAGE_EXPORTS set
# below -- confirm that the inherited definition is intended.
SUBDIRS(nvtt)
# Headers are listed next to their sources so they show up in IDE projects.
SET(IMAGE_SRCS
nvimage.h
FloatImage.h
FloatImage.cpp
Filter.h
Filter.cpp
Image.h
Image.cpp
ImageIO.h
ImageIO.cpp
ColorBlock.h
ColorBlock.cpp
HoleFilling.h
HoleFilling.cpp
DirectDrawSurface.h
DirectDrawSurface.cpp
Quantize.h
Quantize.cpp
NormalMap.h
NormalMap.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# Optional image codecs: each *_FOUND flag comes from the Find module run in
# the parent src/CMakeLists.txt. When a codec is present its libraries are
# appended to LIBS and its headers are added to the include path.
IF(PNG_FOUND)
SET(LIBS ${LIBS} ${PNG_LIBRARIES})
INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
ENDIF(PNG_FOUND)
IF(JPEG_FOUND)
SET(LIBS ${LIBS} ${JPEG_LIBRARIES})
INCLUDE_DIRECTORIES(${JPEG_INCLUDE_DIR})
ENDIF(JPEG_FOUND)
IF(TIFF_FOUND)
SET(LIBS ${LIBS} ${TIFF_LIBRARIES})
INCLUDE_DIRECTORIES(${TIFF_INCLUDE_DIR})
ENDIF(TIFF_FOUND)
# targets
# NVIMAGE_EXPORTS presumably switches the nvimage headers to their "export"
# declarations -- verify against nvimage.h.
ADD_DEFINITIONS(-DNVIMAGE_EXPORTS)
# NVIMAGE_SHARED forces a shared library; otherwise the library type is left
# to CMake's default for ADD_LIBRARY.
IF(NVIMAGE_SHARED)
ADD_LIBRARY(nvimage SHARED ${IMAGE_SRCS})
ELSE(NVIMAGE_SHARED)
ADD_LIBRARY(nvimage ${IMAGE_SRCS})
ENDIF(NVIMAGE_SHARED)
# Link the optional codec libraries plus the in-tree nvcore, nvmath and posh.
TARGET_LINK_LIBRARIES(nvimage ${LIBS} nvcore nvmath posh)

392
src/nvimage/ColorBlock.cpp Normal file
View File

@ -0,0 +1,392 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Box.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/Image.h>
using namespace nv;
namespace {

	// Approximate luminance: the unweighted sum of the r, g and b channels.
	inline static uint colorLuminance(Color32 c)
	{
		const uint sum = c.r + c.g + c.b;
		return sum;
	}

	// Squared euclidean distance between two colors in rgb (alpha is ignored).
	inline static uint colorDistance(Color32 c0, Color32 c1)
	{
		const int dr = c0.r - c1.r;
		const int dg = c0.g - c1.g;
		const int db = c0.b - c1.b;
		return dr * dr + dg * dg + db * db;
	}

} // namespace
/// Default constructor.
/// The body is empty: no color is assigned here.
/// NOTE(review): the block contents are presumably uninitialized until init()
/// or an assignment runs -- confirm against Color32's default constructor.
ColorBlock::ColorBlock()
{
}
/// Copy constructor: duplicates the 16 colors of the given block.
ColorBlock::ColorBlock(const ColorBlock & block)
{
	uint idx = 0;
	while (idx < 16) {
		color(idx) = block.color(idx);
		++idx;
	}
}
/// Initialize this color block.
/// Forwards to init() to fill the block from the image pixels starting at
/// position (x, y).
ColorBlock::ColorBlock(const Image * img, uint x, uint y)
{
init(img, x, y);
}
void ColorBlock::init(const Image * img, uint x, uint y)
{
nvDebugCheck(img != NULL);
const uint bw = min(img->width() - x, 4U);
const uint bh = min(img->height() - y, 4U);
nvDebugCheck(bw != 0);
nvDebugCheck(bh != 0);
int remainder[] = {
0, 0, 0, 0,
0, 1, 0, 1,
0, 1, 2, 0,
0, 1, 2, 3,
};
// Blocks that are smaller than 4x4 are handled by repeating the pixels.
// @@ Thats only correct when block size is 1, 2 or 4, but not with 3.
for(uint i = 0; i < 4; i++) {
//const int by = i % bh;
const int by = remainder[(bh - 1) * 4 + i];
for(uint e = 0; e < 4; e++) {
//const int bx = e % bw;
const int bx = remainder[(bw - 1) * 4 + e];
color(e, i) = img->pixel(x + bx, y + by);
}
}
}
void ColorBlock::swizzleDXT5n()
{
for(int i = 0; i < 16; i++)
{
Color32 c = m_color[i];
m_color[i] = Color32(0, c.r, 0, c.g);
}
}
void ColorBlock::splatX()
{
for(int i = 0; i < 16; i++)
{
uint8 x = m_color[i].r;
m_color[i] = Color32(x, x, x, x);
}
}
void ColorBlock::splatY()
{
for(int i = 0; i < 16; i++)
{
uint8 y = m_color[i].g;
m_color[i] = Color32(y, y, y, y);
}
}
/// Count number of unique colors in this color block.
/// A color is counted the first time it appears, i.e. when it differs from
/// every color stored before it.
uint ColorBlock::countUniqueColors() const
{
	uint count = 0;

	// @@ This does not have to be o(n^2)
	for (int i = 0; i < 16; i++)
	{
		// 'unique' stays true only while m_color[i] differs from every
		// earlier entry; the scan stops at the first duplicate. (The previous
		// code flagged a color as a duplicate when it was *different*.)
		bool unique = true;
		for (int j = 0; j < i && unique; j++) {
			unique = (m_color[i] != m_color[j]);
		}

		if (unique) {
			count++;
		}
	}

	return count;
}
/// Get average color of the block: each channel is the sum over the 16
/// colors divided by 16 with integer division.
Color32 ColorBlock::averageColor() const
{
	uint sumR = 0;
	uint sumG = 0;
	uint sumB = 0;
	uint sumA = 0;

	for (uint i = 0; i != 16; ++i) {
		const Color32 & c = m_color[i];
		sumR += c.r;
		sumG += c.g;
		sumB += c.b;
		sumA += c.a;
	}

	return Color32(uint8(sumR / 16), uint8(sumG / 16), uint8(sumB / 16), uint8(sumA / 16));
}
/// Get diameter color range: the pair of colors in the block that are the
/// farthest apart according to colorDistance().
/// @param start receives the first color of the pair.
/// @param end receives the second color of the pair.
/// If no pair has a non-zero distance, both outputs are the first color.
void ColorBlock::diameterRange(Color32 * start, Color32 * end) const
{
	nvDebugCheck(start != NULL);
	nvDebugCheck(end != NULL);

	// Start from a valid color so the outputs are never left unassigned when
	// no pair beats the initial distance of 0.
	Color32 c0 = m_color[0];
	Color32 c1 = m_color[0];
	uint best_dist = 0;

	for (int i = 0; i < 16; i++) {
		for (int j = i + 1; j < 16; j++) {
			const uint dist = colorDistance(m_color[i], m_color[j]);
			if (dist > best_dist) {
				best_dist = dist;
				c0 = m_color[i];
				c1 = m_color[j];
			}
		}
	}

	*start = c0;
	*end = c1;
}
/// Get luminance color range.
/// @param start receives the color with the lowest colorLuminance().
/// @param end receives the color with the highest colorLuminance().
/// On ties the earliest such color in the block is returned.
void ColorBlock::luminanceRange(Color32 * start, Color32 * end) const
{
	nvDebugCheck(start != NULL);
	nvDebugCheck(end != NULL);

	// Seed both extremes with the first color. Previously only the luminance
	// values were seeded, so an extreme held by m_color[0] was never stored.
	Color32 minColor = m_color[0];
	Color32 maxColor = m_color[0];
	uint minLuminance, maxLuminance;
	maxLuminance = minLuminance = colorLuminance(m_color[0]);

	for (uint i = 1; i < 16; i++)
	{
		const uint luminance = colorLuminance(m_color[i]);

		if (luminance > maxLuminance) {
			maxLuminance = luminance;
			maxColor = m_color[i];
		}
		else if (luminance < minLuminance) {
			minLuminance = luminance;
			minColor = m_color[i];
		}
	}

	*start = minColor;
	*end = maxColor;
}
/// Get color range based on the rgb bounding box of the block, with each
/// end moved inwards by 1/16 of the box extent.
/// @param start receives the inset minimum corner.
/// @param end receives the inset maximum corner.
void ColorBlock::boundsRange(Color32 * start, Color32 * end) const
{
	nvDebugCheck(start != NULL);
	nvDebugCheck(end != NULL);

	Color32 lo(255, 255, 255);
	Color32 hi(0, 0, 0);

	// Per-channel minimum and maximum over the 16 colors.
	for (uint i = 0; i != 16; ++i)
	{
		const Color32 & c = m_color[i];

		if (c.r < lo.r) { lo.r = c.r; }
		if (c.g < lo.g) { lo.g = c.g; }
		if (c.b < lo.b) { lo.b = c.b; }
		if (c.r > hi.r) { hi.r = c.r; }
		if (c.g > hi.g) { hi.g = c.g; }
		if (c.b > hi.b) { hi.b = c.b; }
	}

	// Offset range by 1/16 of the extents
	Color32 inset;
	inset.r = (hi.r - lo.r) >> 4;
	inset.g = (hi.g - lo.g) >> 4;
	inset.b = (hi.b - lo.b) >> 4;

	// Raise the minimum, saturating at 255.
	if (lo.r + inset.r <= 255) { lo.r = lo.r + inset.r; } else { lo.r = 255; }
	if (lo.g + inset.g <= 255) { lo.g = lo.g + inset.g; } else { lo.g = 255; }
	if (lo.b + inset.b <= 255) { lo.b = lo.b + inset.b; } else { lo.b = 255; }

	// Lower the maximum, saturating at 0.
	if (hi.r >= inset.r) { hi.r = hi.r - inset.r; } else { hi.r = 0; }
	if (hi.g >= inset.g) { hi.g = hi.g - inset.g; } else { hi.g = 0; }
	if (hi.b >= inset.b) { hi.b = hi.b - inset.b; } else { hi.b = 0; }

	*start = lo;
	*end = hi;
}
/// Get color range based on the rgba bounding box of the block (alpha
/// included), with each end moved inwards by 1/16 of the box extent.
/// @param start receives the inset minimum corner.
/// @param end receives the inset maximum corner.
void ColorBlock::boundsRangeAlpha(Color32 * start, Color32 * end) const
{
	nvDebugCheck(start != NULL);
	nvDebugCheck(end != NULL);

	Color32 lo(255, 255, 255, 255);
	Color32 hi(0, 0, 0, 0);

	// Per-channel minimum and maximum over the 16 colors.
	for (uint i = 0; i != 16; ++i)
	{
		const Color32 & c = m_color[i];

		if (c.r < lo.r) { lo.r = c.r; }
		if (c.g < lo.g) { lo.g = c.g; }
		if (c.b < lo.b) { lo.b = c.b; }
		if (c.a < lo.a) { lo.a = c.a; }
		if (c.r > hi.r) { hi.r = c.r; }
		if (c.g > hi.g) { hi.g = c.g; }
		if (c.b > hi.b) { hi.b = c.b; }
		if (c.a > hi.a) { hi.a = c.a; }
	}

	// Offset range by 1/16 of the extents
	Color32 inset;
	inset.r = (hi.r - lo.r) >> 4;
	inset.g = (hi.g - lo.g) >> 4;
	inset.b = (hi.b - lo.b) >> 4;
	inset.a = (hi.a - lo.a) >> 4;

	// Raise the minimum, saturating at 255.
	if (lo.r + inset.r <= 255) { lo.r = lo.r + inset.r; } else { lo.r = 255; }
	if (lo.g + inset.g <= 255) { lo.g = lo.g + inset.g; } else { lo.g = 255; }
	if (lo.b + inset.b <= 255) { lo.b = lo.b + inset.b; } else { lo.b = 255; }
	if (lo.a + inset.a <= 255) { lo.a = lo.a + inset.a; } else { lo.a = 255; }

	// Lower the maximum, saturating at 0.
	if (hi.r >= inset.r) { hi.r = hi.r - inset.r; } else { hi.r = 0; }
	if (hi.g >= inset.g) { hi.g = hi.g - inset.g; } else { hi.g = 0; }
	if (hi.b >= inset.b) { hi.b = hi.b - inset.b; } else { hi.b = 0; }
	if (hi.a >= inset.a) { hi.a = hi.a - inset.a; } else { hi.a = 0; }

	*start = lo;
	*end = hi;
}
/// Get the color range along the direction of the block's best fit line.
/// @param start receives the color found by computeRange() at the low end.
/// @param end receives the color found by computeRange() at the high end.
void ColorBlock::bestFitRange(Color32 * start, Color32 * end) const
{
    nvDebugCheck(start != NULL);
    nvDebugCheck(end != NULL);

    // Project the colors on the direction of the least squares line.
    const Vector3 fitDirection = bestFitLine().direction();
    computeRange(fitDirection, start, end);
}
/// Sort colors by abosolute value in their 16 bit representation.
void ColorBlock::sortColorsByAbsoluteValue()
{
// Dummy selection sort.
for( uint a = 0; a < 16; a++ ) {
uint max = a;
Color16 cmax(m_color[a]);
for( uint b = a+1; b < 16; b++ ) {
Color16 cb(m_color[b]);
if( cb.u > cmax.u ) {
max = b;
cmax = cb;
}
}
swap( m_color[a], m_color[max] );
}
}
/// Find the extreme colors along the given axis.
/// Every color's RGB triple is projected on @a axis with a dot product; the
/// colors with the smallest and the largest projection are returned.
/// @param axis direction to project on.
/// @param start receives the color with the smallest projection.
/// @param end receives the color with the largest projection.
void ColorBlock::computeRange(Vector3::Arg axis, Color32 * start, Color32 * end) const
{
    nvDebugCheck(start != NULL);
    nvDebugCheck(end != NULL);

    // Seed both extremes with the first color.
    int lowIndex = 0;
    int highIndex = 0;
    float lowest = dot(Vector3(m_color[0].r, m_color[0].g, m_color[0].b), axis);
    float highest = lowest;

    for (uint i = 1; i < 16; i++)
    {
        const Vector3 rgb(m_color[i].r, m_color[i].g, m_color[i].b);
        const float projection = dot(rgb, axis);

        if (projection < lowest)
        {
            lowest = projection;
            lowIndex = i;
        }
        else if (projection > highest)
        {
            highest = projection;
            highIndex = i;
        }
    }

    *start = m_color[lowIndex];
    *end = m_color[highIndex];
}
/// Sort colors in the given axis.
void ColorBlock::sortColors(const Vector3 & axis)
{
float luma_array[16];
for(uint i = 0; i < 16; i++) {
const Vector3 vec(m_color[i].r, m_color[i].g, m_color[i].b);
luma_array[i] = dot(vec, axis);
}
// Dummy selection sort.
for( uint a = 0; a < 16; a++ ) {
uint min = a;
for( uint b = a+1; b < 16; b++ ) {
if( luma_array[b] < luma_array[min] ) {
min = b;
}
}
swap( luma_array[a], luma_array[min] );
swap( m_color[a], m_color[min] );
}
}
/// Get least squares line that best approximates the points of the color block.
/// The RGB components of the 16 colors are passed to Fit::bestLine as 3D points;
/// the alpha channel is not used.
Line3 ColorBlock::bestFitLine() const
{
// NOTE(review): this assumes Array<Vector3>(16) only reserves capacity. If that
// constructor created 16 elements, the append() calls below would grow the
// array to 32 points -- confirm against the Array implementation.
Array<Vector3> pointArray(16);
for(int i = 0; i < 16; i++) {
pointArray.append(Vector3(m_color[i].r, m_color[i].g, m_color[i].b));
}
return Fit::bestLine(pointArray);
}
/// Get the volume of the color block.
float ColorBlock::volume() const
{
Box bounds;
bounds.clearBounds();
for(int i = 0; i < 16; i++) {
const Vector3 point(m_color[i].r, m_color[i].g, m_color[i].b);
bounds.addPointToBounds(point);
}
return bounds.volume();
}

96
src/nvimage/ColorBlock.h Normal file
View File

@ -0,0 +1,96 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_COLORBLOCK_H
#define NV_IMAGE_COLORBLOCK_H
#include <nvmath/Color.h>
#include <nvmath/Fitting.h> // Line3
namespace nv
{
class Image;
/// Uncompressed 4x4 color block.
/// Holds 16 Color32 values; the (x, y) accessors index them as y * 4 + x.
struct ColorBlock
{
// Construction / initialization. The (img, x, y) variants take a source image
// and a position in it.
ColorBlock();
ColorBlock(const ColorBlock & block);
ColorBlock(const Image * img, uint x, uint y);
void init(const Image * img, uint x, uint y);
// In-place channel manipulation.
void swizzleDXT5n();
void splatX();
void splatY();
// Block statistics.
uint countUniqueColors() const;
Color32 averageColor() const;
// Color range queries: each one writes a pair of colors to start/end.
void diameterRange(Color32 * start, Color32 * end) const;
void luminanceRange(Color32 * start, Color32 * end) const;
void boundsRange(Color32 * start, Color32 * end) const;
void boundsRangeAlpha(Color32 * start, Color32 * end) const;
void bestFitRange(Color32 * start, Color32 * end) const;
// Sorting and fitting helpers.
void sortColorsByAbsoluteValue();
void computeRange(const Vector3 & axis, Color32 * start, Color32 * end) const;
void sortColors(const Vector3 & axis);
Line3 bestFitLine() const;
float volume() const;
Line3 diameterLine() const;
// Accessors
const Color32 * colors() const;
Color32 color(uint i) const;
Color32 & color(uint i);
Color32 color(uint x, uint y) const;
Color32 & color(uint x, uint y);
private:
// The 16 colors of the block.
Color32 m_color[4*4];
};
/// Get pointer to block colors.
/// The pointer refers to the block's internal array of 16 colors.
inline const Color32 * ColorBlock::colors() const
{
return m_color;
}
/// Get a copy of the block color at linear index @a i.
/// @param i color index; must be less than 16 (checked with nvDebugCheck).
inline Color32 ColorBlock::color(uint i) const
{
nvDebugCheck(i < 16);
return m_color[i];
}
/// Get a writable reference to the block color at linear index @a i.
/// @param i color index; must be less than 16 (checked with nvDebugCheck).
inline Color32 & ColorBlock::color(uint i)
{
nvDebugCheck(i < 16);
return m_color[i];
}
/// Get a copy of the block color at position (@a x, @a y).
/// Colors are addressed as y * 4 + x; both coordinates must be less than 4
/// (checked with nvDebugCheck).
inline Color32 ColorBlock::color(uint x, uint y) const
{
nvDebugCheck(x < 4 && y < 4);
return m_color[y * 4 + x];
}
/// Get a writable reference to the block color at position (@a x, @a y).
/// Colors are addressed as y * 4 + x; both coordinates must be less than 4
/// (checked with nvDebugCheck).
inline Color32 & ColorBlock::color(uint x, uint y)
{
nvDebugCheck(x < 4 && y < 4);
return m_color[y * 4 + x];
}
} // nv namespace
#endif // NV_IMAGE_COLORBLOCK_H

View File

@ -0,0 +1,258 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvimage/DirectDrawSurface.h>
#include <string.h> // memset
using namespace nv;
// Pack four 8 bit character codes into a 32 bit value; ch0 ends up in the
// lowest byte and ch3 in the highest. Only defined when no MAKEFOURCC macro
// is already available.
#if !defined(MAKEFOURCC)
# define MAKEFOURCC(ch0, ch1, ch2, ch3) \
(uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \
(uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 ))
#endif
// File-local DDS constants: four character codes and header flag bits.
namespace
{
    // File magic and compressed format codes.
    static const uint FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' ');
    static const uint FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1');
    static const uint FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2');
    static const uint FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3');
    static const uint FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4');
    static const uint FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5');
    static const uint FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B');
    static const uint FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1');
    static const uint FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2');

    // Bits of DDSHeader::flags; they mark which header fields have been set.
    static const uint DDSD_CAPS = 0x00000001U;
    static const uint DDSD_PIXELFORMAT = 0x00001000U;
    static const uint DDSD_WIDTH = 0x00000004U;
    static const uint DDSD_HEIGHT = 0x00000002U;
    static const uint DDSD_PITCH = 0x00000008U;
    static const uint DDSD_MIPMAPCOUNT = 0x00020000U;
    static const uint DDSD_LINEARSIZE = 0x00080000U;
    static const uint DDSD_DEPTH = 0x00800000U;

    // Bits of DDSCaps::caps1.
    static const uint DDSCAPS_COMPLEX = 0x00000008U;
    static const uint DDSCAPS_TEXTURE = 0x00001000U;
    static const uint DDSCAPS_MIPMAP = 0x00400000U;

    // Bits of DDSCaps::caps2.
    static const uint DDSCAPS2_VOLUME = 0x00200000U;
    static const uint DDSCAPS2_CUBEMAP = 0x00000200U;
    static const uint DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400U;
    static const uint DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800U;
    static const uint DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000U;
    static const uint DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000U;
    static const uint DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000U;
    static const uint DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000U;
    // Union of the six face bits above. The previous value 0x0000F000 left out
    // the +X and -X faces.
    static const uint DDSCAPS2_CUBEMAP_ALL_FACES = 0x0000FC00U;

    // Bits of DDSPixelFormat::flags.
    static const uint DDPF_RGB = 0x00000040U;
    static const uint DDPF_FOURCC = 0x00000004U;
    static const uint DDPF_ALPHAPIXELS = 0x00000001U;
}
/// Build an empty header: magic and structure sizes are filled in, only the
/// CAPS and PIXELFORMAT flags are raised, and every other field is zeroed.
DDSHeader::DDSHeader()
{
    fourcc = FOURCC_DDS;
    size = 124;
    flags = DDSD_CAPS | DDSD_PIXELFORMAT;

    height = 0;
    width = 0;
    pitch = 0;
    depth = 0;
    mipmapcount = 0;

    memset(reserved, 0, sizeof(reserved));

    // Store version information on the reserved header attributes.
    reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
    reserved[10] = (0 << 16) | (1 << 8) | (0);	// major.minor.revision

    // Pixel format starts out undefined.
    pf.size = 32;
    pf.flags = 0;
    pf.fourcc = 0;
    pf.bitcount = 0;
    pf.rmask = 0;
    pf.gmask = 0;
    pf.bmask = 0;
    pf.amask = 0;

    // Plain texture by default.
    caps.caps1 = DDSCAPS_TEXTURE;
    caps.caps2 = 0;
    caps.caps3 = 0;
    caps.caps4 = 0;

    notused = 0;
}
/// Store the texture width and raise the DDSD_WIDTH flag.
void DDSHeader::setWidth(uint w)
{
    width = w;
    flags |= DDSD_WIDTH;
}
/// Store the texture height and raise the DDSD_HEIGHT flag.
void DDSHeader::setHeight(uint h)
{
    height = h;
    flags |= DDSD_HEIGHT;
}
/// Store the texture depth and raise the DDSD_DEPTH flag.
/// @param d depth to write to the header.
void DDSHeader::setDepth(uint d)
{
    this->flags |= DDSD_DEPTH;
    // The value belongs in the depth field; it used to overwrite height.
    this->depth = d;
}
/// Set the number of mipmaps and keep flags and caps1 consistent with it.
/// A non-zero count raises DDSD_MIPMAPCOUNT and adds COMPLEX | MIPMAP to caps1.
/// A zero count clears the flag and resets caps1 to TEXTURE, plus COMPLEX when
/// caps2 is non-zero.
void DDSHeader::setMipmapCount(uint count)
{
    if (count != 0)
    {
        flags |= DDSD_MIPMAPCOUNT;
        mipmapcount = count;
        caps.caps1 |= DDSCAPS_COMPLEX | DDSCAPS_MIPMAP;
        return;
    }

    flags &= ~DDSD_MIPMAPCOUNT;
    mipmapcount = 0;

    // Without mipmaps the surface is only complex when caps2 has any bit set.
    caps.caps1 = (caps.caps2 == 0) ? DDSCAPS_TEXTURE : (DDSCAPS_TEXTURE | DDSCAPS_COMPLEX);
}
/// Mark the header as a 2D texture. Intentionally a no-op: the header is not
/// modified.
void DDSHeader::setTexture2D()
{
// nothing to do here.
}
void DDSHeader::setTexture3D()
{
this->caps.caps2 = DDSCAPS2_VOLUME;
}
void DDSHeader::setTextureCube()
{
this->caps.caps1 |= DDSCAPS_COMPLEX;
this->caps.caps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALL_FACES;
}
/// Store a linear size in the pitch field. DDSD_LINEARSIZE is raised and
/// DDSD_PITCH cleared, so the two interpretations stay mutually exclusive.
void DDSHeader::setLinearSize(uint size)
{
    // `size` is the parameter here, not the header's own size member.
    this->pitch = size;
    this->flags = (this->flags & ~DDSD_PITCH) | DDSD_LINEARSIZE;
}
/// Store a pitch in the pitch field. DDSD_PITCH is raised and DDSD_LINEARSIZE
/// cleared, so the two interpretations stay mutually exclusive.
void DDSHeader::setPitch(uint pitch)
{
    // The parameter shadows the member, hence the explicit this->.
    this->pitch = pitch;
    this->flags = (this->flags & ~DDSD_LINEARSIZE) | DDSD_PITCH;
}
/// Describe the pixel format with a four character code.
/// The bit count and all channel masks are reset to zero.
void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{
    pf.flags = DDPF_FOURCC;
    pf.fourcc = MAKEFOURCC(c0, c1, c2, c3);

    // A fourcc format carries no explicit bit layout.
    pf.bitcount = 0;
    pf.rmask = pf.gmask = pf.bmask = pf.amask = 0;
}
/// Describe the pixel format with explicit channel masks.
/// @param bitcount bits per pixel; when 0 it is derived from the position of
///        the highest bit set in any of the masks.
/// @param rmask, gmask, bmask, amask channel masks; they must not overlap.
///        A non-zero @a amask also raises DDPF_ALPHAPIXELS.
void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask)
{
    // Make sure the masks are correct: no two channels may share a bit.
    nvCheck((rmask & gmask) == 0);
    nvCheck((rmask & bmask) == 0);
    nvCheck((rmask & amask) == 0);
    nvCheck((gmask & bmask) == 0);
    nvCheck((gmask & amask) == 0);
    nvCheck((bmask & amask) == 0);

    if (bitcount == 0)
    {
        // Compute bit count from the masks: count shifts until nothing is left.
        for (uint remaining = rmask | gmask | bmask | amask; remaining != 0; remaining >>= 1)
        {
            bitcount++;
        }
        // @@ Align to 8?
    }

    pf.flags = (amask != 0) ? (DDPF_RGB | DDPF_ALPHAPIXELS) : DDPF_RGB;
    pf.fourcc = 0;
    pf.bitcount = bitcount;
    pf.rmask = rmask;
    pf.gmask = gmask;
    pf.bmask = bmask;
    pf.amask = amask;
}
void DDSHeader::swapBytes()
{
this->fourcc = POSH_LittleU32(this->fourcc);
this->size = POSH_LittleU32(this->size);
this->flags = POSH_LittleU32(this->flags);
this->height = POSH_LittleU32(this->height);
this->width = POSH_LittleU32(this->width);
this->pitch = POSH_LittleU32(this->pitch);
this->depth = POSH_LittleU32(this->depth);
this->mipmapcount = POSH_LittleU32(this->mipmapcount);
for(int i = 0; i < 11; i++) {
this->reserved[i] = POSH_LittleU32(this->reserved[i]);
}
this->pf.size = POSH_LittleU32(this->pf.size);
this->pf.flags = POSH_LittleU32(this->pf.flags);
this->pf.fourcc = POSH_LittleU32(this->pf.fourcc);
this->pf.bitcount = POSH_LittleU32(this->pf.bitcount);
this->pf.rmask = POSH_LittleU32(this->pf.rmask);
this->pf.gmask = POSH_LittleU32(this->pf.gmask);
this->pf.bmask = POSH_LittleU32(this->pf.bmask);
this->pf.amask = POSH_LittleU32(this->pf.amask);
this->caps.caps1 = POSH_LittleU32(this->caps.caps1);
this->caps.caps2 = POSH_LittleU32(this->caps.caps2);
this->caps.caps3 = POSH_LittleU32(this->caps.caps3);
this->caps.caps4 = POSH_LittleU32(this->caps.caps4);
this->notused = POSH_LittleU32(this->notused);
}

View File

@ -0,0 +1,85 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_IMAGE_DIRECTDRAWSURFACE_H
#define NV_IMAGE_DIRECTDRAWSURFACE_H
#include <nvcore/nvcore.h>
namespace nv
{
/// Pixel format section of the DDS header (eight 32 bit fields).
struct DDSPixelFormat {
uint size;	// structure size; DDSHeader's constructor sets it to 32
uint flags;	// DDPF_* bits
uint fourcc;	// four character code, 0 when masks are used
uint bitcount;	// bits per pixel, 0 when a fourcc is used
uint rmask;	// red channel mask
uint gmask;	// green channel mask
uint bmask;	// blue channel mask
uint amask;	// alpha channel mask
};
/// Capability section of the DDS header (four 32 bit fields).
struct DDSCaps {
uint caps1;	// DDSCAPS_* bits (texture, complex, mipmap)
uint caps2;	// DDSCAPS2_* bits (volume, cube map)
uint caps3;	// the DDSHeader methods only ever write 0 here
uint caps4;	// the DDSHeader methods only ever write 0 here
};
/// DDS file header.
/// The fields are plain 32 bit values; the helper methods below fill them in
/// and keep the `flags` and `caps` bits in sync with what has been set.
struct DDSHeader {
uint fourcc;	// file magic, set to 'DDS ' by the constructor
uint size;	// set to 124 by the constructor
uint flags;	// DDSD_* bits marking which fields are set
uint height;
uint width;
uint pitch;	// pitch or linear size, see setPitch / setLinearSize
uint depth;
uint mipmapcount;
uint reserved[11];	// words 9 and 10 carry a tool tag and version
DDSPixelFormat pf;
DDSCaps caps;
uint notused;
// Helper methods.
DDSHeader();
void setWidth(uint w);
void setHeight(uint h);
void setDepth(uint d);
void setMipmapCount(uint count);
void setLinearSize(uint size);
void setPitch(uint pitch);
void setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3);
void setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask);
void setTexture2D();
void setTexture3D();
void setTextureCube();
// Passes every field through POSH_LittleU32.
void swapBytes();
};
} // nv namespace
#endif // NV_IMAGE_DIRECTDRAWSURFACE_H

572
src/nvimage/Filter.cpp Normal file
View File

@ -0,0 +1,572 @@
// This code is in the public domain -- castanyo@yahoo.es
/** @file Filter.cpp
* @brief Image filters.
*
* Jonathan Blow articles:
* http://number-none.com/product/Mipmapping, Part 1/index.html
* http://number-none.com/product/Mipmapping, Part 2/index.html
*
* References from Thatcher Ulrich:
* See _Graphics Gems III_ "General Filtered Image Rescaling", Dale A. Schumacher
*
* References from Paul Heckbert:
* A.V. Oppenheim, R.W. Schafer, Digital Signal Processing, Prentice-Hall, 1975
*
* R.W. Hamming, Digital Filters, Prentice-Hall, Englewood Cliffs, NJ, 1983
*
* W.K. Pratt, Digital Image Processing, John Wiley and Sons, 1978
*
* H.S. Hou, H.C. Andrews, "Cubic Splines for Image Interpolation and
* Digital Filtering", IEEE Trans. Acoustics, Speech, and Signal Proc.,
* vol. ASSP-26, no. 6, Dec. 1978, pp. 508-517
*
* Paul Heckbert's zoom library.
* http://www.xmission.com/~legalize/zoom.html
*
* Reconstruction Filters in Computer Graphics
* http://www.mentallandscape.com/Papers_siggraph88.pdf
*
*/
#include <nvcore/Containers.h> // swap
#include <nvmath/nvmath.h> // fabs
#include <nvmath/Vector.h> // Vector4
#include <nvimage/Filter.h>
using namespace nv;
namespace
{
// Box filter: 1 inside [-0.5, 0.5], 0 everywhere else.
// support = 0.5
inline static float filter_box(float x)
{
    const bool inside = (x >= -0.5f) && (x <= 0.5f);
    return inside ? 1.0f : 0.0f;
}
// Triangle (tent) filter: rises linearly from 0 at -1 to 1 at 0 and falls
// back to 0 at 1.
// support = 1.0
inline static float filter_triangle(float x)
{
    if (x >= -1.0f && x < 0.0f) return 1.0f + x;
    if (x >= 0.0f && x < 1.0f) return 1.0f - x;
    return 0.0f;
}
// Quadratic filter, symmetric around 0 and built from two parabolic pieces.
// support = 1.5
inline static float filter_quadratic(float x)
{
    const float ax = (x < 0.0f) ? -x : x;

    // Central piece.
    if (ax < 0.5f) return 0.75f - ax * ax;

    // Outside of the support.
    if (!(ax < 1.5f)) return 0.0f;

    // Outer piece.
    const float t = ax - 1.5f;
    return 0.5f * t * t;
}
// @@ Filter from tulrich.
// Cubic filter f(t) = 2|t|^3 - 3|t|^2 + 1 for -1 <= t <= 1, 0 elsewhere.
// support 1.0
inline static float filter_cubic(float x)
{
    const float ax = (x < 0.0f) ? -x : x;
    if (!(ax < 1.0f)) return 0.0f;
    return (2.0f * ax - 3.0f) * ax * ax + 1.0f;
}
// @@ Paul Heckbert calls this cubic instead of spline.
// Piecewise cubic spline filter, symmetric around 0.
// support = 2.0
inline static float filter_spline(float x)
{
    const float ax = (x < 0.0f) ? -x : x;

    // Inner piece.
    if (ax < 1.0f) return (4.0f + ax * ax * (-6.0f + ax * 3.0f)) / 6.0f;

    // Outside of the support.
    if (!(ax < 2.0f)) return 0.0f;

    // Outer piece.
    const float t = 2.0f - ax;
    return t * t * t / 6.0f;
}
/// Unnormalized sinc function: sin(x) / x, with the value 1 returned when
/// |x| is below NV_EPSILON to avoid the division by (nearly) zero.
inline float sincf( const float x )
{
    if (!(fabs(x) < NV_EPSILON))
    {
        return sin(x) / x;
    }

    // Near the origin. A series expansion could be used instead:
    //return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f);
    return 1.0 ;
}
// support = 3.0
inline static float filter_lanczos3(float x)
{
if( x < 0.0f ) x = -x;
if( x < 3.0f ) return(sincf(x) * sincf(x / 3.0f));
return 0.0f;
}
// Mitchell & Netravali's two-param cubic
// see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88
// The filter is symmetric; one cubic is used for |x| < 1 and another one
// for 1 <= |x| < 2.
// support = 2.0
inline static float filter_mitchell(float x, float b, float c)
{
    const float ax = (x < 0.0f) ? -x : x;

    // @@ Coefficients could be precomputed.
    // @@ if b and c are fixed, these are constants.

    // Polynomial for the inner interval.
    const float p0 = (6.0f - 2.0f * b) / 6.0f;
    const float p2 = (-18.0f + 12.0f * b + 6.0f * c) / 6.0f;
    const float p3 = (12.0f - 9.0f * b - 6.0f * c) / 6.0f;

    // Polynomial for the outer interval.
    const float q0 = (8.0f * b + 24.0f * c) / 6.0f;
    const float q1 = (-12.0f * b - 48.0f * c) / 6.0f;
    const float q2 = (6.0f * b + 30.0f * c) / 6.0f;
    const float q3 = (-b - 6.0f * c) / 6.0f;

    if (ax < 1.0f)
    {
        return p0 + ax * ax * (p2 + ax * p3);
    }
    if (ax < 2.0f)
    {
        return q0 + ax * (q1 + ax * (q2 + ax * q3));
    }
    return 0.0f;
}
// Single argument Mitchell filter with b = c = 1/3. This is the overload that
// matches the float(float) signature of the filter table.
inline static float filter_mitchell(float x)
{
return filter_mitchell(x, 1.0f/3.0f, 1.0f/3.0f);
}
// Bessel function of the first kind from Jon Blow's article.
// http://mathworld.wolfram.com/BesselFunctionoftheFirstKind.html
// http://en.wikipedia.org/wiki/Bessel_function
// Sums the series 1 + sum_k ((x/2)^k / k!)^2 until a term drops below
// EPSILON_RATIO times the running sum.
static float bessel0(float x)
{
    const float EPSILON_RATIO = 1E-6;

    const float xh = 0.5 * x;
    float sum = 1.0;
    float term = 1.0;	// (x/2)^k / k!
    float ds = 1.0;		// square of the current term

    for (int k = 1; ds > sum * EPSILON_RATIO; ++k)
    {
        term = term * (xh / k);
        ds = term * term;
        sum = sum + ds;
    }

    return sum;
}
// Alternative bessel function from Paul Heckbert.
// Accumulates the terms (x^2/4)^k / (k!)^2 until a term is no longer greater
// than EPSILON_RATIO.
static float _bessel0(float x)
{
    const float EPSILON_RATIO = 1E-6;
    const float y = x * x / 4.0f;

    float sum = 1.0f;
    float term = y;
    int i = 2;
    while (term > EPSILON_RATIO)
    {
        sum += term;
        term *= y / float(i * i);
        i++;
    }
    return sum;
}
// Kaiser window: bessel0(alpha * sqrt(1 - x^2)) / bessel0(alpha).
// The square root is only real for |x| <= 1.
// support = 1.0
inline static float filter_kaiser(float x, float alpha)
{
    const float numerator = bessel0(alpha * sqrtf(1 - x * x));
    const float denominator = bessel0(alpha);
    return numerator / denominator;
}
// Single argument Kaiser window with alpha = 4. This is the overload that
// matches the float(float) signature of the filter table.
inline static float filter_kaiser(float x)
{
return filter_kaiser(x, 4.0f);
}
// Array of filters.
// Each entry pairs a filter function with the support value used by
// Kernel1::initFilter. The table is indexed with Filter::Enum, so the order of
// the entries must match the order of that enum.
static Filter s_filter_array[] = {
{filter_box, 0.5f}, // Box
{filter_triangle, 1.0f}, // Triangle
{filter_quadratic, 1.5f}, // Quadratic
{filter_cubic, 1.0f}, // Cubic
{filter_spline, 2.0f}, // Spline
{filter_lanczos3, 3.0f}, // Lanczos
// NOTE(review): filter_mitchell is documented with support = 2.0 but 1.0 is
// listed here -- confirm which one is intended.
{filter_mitchell, 1.0f}, // Mitchell
{filter_kaiser, 1.0f}, // Kaiser
};
} // namespace
/// Ctor. Allocates storage for @a width weights; the weights themselves are
/// left uninitialized until one of the init methods is called.
Kernel1::Kernel1(uint width) : w(width), data(new float[width])
{
}
/// Copy ctor. Allocates new storage and copies all the weights of @a k.
Kernel1::Kernel1(const Kernel1 & k) : w(k.w), data(new float[k.w])
{
    for (uint idx = 0; idx != w; ++idx)
    {
        data[idx] = k.data[idx];
    }
}
/// Dtor. Releases the weight array.
Kernel1::~Kernel1()
{
    // data is allocated with new[], so it has to be released with delete [].
    delete [] data;
}
/// Normalize the filter.
void Kernel1::normalize()
{
float total = 0.0f;
for(uint i = 0; i < w; i++) {
total += data[i];
}
float inv = 1.0f / total;
for(uint i = 0; i < w; i++) {
data[i] *= inv;
}
}
/// Init the kernel from one of the standard filters.
/// The filter function is sampled at the center of every kernel element, with
/// the kernel width mapped to the support of the filter, and the result is
/// normalized. The kernel width must be even.
void Kernel1::initFilter(Filter::Enum f)
{
    nvCheck((w & 1) == 0);
    nvCheck(f < Filter::Num);

    const Filter & entry = s_filter_array[f];
    const float filterSupport = entry.support;
    const float halfWidth = float(w / 2);

    for (uint idx = 0; idx < w; idx++)
    {
        // Center of element idx, relative to the center of the kernel.
        const float x = (idx + -halfWidth) + 0.5f;
        data[idx] = entry.function(x * filterSupport / halfWidth);
    }

    normalize();
}
/// Init 1D sinc filter.
void Kernel1::initSinc(float stretch /*= 1*/)
{
nvCheck((w & 1) == 0);
const float half_width = float(w / 2);
const float offset = -half_width;
const float nudge = 0.5f;
for(uint i = 0; i < w; i++) {
const float x = (i + offset) + nudge;
data[i] = sincf(PI * x * stretch);
}
normalize();
}
/// Init 1D windowed Kaiser filter.
void Kernel1::initKaiser(float alpha, float stretch /*= 1*/)
{
nvCheck((w & 1) == 0);
const float half_width = float(w / 2);
const float offset = -half_width;
const float nudge = 0.5f;
for(uint i = 0; i < w; i++) {
const float x = (i + offset) + nudge;
const float sinc_value = sincf(PI * x * stretch);
const float window_value = filter_kaiser(x / half_width, alpha);
data[i] = sinc_value * window_value; // @@ sinc windowed by kaiser
}
normalize();
}
/// Init 1D Mitchell filter.
void Kernel1::initMitchell(float b, float c)
{
nvCheck((w & 1) == 0);
const float half_width = float(w / 2);
const float offset = -half_width;
const float nudge = 0.5f;
for(uint i = 0; i < w; i++) {
const float x = (i + offset) + nudge;
data[i] = filter_mitchell(x / half_width, b, c);
}
normalize();
}
/// Print the kernel for debugging purposes.
void Kernel1::debugPrint()
{
for(uint i = 0; i < w; i++) {
nvDebug("%d: %f\n", i, data[i]);
}
}
/// Ctor. Allocates storage for a @a width x @a width kernel; the weights are
/// left uninitialized until one of the init methods is called.
Kernel2::Kernel2(uint width) : w(width), data(new float[width * width])
{
}
/// Copy ctor. Allocates new storage and copies all w*w weights of @a k.
Kernel2::Kernel2(const Kernel2 & k) : w(k.w), data(new float[k.w * k.w])
{
    const uint count = w * w;
    for (uint idx = 0; idx != count; ++idx)
    {
        data[idx] = k.data[idx];
    }
}
/// Dtor. Releases the weight array.
Kernel2::~Kernel2()
{
    // data is allocated with new[], so it has to be released with delete [].
    delete [] data;
}
/// Normalize the filter: scale the weights so that the sum of their absolute
/// values is one.
void Kernel2::normalize()
{
    const uint count = w * w;

    float sum = 0.0f;
    for (uint idx = 0; idx != count; ++idx)
    {
        sum += fabs(data[idx]);
    }

    const float scale = 1.0f / sum;
    for (uint idx = 0; idx != count; ++idx)
    {
        data[idx] *= scale;
    }
}
/// Transpose the kernel.
void Kernel2::transpose()
{
for(uint i = 0; i < w; i++) {
for(uint j = i+1; j < w; j++) {
swap(data[i*w + j], data[j*w + i]);
}
}
}
/// Init laplacian filter, usually used for sharpening.
/// Only 3x3 kernels are supported. The 4-neighbour variant is used; the
/// 8-neighbour (-1 .. +8) and the (+1 -2 +1 / -2 +4 -2 / +1 -2 +1) variants
/// are possible alternatives.
void Kernel2::initLaplacian()
{
    nvDebugCheck(w == 3);

    static const float laplacian[3 * 3] = {
        +0, -1, +0,
        -1, +4, -1,
        +0, -1, +0
    };

    for (int idx = 0; idx < 3 * 3; ++idx)
    {
        data[idx] = laplacian[idx];
    }
}
/// Init simple edge detection filter: a horizontal central difference on the
/// middle row. Only 3x3 kernels are supported.
void Kernel2::initEdgeDetection()
{
    nvCheck(w == 3);

    static const float edge[3 * 3] = {
         0, 0, 0,
        -1, 0, 1,
         0, 0, 0
    };

    for (int idx = 0; idx < 3 * 3; ++idx)
    {
        data[idx] = edge[idx];
    }
}
/// Init sobel filter.
void Kernel2::initSobel()
{
if (w == 3)
{
data[0] = -1; data[1] = 0; data[2] = 1;
data[3] = -2; data[4] = 0; data[5] = 2;
data[6] = -1; data[7] = 0; data[8] = 1;
}
else if (w == 5)
{
float elements[] = {
-1, -2, 0, 2, 1,
-2, -3, 0, 3, 2,
-3, -4, 0, 4, 3,
-2, -3, 0, 3, 2,
-1, -2, 0, 2, 1
};
for (int i = 0; i < 5*5; i++) {
data[i] = elements[i];
}
}
else if (w == 7)
{
float elements[] = {
-1, -2, -3, 0, 3, 2, 1,
-2, -3, -4, 0, 4, 3, 2,
-3, -4, -5, 0, 5, 4, 3,
-4, -5, -6, 0, 6, 5, 4,
-3, -4, -5, 0, 5, 4, 3,
-2, -3, -4, 0, 4, 3, 2,
-1, -2, -3, 0, 3, 2, 1
};
for (int i = 0; i < 7*7; i++) {
data[i] = elements[i];
}
}
else if (w == 9)
{
float elements[] = {
-1, -2, -3, -4, 0, 4, 3, 2, 1,
-2, -3, -4, -5, 0, 5, 4, 3, 2,
-3, -4, -5, -6, 0, 6, 5, 4, 3,
-4, -5, -6, -7, 0, 7, 6, 5, 4,
-5, -6, -7, -8, 0, 8, 7, 6, 5,
-4, -5, -6, -7, 0, 7, 6, 5, 4,
-3, -4, -5, -6, 0, 6, 5, 4, 3,
-2, -3, -4, -5, 0, 5, 4, 3, 2,
-1, -2, -3, -4, 0, 4, 3, 2, 1
};
for (int i = 0; i < 9*9; i++) {
data[i] = elements[i];
}
}
}
/// Init prewitt filter (horizontal derivative).
/// Tables are available for kernel widths 3 and 5; for any other width the
/// kernel is left untouched.
void Kernel2::initPrewitt()
{
    if (w == 3)
    {
        // The right column has to be positive for the kernel to be a
        // derivative; it used to be -1, which disagreed with the 5x5 table.
        data[0] = -1; data[1] = 0; data[2] = 1;
        data[3] = -1; data[4] = 0; data[5] = 1;
        data[6] = -1; data[7] = 0; data[8] = 1;
    }
    else if (w == 5)
    {
        // @@ Is this correct?
        float elements[] = {
            -2, -1, 0, 1, 2,
            -2, -1, 0, 1, 2,
            -2, -1, 0, 1, 2,
            -2, -1, 0, 1, 2,
            -2, -1, 0, 1, 2
        };
        for (int i = 0; i < 5*5; i++) {
            data[i] = elements[i];
        }
    }
}
/// Init blended sobel filter.
/// The 9x9 kernel is the weighted sum of the 9x9, 7x7, 5x5 and 3x3 sobel
/// kernels, each one centered in the 9x9 grid.
/// @param scale weights of the kernels: x for 3x3, y for 5x5, z for 7x7 and
///        w for 9x9.
void Kernel2::initBlendedSobel(const Vector4 & scale)
{
    nvCheck(w == 9);

    // 9x9 kernel: initializes every element.
    {
        float elements[] = {
            -1, -2, -3, -4, 0, 4, 3, 2, 1,
            -2, -3, -4, -5, 0, 5, 4, 3, 2,
            -3, -4, -5, -6, 0, 6, 5, 4, 3,
            -4, -5, -6, -7, 0, 7, 6, 5, 4,
            -5, -6, -7, -8, 0, 8, 7, 6, 5,
            -4, -5, -6, -7, 0, 7, 6, 5, 4,
            -3, -4, -5, -6, 0, 6, 5, 4, 3,
            -2, -3, -4, -5, 0, 5, 4, 3, 2,
            -1, -2, -3, -4, 0, 4, 3, 2, 1
        };
        for (int i = 0; i < 9*9; i++) {
            data[i] = elements[i] * scale.w();
        }
    }

    // 7x7 kernel, offset by one row and one column so that it is centered.
    // (The row offset was missing, which accumulated it into the top rows.)
    {
        float elements[] = {
            -1, -2, -3, 0, 3, 2, 1,
            -2, -3, -4, 0, 4, 3, 2,
            -3, -4, -5, 0, 5, 4, 3,
            -4, -5, -6, 0, 6, 5, 4,
            -3, -4, -5, 0, 5, 4, 3,
            -2, -3, -4, 0, 4, 3, 2,
            -1, -2, -3, 0, 3, 2, 1,
        };
        for (int i = 0; i < 7; i++) {
            for (int e = 0; e < 7; e++) {
                data[(i + 1) * 9 + e + 1] += elements[i * 7 + e] * scale.z();
            }
        }
    }

    // 5x5 kernel, offset by two rows and two columns.
    {
        float elements[] = {
            -1, -2, 0, 2, 1,
            -2, -3, 0, 3, 2,
            -3, -4, 0, 4, 3,
            -2, -3, 0, 3, 2,
            -1, -2, 0, 2, 1
        };
        for (int i = 0; i < 5; i++) {
            for (int e = 0; e < 5; e++) {
                data[(i + 2) * 9 + e + 2] += elements[i * 5 + e] * scale.y();
            }
        }
    }

    // 3x3 kernel, offset by three rows and three columns.
    {
        float elements[] = {
            -1, 0, 1,
            -2, 0, 2,
            -1, 0, 1,
        };
        for (int i = 0; i < 3; i++) {
            for (int e = 0; e < 3; e++) {
                data[(i + 3) * 9 + e + 3] += elements[i * 3 + e] * scale.x();
            }
        }
    }
}
/*PI_DECLARE_TEST(BesselTest) {
for(int i = 0; i < 8; i++) {
nvDebug("bessel0(%i) %f =? %f\n", i, bessel0(i), _bessel0(i));
PI_TEST(equalf(bessel0(i), _bessel0(i)));
}
return PiTestUnit::Succeed;
}*/

103
src/nvimage/Filter.h Normal file
View File

@ -0,0 +1,103 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_FILTER_H
#define NV_IMAGE_FILTER_H
#include <nvimage/nvimage.h>
namespace nv
{
class Vector4;
/// A filter function.
/// Pairs a one dimensional filter function with its support value.
struct Filter
{
// Standard filters.
// Num is not a filter; it is the number of entries.
enum Enum
{
Box,
Triangle,
Quadratic, // Bell
Cubic,
Spline,
Lanczos,
Mitchell,
Kaiser,
Num
};
// Filter function.
float (*function)(float x);
// Support value associated with the function.
float support;
};
/// A 1D kernel. Used to precompute filter weights.
/// The kernel owns a heap allocated array of `width` floats. The width is
/// fixed at construction time.
class Kernel1
{
public:
NVIMAGE_API Kernel1(uint width);
NVIMAGE_API Kernel1(const Kernel1 & k);
NVIMAGE_API ~Kernel1();
NVIMAGE_API void normalize();
// Weight at position x. The index is not range checked.
float valueAt(uint x) const {
return data[x];
}
// Number of weights.
uint width() const {
return w;
}
// Weight initialization.
NVIMAGE_API void initFilter(Filter::Enum filter);
NVIMAGE_API void initSinc(float stretch = 1);
NVIMAGE_API void initKaiser(float alpha = 4.0f, float stretch = 1.0f);
NVIMAGE_API void initMitchell(float b = 1.0f/3.0f, float c = 1.0f/3.0f);
NVIMAGE_API void debugPrint();
private:
// Number of weights.
const uint w;
// Weight array.
float * data;
};
/// A 2D kernel.
/// The kernel is square and owns a heap allocated array of width * width
/// floats, addressed as y * width + x. The width is fixed at construction time.
class Kernel2
{
public:
NVIMAGE_API Kernel2(uint width);
NVIMAGE_API Kernel2(const Kernel2 & k);
NVIMAGE_API ~Kernel2();
NVIMAGE_API void normalize();
NVIMAGE_API void transpose();
// Weight at column x of row y. The indices are not range checked.
float valueAt(uint x, uint y) const {
return data[y * w + x];
}
// Side length of the kernel.
uint width() const {
return w;
}
// Weight initialization.
NVIMAGE_API void initLaplacian();
NVIMAGE_API void initEdgeDetection();
NVIMAGE_API void initSobel();
NVIMAGE_API void initPrewitt();
NVIMAGE_API void initBlendedSobel(const Vector4 & scale);
private:
// Side length of the kernel.
const uint w;
// Weight array with w * w elements.
float * data;
};
// @@ Implement non linear filters:
// Kuwahara filter
// Median filter
} // nv namespace
#endif // NV_IMAGE_FILTER_H

839
src/nvimage/FloatImage.cpp Normal file
View File

@ -0,0 +1,839 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Containers.h>
#include <nvcore/Ptr.h>
#include <nvmath/Color.h>
#include "FloatImage.h"
#include "Filter.h"
#include "Image.h"
#include <math.h>
using namespace nv;
namespace
{
// Convert a float to int by truncation towards zero (despite the name, no
// rounding to nearest takes place).
static int round(float f)
{
    const int truncated = static_cast<int>(f);
    return truncated;
}
// Fractional part of f relative to floor(f); the result is never negative.
static float frac(float f)
{
    const float whole = floor(f);
    return f - whole;
}
}
/// Ctor. Creates an empty image: all extents are zero and no memory is allocated.
FloatImage::FloatImage() : m_width(0), m_height(0),
m_componentNum(0), m_count(0), m_mem(NULL)
{
}
/// Ctor. Init from image.
/// Starts from an empty image and then fills it with initFrom(), so @a img
/// must not be NULL.
FloatImage::FloatImage(const Image * img) : m_width(0), m_height(0),
m_componentNum(0), m_count(0), m_mem(NULL)
{
initFrom(img);
}
/// Dtor. Releases the pixel storage through free().
FloatImage::~FloatImage()
{
free();
}
/// Init the floating point image from a regular image.
void FloatImage::initFrom(const Image * img)
{
nvCheck(img != NULL);
allocate(4, img->width(), img->height());
float * red_channel = channel(0);
float * green_channel = channel(1);
float * blue_channel = channel(2);
float * alpha_channel = channel(3);
const uint count = m_width * m_height;
for(uint i = 0; i < count; i++) {
Color32 pixel = img->pixel(i);
red_channel[i] = float(pixel.r) / 255.0f;
green_channel[i] = float(pixel.g) / 255.0f;
blue_channel[i] = float(pixel.b) / 255.0f;
alpha_channel[i] = float(pixel.a) / 255.0f;
}
}
/// Convert the floating point image to a regular image.
/// @a num components starting at @a base_component are scaled by 255, clamped
/// to [0, 255] and written to the first components of every pixel. The
/// remaining color components are set to 0 and alpha to 0xff.
/// @return a newly allocated image; the caller takes ownership.
Image * FloatImage::createImage(uint base_component/*= 0*/, uint num/*= 4*/) const
{
    nvCheck(num <= 4);
    nvCheck(base_component + num <= m_componentNum);

    AutoPtr<Image> result(new Image());
    result->allocate(m_width, m_height);

    const uint pixelCount = m_width * m_height;
    for (uint p = 0; p < pixelCount; ++p)
    {
        // Start from 0xff000000 and overwrite the components that are provided.
        uint8 rgba[4] = { 0, 0, 0, 0xff };

        for (uint c = 0; c < num; ++c)
        {
            // Channels are stored one after the other.
            const float f = m_mem[pixelCount * (base_component + c) + p];
            rgba[c] = nv::clamp(int(255.0f * f), 0, 255);
        }

        result->pixel(p) = Color32(rgba[0], rgba[1], rgba[2], rgba[3]);
    }

    return result.release();
}
/// Convert the floating point image to a regular image. Correct gamma of rgb, but not alpha.
/// The image must have exactly four channels. The color channels are raised to
/// 1/gamma before they are scaled by 255 and clamped to [0, 255].
/// @return a newly allocated image; the caller takes ownership.
Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const
{
    nvCheck(m_componentNum == 4);

    AutoPtr<Image> result(new Image());
    result->allocate(m_width, m_height);

    const float * red = this->channel(0);
    const float * green = this->channel(1);
    const float * blue = this->channel(2);
    const float * alpha = this->channel(3);

    const uint pixelCount = m_width * m_height;
    for (uint p = 0; p < pixelCount; ++p)
    {
        const uint8 r = nv::clamp(int(255.0f * pow(red[p], 1.0f/gamma)), 0, 255);
        const uint8 g = nv::clamp(int(255.0f * pow(green[p], 1.0f/gamma)), 0, 255);
        const uint8 b = nv::clamp(int(255.0f * pow(blue[p], 1.0f/gamma)), 0, 255);
        // Alpha is linear: no gamma applied.
        const uint8 a = nv::clamp(int(255.0f * alpha[p]), 0, 255);

        result->pixel(p) = Color32(r, g, b, a);
    }

    return result.release();
}
/// Allocate a 2d float image of the given format and the given extents.
/// The image must not own any memory yet. The new storage holds w * h * c
/// floats and is not initialized.
/// @param c number of components.
/// @param w width.
/// @param h height.
void FloatImage::allocate(uint c, uint w, uint h)
{
    nvCheck(m_mem == NULL);

    m_componentNum = c;
    m_width = w;
    m_height = h;
    m_count = w * h * c;

    m_mem = reinterpret_cast<float *>(nv::mem::malloc(m_count * sizeof(float)));
}
/// Free the image, but don't clear the members.
/// Does nothing when no memory is owned. The destructor calls this
/// unconditionally, so an image that was never allocated must be accepted
/// instead of tripping a check.
void FloatImage::free()
{
    if (m_mem != NULL)
    {
        nv::mem::free( reinterpret_cast<void *>(m_mem) );
        m_mem = NULL;
    }
}
void FloatImage::clear(float f/*=0.0f*/)
{
for(uint i = 0; i < m_count; i++) {
m_mem[i] = f;
}
}
/// Normalize the vectors stored in three consecutive channels.
/// Every (x, y, z) triple starting at @a base_component is replaced by the
/// result of normalizeSafe(), with the zero vector as fallback.
void FloatImage::normalize(uint base_component)
{
    nvCheck(base_component + 3 <= m_componentNum);

    float * xs = this->channel(base_component + 0);
    float * ys = this->channel(base_component + 1);
    float * zs = this->channel(base_component + 2);

    const uint pixelCount = m_width * m_height;
    for (uint p = 0; p < pixelCount; ++p)
    {
        const Vector3 unit = normalizeSafe(Vector3(xs[p], ys[p], zs[p]), Vector3(zero));

        xs[p] = unit.x();
        ys[p] = unit.y();
        zs[p] = unit.z();
    }
}
/// Pack normals: three channels starting at base_component are mapped with
/// v -> 0.5 * (v + 1), which takes [-1, 1] to [0, 1].
void FloatImage::packNormals(uint base_component)
{
scaleBias(base_component, 3, 0.5f, 1.0f);
}
/// Expand packed normals: three channels starting at @a base_component are
/// mapped with v -> 2 * (v - 0.5), which takes [0, 1] back to [-1, 1].
void FloatImage::expandNormals(uint base_component)
{
    // scaleBias() computes scale * (v + bias), so the bias has to be negative
    // here; with +0.5 the result was 2v + 1 instead of 2v - 1.
    scaleBias(base_component, 3, 2, -0.5);
}
/// Apply v -> scale * (v + bias) to @a num channels starting at
/// @a base_component. Note that the bias is added before scaling.
void FloatImage::scaleBias(uint base_component, uint num, float scale, float bias)
{
    const uint pixelCount = m_width * m_height;

    for (uint offset = 0; offset < num; ++offset)
    {
        float * values = this->channel(base_component + offset);
        for (uint p = 0; p < pixelCount; ++p)
        {
            values[p] = scale * (values[p] + bias);
        }
    }
}
/// Clamp the elements of the image.
void FloatImage::clamp(float low, float high)
{
for(uint i = 0; i < m_count; i++) {
m_mem[i] = nv::clamp(m_mem[i], low, high);
}
}
/// From gamma to linear space: raises @a num channels starting at
/// @a base_component to the power @a gamma.
void FloatImage::toLinear(uint base_component, uint num, float gamma /*= 2.2f*/)
{
    const float power = gamma;
    exponentiate(base_component, num, power);
}
/// From linear to gamma space: raises @a num channels starting at
/// @a base_component to the power 1 / @a gamma.
void FloatImage::toGamma(uint base_component, uint num, float gamma /*= 2.2f*/)
{
    const float power = 1.0f/gamma;
    exponentiate(base_component, num, power);
}
/// Exponentiate the elements of the image: every element of @a num channels
/// starting at @a base_component is raised to @a power.
void FloatImage::exponentiate(uint base_component, uint num, float power)
{
    const uint pixelCount = m_width * m_height;

    for (uint offset = 0; offset < num; ++offset)
    {
        float * values = this->channel(base_component + offset);
        for (uint p = 0; p < pixelCount; ++p)
        {
            values[p] = pow(values[p], power);
        }
    }
}
// Disabled code: variants of the nearest sampling functions that take integer
// pixel coordinates. They are excluded from compilation by the #if 0 below;
// the versions that take float coordinates follow this block.
#if 0
float FloatImage::nearest(float x, float y, int c, WrapMode wm) const
{
if( wm == WrapMode_Clamp ) return nearest_clamp(x, y, c);
/*if( wm == WrapMode_Repeat )*/ return nearest_repeat(x, y, c);
//if( wm == WrapMode_Mirror ) return nearest_mirror(x, y, c);
}
float FloatImage::nearest_clamp(int x, int y, const int c) const
{
const int w = m_width;
const int h = m_height;
int ix = ::clamp(x, 0, w-1);
int iy = ::clamp(y, 0, h-1);
return pixel(ix, iy, c);
}
float FloatImage::nearest_repeat(int x, int y, const int c) const
{
const int w = m_width;
const int h = m_height;
int ix = x % w;
int iy = y % h;
return pixel(ix, iy, c);
}
#endif
/// Nearest-texel lookup at (x, y) in channel c using the given wrap mode.
/// Every mode other than WrapMode_Clamp currently falls back to repeat.
float FloatImage::nearest(float x, float y, int c, WrapMode wm) const
{
    if (wm != WrapMode_Clamp) {
        // WrapMode_Repeat, and WrapMode_Mirror until it gets its own path.
        return nearest_repeat(x, y, c);
    }
    return nearest_clamp(x, y, c);
}
/// Bilinear lookup at (x, y) in channel c using the given wrap mode.
/// Every mode other than WrapMode_Clamp currently falls back to repeat.
float FloatImage::linear(float x, float y, int c, WrapMode wm) const
{
    if (wm != WrapMode_Clamp) {
        // WrapMode_Repeat, and WrapMode_Mirror until it gets its own path.
        return linear_repeat(x, y, c);
    }
    return linear_clamp(x, y, c);
}
/// Nearest-texel lookup: (x, y) are scaled by the image extents, rounded,
/// and clamped to the valid texel range.
float FloatImage::nearest_clamp(float x, float y, const int c) const
{
    const int lastColumn = m_width - 1;
    const int lastRow = m_height - 1;

    const int column = ::clamp(round(x * m_width), 0, lastColumn);
    const int row = ::clamp(round(y * m_height), 0, lastRow);

    return pixel(column, row, c);
}
/// Nearest-texel lookup with repeat wrapping.
///
/// (x, y) are reduced with frac(), scaled by the image extents and rounded.
float FloatImage::nearest_repeat(float x, float y, const int c) const
{
    const int w = m_width;
    const int h = m_height;

    int ix = round(frac(x) * w);
    int iy = round(frac(y) * h);

    // Rounding can land exactly on w (or h), e.g. when frac(x) * w >= w - 0.5.
    // That texel is one past the end; under repeat wrapping it is texel 0.
    // NOTE(review): assumes frac() returns values in [0, 1) - confirm in nvmath.
    if (ix >= w) ix -= w;
    if (iy >= h) iy -= h;

    return pixel(ix, iy, c);
}
/// Nearest-texel lookup with mirror wrapping.
/// Not implemented yet: ignores its arguments and always returns 0.
float FloatImage::nearest_mirror(float x, float y, const int c) const
{
// @@ TBD
return 0.0f;
}
/// Bilinear lookup with clamp wrapping.
///
/// (x, y) are scaled by the image extents. The four surrounding texels are
/// clamped to the image and blended with the fractional part of the scaled
/// coordinates.
float FloatImage::linear_clamp(float x, float y, const int c) const
{
    const int w = m_width;
    const int h = m_height;

    x *= w;
    y *= h;

    const float fracX = frac(x);
    const float fracY = frac(y);

    // The base texel must be floor(x) so that it agrees with the fractional
    // weight. Using round() here made the result jump at half-texel positions.
    // NOTE(review): assumes frac(v) == v - floor(v) - confirm in nvmath.
    const int baseX = int(floor(x));
    const int baseY = int(floor(y));

    const int ix0 = ::clamp(baseX, 0, w-1);
    const int iy0 = ::clamp(baseY, 0, h-1);
    const int ix1 = ::clamp(baseX + 1, 0, w-1);
    const int iy1 = ::clamp(baseY + 1, 0, h-1);

    float f1 = pixel(ix0, iy0, c);
    float f2 = pixel(ix1, iy0, c);
    float f3 = pixel(ix0, iy1, c);
    float f4 = pixel(ix1, iy1, c);

    // Blend horizontally on both rows, then vertically.
    float i1 = lerp(f1, f2, fracX);
    float i2 = lerp(f3, f4, fracX);

    return lerp(i1, i2, fracY);
}
float FloatImage::linear_repeat(float x, float y, int c) const
{
const int w = m_width;
const int h = m_height;
const float fracX = frac(x * w);
const float fracY = frac(y * h);
int ix0 = round(frac(x) * w);
int iy0 = round(frac(y) * h);
int ix1 = round(frac(x + 1.0f/w) * w);
int iy1 = round(frac(y + 1.0f/h) * h);
float f1 = pixel(ix0, iy0, c);
float f2 = pixel(ix1, iy0, c);
float f3 = pixel(ix0, iy1, c);
float f4 = pixel(ix1, iy1, c);
float i1 = lerp(f1, f2, fracX);
float i2 = lerp(f3, f4, fracX);
return lerp(i1, i2, fracY);
}
/// Bilinear lookup with mirror wrapping.
/// Not implemented yet: ignores its arguments and always returns 0.
float FloatImage::linear_mirror(float x, float y, int c) const
{
// @@ TBD
return 0.0f;
}
/// Fast downsampling using box filter.
///
/// The extents of the image are divided by two and rounded down (but never
/// below one). The image must not be 1x1.
///
/// When the size of the image is odd, this uses a polyphase box filter as explained in:
/// http://developer.nvidia.com/object/np2_mipmapping.html
///
/// Returns a newly allocated image; the caller takes ownership of it.
FloatImage * FloatImage::fastDownSample() const
{
    nvDebugCheck(m_width != 1 || m_height != 1);

    AutoPtr<FloatImage> dst_image( new FloatImage() );

    const uint w = max(1, m_width / 2);
    const uint h = max(1, m_height / 2);
    dst_image->allocate(m_componentNum, w, h);

    // 1D box filter.
    if (m_width == 1 || m_height == 1)
    {
        // Number of destination samples. One of w and h is 1 here, so this is
        // the destination length along the only non-trivial axis. Iterating
        // over the source length instead would overrun both buffers.
        const uint n = w * h;

        if ((m_width * m_height) & 1)
        {
            // Odd source length (2 * n + 1): three-tap polyphase filter whose
            // weights sum to 2 * n + 1.
            const float scale = 1.0f / (2 * n + 1);

            for(uint c = 0; c < m_componentNum; c++)
            {
                const float * src = this->channel(c);
                float * dst = dst_image->channel(c);

                for(uint x = 0; x < n; x++)
                {
                    const float w0 = float(n - x);
                    const float w1 = float(n - 0);
                    const float w2 = float(1 + x);

                    *dst++ = scale * (w0 * src[0] + w1 * src[1] + w2 * src[2]);
                    src += 2;
                }
            }
        }
        else
        {
            // Even source length: average pairs of samples.
            for(uint c = 0; c < m_componentNum; c++)
            {
                const float * src = this->channel(c);
                float * dst = dst_image->channel(c);

                for(uint x = 0; x < n; x++)
                {
                    *dst = 0.5f * (src[0] + src[1]);
                    dst++;
                    src += 2;
                }
            }
        }
    }

    // Regular box filter.
    else if ((m_width & 1) == 0 && (m_height & 1) == 0)
    {
        for(uint c = 0; c < m_componentNum; c++)
        {
            const float * src = this->channel(c);
            float * dst = dst_image->channel(c);

            for(uint y = 0; y < h; y++)
            {
                for(uint x = 0; x < w; x++)
                {
                    *dst = 0.25f * (src[0] + src[1] + src[m_width] + src[m_width + 1]);
                    dst++;
                    src += 2;
                }

                // The inner loop consumed one source row; skip the second one.
                src += m_width;
            }
        }
    }

    // Polyphase filters.
    else if ((m_width & 1) && (m_height & 1))
    {
        nvDebugCheck(m_width == 2 * w + 1);
        nvDebugCheck(m_height == 2 * h + 1);

        // Horizontal weights sum to m_width, vertical weights to m_height.
        const float scale = 1.0f / (m_width * m_height);

        for(uint c = 0; c < m_componentNum; c++)
        {
            const float * src = this->channel(c);
            float * dst = dst_image->channel(c);

            for(uint y = 0; y < h; y++)
            {
                const float v0 = float(h - y);
                const float v1 = float(h - 0);
                const float v2 = float(1 + y);

                for (uint x = 0; x < w; x++)
                {
                    const float w0 = float(w - x);
                    const float w1 = float(w - 0);
                    const float w2 = float(1 + x);

                    // Three consecutive source rows; every tap of a row must
                    // be read from that same row.
                    const float * row0 = src + 0 * m_width + 2 * x;
                    const float * row1 = src + 1 * m_width + 2 * x;
                    const float * row2 = src + 2 * m_width + 2 * x;

                    float f = 0.0f;
                    f += v0 * (w0 * row0[0] + w1 * row0[1] + w2 * row0[2]);
                    f += v1 * (w0 * row1[0] + w1 * row1[1] + w2 * row1[2]);
                    f += v2 * (w0 * row2[0] + w1 * row2[1] + w2 * row2[2]);

                    *dst = f * scale;
                    dst++;
                }

                src += 2 * m_width;
            }
        }
    }
    else if (m_width & 1)
    {
        nvDebugCheck(m_width == 2 * w + 1);

        // Horizontal weights sum to m_width; two rows are averaged.
        const float scale = 1.0f / (2 * m_width);

        for(uint c = 0; c < m_componentNum; c++)
        {
            const float * src = this->channel(c);
            float * dst = dst_image->channel(c);

            for(uint y = 0; y < h; y++)
            {
                for (uint x = 0; x < w; x++)
                {
                    const float w0 = float(w - x);
                    const float w1 = float(w - 0);
                    const float w2 = float(1 + x);

                    float f = 0.0f;
                    f += w0 * (src[2 * x + 0] + src[m_width + 2 * x + 0]);
                    f += w1 * (src[2 * x + 1] + src[m_width + 2 * x + 1]);
                    f += w2 * (src[2 * x + 2] + src[m_width + 2 * x + 2]);

                    *dst = f * scale;
                    dst++;
                }

                src += 2 * m_width;
            }
        }
    }
    else if (m_height & 1)
    {
        nvDebugCheck(m_height == 2 * h + 1);

        // Vertical weights sum to m_height; two columns are averaged.
        const float scale = 1.0f / (2 * m_height);

        for(uint c = 0; c < m_componentNum; c++)
        {
            const float * src = this->channel(c);
            float * dst = dst_image->channel(c);

            for(uint y = 0; y < h; y++)
            {
                const float v0 = float(h - y);
                const float v1 = float(h - 0);
                const float v2 = float(1 + y);

                for (uint x = 0; x < w; x++)
                {
                    float f = 0.0f;
                    f += v0 * (src[0 * m_width + 2 * x] + src[0 * m_width + 2 * x + 1]);
                    f += v1 * (src[1 * m_width + 2 * x] + src[1 * m_width + 2 * x + 1]);
                    f += v2 * (src[2 * m_width + 2 * x] + src[2 * m_width + 2 * x + 1]);

                    *dst = f * scale;
                    dst++;
                }

                src += 2 * m_width;
            }
        }
    }

    return dst_image.release();
}
/// Downsample to half the extents (rounded down, never below one) by applying
/// a 1D kernel separately in each dimension.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, WrapMode wm) const
{
    const uint halfWidth = max(1, m_width / 2);
    const uint halfHeight = max(1, m_height / 2);

    return downSample(kernel, halfWidth, halfHeight, wm);
}
/// Downsample to w x h applying a 1D kernel separately in each dimension.
///
/// The kernel is first applied horizontally into an intermediate image of
/// size w x m_height, then vertically on that intermediate image to produce
/// the w x h result. Returns a newly allocated image owned by the caller.
FloatImage * FloatImage::downSample(const Kernel1 & kernel, uint w, uint h, WrapMode wm) const
{
    nvCheck(!(kernel.width() & 1)); // Make sure that the kernel width is even.

    AutoPtr<FloatImage> tmp_image( new FloatImage() );
    tmp_image->allocate(m_componentNum, w, m_height);

    AutoPtr<FloatImage> dst_image( new FloatImage() );
    dst_image->allocate(m_componentNum, w, h);

    const float xscale = float(m_width) / float(w);
    const float yscale = float(m_height) / float(h);

    for(uint c = 0; c < m_componentNum; c++) {
        float * tmp_channel = tmp_image->channel(c);

        // Horizontal pass: source image -> intermediate image.
        for(uint y = 0; y < m_height; y++) {
            for(uint x = 0; x < w; x++) {
                float sum = this->applyKernelHorizontal(&kernel, uint(x*xscale), y, c, wm);

                const uint tmp_index = tmp_image->index(x, y);
                tmp_channel[tmp_index] = sum;
            }
        }

        float * dst_channel = dst_image->channel(c);

        // Vertical pass: intermediate image -> destination image. It must
        // read the horizontally filtered data; sampling `this` again would
        // discard the first pass. The intermediate image already has the
        // destination width, so x is used unscaled.
        for(uint y = 0; y < h; y++) {
            for(uint x = 0; x < w; x++) {
                float sum = tmp_image->applyKernelVertical(&kernel, x, uint(y*yscale), c, wm);

                const uint dst_index = dst_image->index(x, y);
                dst_channel[dst_index] = sum;
            }
        }
    }

    return dst_image.release();
}
/// Apply 2D kernel at the given coordinates and return result.
float FloatImage::applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const
{
nvDebugCheck(k != NULL);
const uint kernelWidth = k->width();
const int kernelOffset = int(kernelWidth / 2) - 1;
const float * channel = this->channel(c);
float sum = 0.0f;
for(uint i = 0; i < kernelWidth; i++)
{
const int src_y = int(y + i) - kernelOffset;
for(uint e = 0; e < kernelWidth; e++)
{
const int src_x = int(x + e) - kernelOffset;
int idx = this->index(src_x, src_y, wm);
sum += k->valueAt(e, i) * channel[idx];
}
}
return sum;
}
/// Apply 1D vertical kernel at the given coordinates and return result.
float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const
{
nvDebugCheck(k != NULL);
const uint kernelWidth = k->width();
const int kernelOffset = int(kernelWidth / 2) - 1;
const float * channel = this->channel(c);
float sum = 0.0f;
for(uint i = 0; i < kernelWidth; i++)
{
const int src_y = int(y + i) - kernelOffset;
const int idx = this->index(x, src_y, wm);
sum += k->valueAt(i) * channel[idx];
}
return sum;
}
/// Apply 1D horizontal kernel at the given coordinates and return result.
float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const
{
nvDebugCheck(k != NULL);
const uint kernelWidth = k->width();
const int kernelOffset = int(kernelWidth / 2) - 1;
const float * channel = this->channel(c);
float sum = 0.0f;
for(uint e = 0; e < kernelWidth; e++)
{
const int src_x = int(x + e) - kernelOffset;
const int idx = this->index(src_x, y, wm);
sum += k->valueAt(e) * channel[idx];
}
return sum;
}
#if 0
Vec3d bilinear(double u, double v) const
{
u = mod(u*(W-1),W);
v = mod(v*(H-1),H);
Vec3d v1,v2,v3,v4;
int x_small = (int)floor(u);
int x_big = x_small + 1;
int y_small = (int)floor(v);
int y_big = y_small + 1;
if (x_small < 0)
x_small = W-1;
else if (x_big >= W)
x_big = 0;
if (y_small < 0)
y_small = H-1;
else if (y_big >= H)
y_big = 0;
double fractional_X = u - x_small;
double fractional_Y = v - y_small;
if (nchan == 3)
{
v1 = Vec3d(pixel(x_small, y_small)[0], pixel(x_small, y_small)[1], pixel(x_small, y_small)[2]);
v2 = Vec3d(pixel(x_big, y_small)[0], pixel(x_big, y_small)[1], pixel(x_big, y_small)[2]);
v3 = Vec3d(pixel(x_small, y_big)[0], pixel(x_small, y_big)[1], pixel(x_small, y_big)[2]);
v4 = Vec3d(pixel(x_big, y_big)[0], pixel(x_big, y_big)[1], pixel(x_big, y_big)[2]);
}
Vec3d i1 = lerp(v1, v2, fractional_X);
Vec3d i2 = lerp(v3, v4, fractional_X);
return lerp(i1, i2, fractional_Y);
}
Vec3d bicubic(double u, double v) const
{
u = mod(u*(W-1),W);
v = mod(v*(H-1),H);
int x_small1 = (int)floor(u),
x_small2 = x_small1 - 1,
x_big1 = x_small1 + 1,
x_big2 = x_small1 + 2;
int y_small1 = (int)floor(v),
y_small2 = y_small1 - 1,
y_big1 = y_small1 + 1,
y_big2 = y_small1 + 2;
x_small1 = (int)mod(x_small1,W);
x_small2 = (int)mod(x_small2,W);
x_big1 = (int)mod(x_big1,W);
x_big2 = (int)mod(x_big2,W);
y_small1 = (int)mod(y_small1,H);
y_small2 = (int)mod(y_small2,H);
y_big1 = (int)mod(y_big1,H);
y_big2 = (int)mod(y_big2,H);
double fractional_X = u - x_small1;
double fractional_Y = v - y_small1;
if (nchan == 3)
{
// the interpolations across the rows
Vec3d row1 = cubic(Vec3d(pixel(x_small2, y_small2)[0], pixel(x_small2, y_small2)[1], pixel(x_small2, y_small2)[2]),
Vec3d(pixel(x_small1, y_small2)[0], pixel(x_small1, y_small2)[1], pixel(x_small1, y_small2)[2]),
Vec3d(pixel(x_big1, y_small2)[0], pixel(x_big1, y_small2)[1], pixel(x_big1, y_small2)[2]),
Vec3d(pixel(x_big2, y_small2)[0], pixel(x_big2, y_small2)[1], pixel(x_big2, y_small2)[2]),
fractional_X);
Vec3d row2 = cubic(Vec3d(pixel(x_small2, y_small1)[0], pixel(x_small2, y_small1)[1], pixel(x_small2, y_small1)[2]),
Vec3d(pixel(x_small1, y_small1)[0], pixel(x_small1, y_small1)[1], pixel(x_small1, y_small1)[2]),
Vec3d(pixel(x_big1, y_small1)[0], pixel(x_big1, y_small1)[1], pixel(x_big1, y_small1)[2]),
Vec3d(pixel(x_big2, y_small1)[0], pixel(x_big2, y_small1)[1], pixel(x_big2, y_small1)[2]),
fractional_X);
Vec3d row3 = cubic(Vec3d(pixel(x_small2, y_big1)[0], pixel(x_small2, y_big1)[1], pixel(x_small2, y_big1)[2]),
Vec3d(pixel(x_small1, y_big1)[0], pixel(x_small1, y_big1)[1], pixel(x_small1, y_big1)[2]),
Vec3d(pixel(x_big1, y_big1)[0], pixel(x_big1, y_big1)[1], pixel(x_big1, y_big1)[2]),
Vec3d(pixel(x_big2, y_big1)[0], pixel(x_big2, y_big1)[1], pixel(x_big2, y_big1)[2]),
fractional_X);
Vec3d row4 = cubic(Vec3d(pixel(x_small2, y_big2)[0], pixel(x_small2, y_big2)[1], pixel(x_small2, y_big2)[2]),
Vec3d(pixel(x_small1, y_big2)[0], pixel(x_small1, y_big2)[1], pixel(x_small1, y_big2)[2]),
Vec3d(pixel(x_big1, y_big2)[0], pixel(x_big1, y_big2)[1], pixel(x_big1, y_big2)[2]),
Vec3d(pixel(x_big2, y_big2)[0], pixel(x_big2, y_big2)[1], pixel(x_big2, y_big2)[2]),
fractional_X);
// now interpolate across the interpolated rows (the columns)
return cubic(row1,row2,row3,row4,fractional_Y);
}
else
return Vec3d(0.0);
}
Vec3d bicubic2(double u, double v) const
{
u = mod(u*(W-1),W);
v = mod(v*(H-1),H);
int x_small1 = floorf(u),
x_small2 = x_small1 - 1,
x_big1 = int(x_small1 + 1),
x_big2 = int(x_small1 + 2);
int y_small1 = floorf(v),
y_small2 = y_small1 - 1,
y_big1 = y_small1 + 1,
y_big2 = y_small1 + 2;
x_small1 = (int)mod(x_small1,W);
x_small2 = (int)mod(x_small2,W);
x_big1 = (int)mod(x_big1,W);
x_big2 = (int)mod(x_big2,W);
y_small1 = (int)mod(y_small1,H);
y_small2 = (int)mod(y_small2,H);
y_big1 = (int)mod(y_big1,H);
y_big2 = (int)mod(y_big2,H);
double fractional_X = u - x_small1;
double fractional_Y = v - y_small1;
if (nchan == 3)
{
// the interpolations across the rows
Vec3d row1 = cubic2(Vec3d(pixel(x_small2, y_small2)[0], pixel(x_small2, y_small2)[1], pixel(x_small2, y_small2)[2]),
Vec3d(pixel(x_small1, y_small2)[0], pixel(x_small1, y_small2)[1], pixel(x_small1, y_small2)[2]),
Vec3d(pixel(x_big1, y_small2)[0], pixel(x_big1, y_small2)[1], pixel(x_big1, y_small2)[2]),
Vec3d(pixel(x_big2, y_small2)[0], pixel(x_big2, y_small2)[1], pixel(x_big2, y_small2)[2]),
fractional_X);
Vec3d row2 = cubic2(Vec3d(pixel(x_small2, y_small1)[0], pixel(x_small2, y_small1)[1], pixel(x_small2, y_small1)[2]),
Vec3d(pixel(x_small1, y_small1)[0], pixel(x_small1, y_small1)[1], pixel(x_small1, y_small1)[2]),
Vec3d(pixel(x_big1, y_small1)[0], pixel(x_big1, y_small1)[1], pixel(x_big1, y_small1)[2]),
Vec3d(pixel(x_big2, y_small1)[0], pixel(x_big2, y_small1)[1], pixel(x_big2, y_small1)[2]),
fractional_X);
Vec3d row3 = cubic2(Vec3d(pixel(x_small2, y_big1)[0], pixel(x_small2, y_big1)[1], pixel(x_small2, y_big1)[2]),
Vec3d(pixel(x_small1, y_big1)[0], pixel(x_small1, y_big1)[1], pixel(x_small1, y_big1)[2]),
Vec3d(pixel(x_big1, y_big1)[0], pixel(x_big1, y_big1)[1], pixel(x_big1, y_big1)[2]),
Vec3d(pixel(x_big2, y_big1)[0], pixel(x_big2, y_big1)[1], pixel(x_big2, y_big1)[2]),
fractional_X);
Vec3d row4 = cubic2(Vec3d(pixel(x_small2, y_big2)[0], pixel(x_small2, y_big2)[1], pixel(x_small2, y_big2)[2]),
Vec3d(pixel(x_small1, y_big2)[0], pixel(x_small1, y_big2)[1], pixel(x_small1, y_big2)[2]),
Vec3d(pixel(x_big1, y_big2)[0], pixel(x_big1, y_big2)[1], pixel(x_big1, y_big2)[2]),
Vec3d(pixel(x_big2, y_big2)[0], pixel(x_big2, y_big2)[1], pixel(x_big2, y_big2)[2]),
fractional_X);
// now interpolate across the interpolated rows (the columns)
return cubic2(row1,row2,row3,row4,fractional_Y);
}
else
return Vec3d(0.0);
}
#endif

241
src/nvimage/FloatImage.h Normal file
View File

@ -0,0 +1,241 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_FLOATIMAGE_H
#define NV_IMAGE_FLOATIMAGE_H
#include <nvcore/Debug.h>
#include <nvcore/Containers.h> // clamp
#include <nvimage/nvimage.h>
namespace nv
{
class Image;
class Kernel1;
class Kernel2;
/// Multicomponent floating point image class.
///
/// Storage is planar: channel(c) returns m_mem + c * m_width * m_height, so
/// each component occupies a contiguous block of m_width * m_height floats,
/// laid out row by row.
class FloatImage
{
public:
/// How coordinates outside the image are mapped back to a valid texel.
enum WrapMode {
WrapMode_Clamp,
WrapMode_Repeat,
WrapMode_Mirror
};
NVIMAGE_API FloatImage();
NVIMAGE_API FloatImage(const Image * img);
NVIMAGE_API virtual ~FloatImage();
/** @name Conversion. */
//@{
NVIMAGE_API void initFrom(const Image * img);
NVIMAGE_API Image * createImage(uint base_component = 0, uint num = 4) const;
NVIMAGE_API Image * createImageGammaCorrect(float gamma = 2.2f) const;
//@}
/** @name Allocation. */
//@{
NVIMAGE_API void allocate(uint c, uint w, uint h);
NVIMAGE_API void free(); // Does not clear members.
//@}
/** @name Manipulation. */
//@{
NVIMAGE_API void clear(float f=0.0f);
//NVIMAGE_API void ComputeMipmaps();
//NVIMAGE_API void ComputeNormalMap(const float height_scale = 1.0f);
//NVIMAGE_API void Clamp(uint base_component, uint num);
//NVIMAGE_API void NormalizeColor(uint base_component);
NVIMAGE_API void normalize(uint base_component);
NVIMAGE_API void packNormals(uint base_component);
NVIMAGE_API void expandNormals(uint base_component);
// Applies value = scale * (value + add) to `num` channels starting at base_component.
NVIMAGE_API void scaleBias(uint base_component, uint num, float scale, float add);
NVIMAGE_API void clamp(float low, float high);
NVIMAGE_API void toLinear(uint base_component, uint num, float gamma = 2.2f);
NVIMAGE_API void toGamma(uint base_component, uint num, float gamma = 2.2f);
NVIMAGE_API void exponentiate(uint base_component, uint num, float power);
// The downsampling functions return a newly allocated image.
NVIMAGE_API FloatImage * fastDownSample() const;
NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const;
NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const;
//@}
// Kernel evaluation at texel (x, y) of channel c; taps outside the image use the wrap mode.
NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const;
NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const;
NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const;
uint width() const { return m_width; }
uint height() const { return m_height; }
uint componentNum() const { return m_componentNum; }
uint count() const { return m_count; }
/** @name Pixel access. */
//@{
const float * channel(uint c) const;
float * channel(uint c);
const float * scanline(uint y, uint c) const;
float * scanline(uint y, uint c);
void setPixel(float f, uint x, uint y, uint c);
float pixel(uint x, uint y, uint c) const;
void setPixel(float f, uint idx);
float pixel(uint idx) const;
// NOTE(review): no definition of the (int, int) overload of nearest() is
// visible next to the other samplers - confirm it is defined or remove it.
float nearest(int x, int y, int c, WrapMode wm) const;
// The float samplers below multiply (x, y) by the image extents before
// selecting texels.
float nearest(float x, float y, int c, WrapMode wm) const;
float linear(float x, float y, int c, WrapMode wm) const;
float nearest_clamp(float x, float y, int c) const;
float nearest_repeat(float x, float y, int c) const;
float nearest_mirror(float x, float y, int c) const;
float linear_clamp(float x, float y, int c) const;
float linear_repeat(float x, float y, int c) const;
float linear_mirror(float x, float y, int c) const;
//@}
public:
// Index helpers: return the offset y * m_width + x of a texel inside one channel.
uint index(uint x, uint y) const;
uint indexClamp(int x, int y) const;
uint indexRepeat(int x, int y) const;
uint indexMirror(int x, int y) const;
uint index(int x, int y, WrapMode wm) const;
public:
uint16 m_width; ///< Width of the texture.
uint16 m_height; ///< Height of the texture.
uint32 m_componentNum; ///< Number of components.
uint32 m_count; ///< Image pixel count.
float * m_mem; ///< Pixel storage; channel(c) starts at m_mem + c * m_width * m_height.
};
/// Return a read-only pointer to the first element of channel c.
inline const float * FloatImage::channel(uint c) const
{
    nvDebugCheck(m_mem != NULL);
    nvDebugCheck(c < m_componentNum);

    // Channels are stored back to back, m_width * m_height floats each.
    const uint channelSize = uint(m_width) * uint(m_height);
    return m_mem + c * channelSize;
}
/// Return a writable pointer to the first element of channel c.
inline float * FloatImage::channel(uint c)
{
    nvDebugCheck(m_mem != NULL);
    nvDebugCheck(c < m_componentNum);

    // Channels are stored back to back, m_width * m_height floats each.
    const uint channelSize = uint(m_width) * uint(m_height);
    return m_mem + c * channelSize;
}
/// Return a read-only pointer to the first element of row y in channel c.
inline const float * FloatImage::scanline(uint y, uint c) const
{
    nvDebugCheck(y < m_height);

    const float * const plane = channel(c);
    const uint rowOffset = y * m_width;
    return plane + rowOffset;
}
/// Return a writable pointer to the first element of row y in channel c.
inline float * FloatImage::scanline(uint y, uint c)
{
    nvDebugCheck(y < m_height);

    float * const plane = channel(c);
    const uint rowOffset = y * m_width;
    return plane + rowOffset;
}
/// Set pixel component.
inline void FloatImage::setPixel(float f, uint x, uint y, uint c)
{
nvDebugCheck(m_mem != NULL);
nvDebugCheck(x < m_width);
nvDebugCheck(y < m_height);
nvDebugCheck(c < m_componentNum);
m_mem[(c * m_height + y) * m_width + x] = f;
}
/// Read the value at texel (x, y) of channel c.
inline float FloatImage::pixel(uint x, uint y, uint c) const
{
    nvDebugCheck(m_mem != NULL);
    nvDebugCheck(x < m_width);
    nvDebugCheck(y < m_height);
    nvDebugCheck(c < m_componentNum);

    // Planar layout: channel, then row, then column.
    const uint offset = (c * m_height + y) * m_width + x;
    return m_mem[offset];
}
/// Store f at the flat buffer position idx (must be below m_count).
inline void FloatImage::setPixel(float f, uint idx)
{
    nvDebugCheck(idx < m_count);
    float * const slot = m_mem + idx;
    *slot = f;
}
/// Read the value at the flat buffer position idx (must be below m_count).
inline float FloatImage::pixel(uint idx) const
{
    nvDebugCheck(idx < m_count);
    const float * const slot = m_mem + idx;
    return *slot;
}
/// Offset of texel (x, y) inside a single channel; both coordinates must be in range.
inline uint FloatImage::index(uint x, uint y) const
{
    nvDebugCheck(x < m_width);
    nvDebugCheck(y < m_height);

    const uint rowStart = y * m_width;
    return rowStart + x;
}
/// Offset of texel (x, y) inside a single channel, with both coordinates
/// clamped to the image.
inline uint FloatImage::indexClamp(int x, int y) const
{
    const int row = nv::clamp(y, int(0), int(m_height-1));
    const int column = nv::clamp(x, int(0), int(m_width-1));

    return row * m_width + column;
}
/// Remainder of a / b that stays in [0, b) for negative a as well
/// (b is expected to be positive).
inline int repeat_remainder(int a, int b)
{
    // The built-in % is shifted for negative values so that -1 maps to
    // b - 1, -b maps to 0, and so on.
    return (a >= 0) ? (a % b) : ((a + 1) % b + b - 1);
}
/// Offset of texel (x, y) inside a single channel, with both coordinates
/// wrapped around the image borders.
inline uint FloatImage::indexRepeat(int x, int y) const
{
    const int row = repeat_remainder(y, m_height);
    const int column = repeat_remainder(x, m_width);

    return row * m_width + column;
}
// @@ This could be way more efficient.
/// Offset of texel (x, y) inside a single channel, reflecting coordinates
/// that fall outside the image until they are in range.
inline uint FloatImage::indexMirror(int x, int y) const
{
    const int w = m_width;
    const int h = m_height;

    // Reflect repeatedly: negative values flip sign, values past the end
    // are folded back from the last texel.
    while (x < 0 || x >= w) {
        if (x < 0) x = -x;
        if (x >= w) x = w + w - x - 1;
    }

    while (y < 0 || y >= h) {
        if (y < 0) y = -y;
        if (y >= h) y = h + h - y - 1;
    }

    return index(x, y);
}
/// Offset of texel (x, y) inside a single channel, mapping out-of-range
/// coordinates according to the wrap mode.
inline uint FloatImage::index(int x, int y, WrapMode wm) const
{
    switch (wm) {
        case WrapMode_Clamp:
            return indexClamp(x, y);
        case WrapMode_Repeat:
            return indexRepeat(x, y);
        default:
            // WrapMode_Mirror and anything else.
            return indexMirror(x, y);
    }
}
} // nv namespace
#endif // NV_IMAGE_FLOATIMAGE_H

751
src/nvimage/HoleFilling.cpp Normal file
View File

@ -0,0 +1,751 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Containers.h>
#include <nvmath/nvmath.h>
#include <nvimage/HoleFilling.h>
#include <nvimage/FloatImage.h>
using namespace nv;
// This is a variation of Sapiro's inpainting method.
//
// Performs one dilation step: every hole texel (mask bit clear) that has at
// least one valid texel among its 8 neighbors is filled from those neighbors
// and marked valid in the updated mask. On return *bmap holds the new mask.
void nv::fillExtrapolateOnce(FloatImage * img, BitMap * bmap)
{
    nvCheck(img != NULL);
    nvCheck(bmap != NULL);

    const int w = img->width();
    const int h = img->height();
    const int count = img->componentNum();

    nvCheck(bmap->width() == uint(w));
    nvCheck(bmap->height() == uint(h));

    // The new mask is built separately so that texels filled during this
    // step are not used as sources within the same step.
    // NOTE(review): relies on BitMap(w, h) starting with all bits clear - confirm.
    BitMap * newbmap = new BitMap(w, h);

    for(int c = 0; c < count; c++) {
        float * channel = img->channel(c);

        for(int y = 0; y < h; y++) {
            for(int x = 0; x < w; x++) {

                if (bmap->bitAt(x, y)) {
                    // Not a hole.
                    newbmap->setBitAt(x, y);
                    continue;
                }

                // Validity of the 8 neighbors (clamped at the image borders).
                const bool west = bmap->bitAt(img->indexClamp(x-1, y));
                const bool east = bmap->bitAt(img->indexClamp(x+1, y));
                const bool north = bmap->bitAt(img->indexClamp(x, y-1));
                const bool south = bmap->bitAt(img->indexClamp(x, y+1));
                const bool northwest = bmap->bitAt(img->indexClamp(x-1, y-1));
                const bool northeast = bmap->bitAt(img->indexClamp(x+1, y-1));
                const bool southwest = bmap->bitAt(img->indexClamp(x-1, y+1));
                const bool southeast = bmap->bitAt(img->indexClamp(x+1, y+1));

                int num = west + east + north + south + northwest + northeast + southwest + southeast;

                if (num != 0) {

                    float average = 0.0f;

                    // When exactly one full side is valid, copy the texel
                    // straight across that side instead of averaging.
                    if (num == 3 && west && northwest && southwest) {
                        average = channel[img->indexClamp(x-1, y)];
                    }
                    else if (num == 3 && east && northeast && southeast) {
                        average = channel[img->indexClamp(x+1, y)];
                    }
                    else if (num == 3 && north && northwest && northeast) {
                        average = channel[img->indexClamp(x, y-1)];
                    }
                    else if (num == 3 && south && southwest && southeast) {
                        average = channel[img->indexClamp(x, y+1)];
                    }
                    else {
                        // Plain average of all valid neighbors.
                        float total = 0.0f;
                        if (west) { average += 1 * channel[img->indexClamp(x-1, y)]; total += 1; }
                        if (east) { average += 1 * channel[img->indexClamp(x+1, y)]; total += 1; }
                        if (north) { average += 1 * channel[img->indexClamp(x, y-1)]; total += 1; }
                        if (south) { average += 1 * channel[img->indexClamp(x, y+1)]; total += 1; }

                        if (northwest) { average += channel[img->indexClamp(x-1, y-1)]; ++total; }
                        if (northeast) { average += channel[img->indexClamp(x+1, y-1)]; ++total; }
                        if (southwest) { average += channel[img->indexClamp(x-1, y+1)]; ++total; }
                        if (southeast) { average += channel[img->indexClamp(x+1, y+1)]; ++total; }

                        average /= total;
                    }

                    channel[img->indexClamp(x, y)] = average;
                    newbmap->setBitAt(x, y);
                }
            }
        }
    }

    // Update the bit mask.
    swap(*newbmap, *bmap);

    // The temporary now holds the previous mask contents; release it.
    // It used to be leaked on every call.
    delete newbmap;
}
// Run fillExtrapolateOnce() n times on the same image and mask.
void nv::fillExtrapolateNTimes(FloatImage * img, BitMap * bmap, int n)
{
    nvCheck(img != NULL);
    nvCheck(bmap != NULL);
    nvCheck(n > 0);

    for (int remaining = n; remaining > 0; --remaining)
    {
        fillExtrapolateOnce(img, bmap);
    }
}
namespace {
struct Neighbor {
uint16 x;
uint16 y;
uint32 d;
};
// Compute euclidean squared distance.
static uint dist( uint16 ax, uint16 ay, uint16 bx, uint16 by ) {
int dx = bx - ax;
int dy = by - ay;
return uint(dx*dx + dy*dy);
}
// Check neighbour, this is the core of the EDT algorithm.
static void checkNeighbour( int x, int y, Neighbor * e, const Neighbor & n ) {
nvDebugCheck(e != NULL);
uint d = dist( x, y, n.x, n.y );
if( d < e->d ) {
e->x = n.x;
e->y = n.y;
e->d = d;
}
}
} // namespace
// Voronoi filling using EDT-4
//
// For every texel, two sweeps over the image propagate the coordinates of the
// closest texel whose mask bit is set. Texels whose closest source is not
// themselves are then overwritten, in every channel, with that source texel.
void nv::fillVoronoi(FloatImage * img, const BitMap & bmap)
{
nvCheck(img != NULL);
const int w = img->width();
const int h = img->height();
const int count = img->componentNum();
nvCheck(bmap.width() == uint(w));
nvCheck(bmap.height() == uint(h));
// One entry per texel: closest source coordinates and squared distance.
Array<Neighbor> edm;
edm.resize(w * h);
int x, y;
int x0, x1, y0, y1;
// Init edm.
// Valid texels are their own source at distance 0. Holes start with the
// out-of-image coordinates (w, h) and the distance w*w + h*h.
for( y = 0; y < h; y++ ) {
for( x = 0; x < w; x++ ) {
if( bmap.bitAt(x, y) ) {
edm[y * w + x].x = x;
edm[y * w + x].y = y;
edm[y * w + x].d = 0;
}
else {
edm[y * w + x].x = w;
edm[y * w + x].y = h;
edm[y * w + x].d = w*w + h*h;
}
}
}
// First pass.
// Top to bottom: each row is scanned left to right using the row above and
// the west neighbor, then right to left using the east neighbor.
for( y = 0; y < h; y++ ) {
for( x = 0; x < w; x++ ) {
x0 = clamp(x-1, 0, w-1); // @@ Wrap?
x1 = clamp(x+1, 0, w-1);
y0 = clamp(y-1, 0, h-1);
Neighbor & e = edm[y * w + x];
checkNeighbour(x, y, &e, edm[y0 * w + x0]);
checkNeighbour(x, y, &e, edm[y0 * w + x]);
checkNeighbour(x, y, &e, edm[y0 * w + x1]);
checkNeighbour(x, y, &e, edm[y * w + x0]);
}
for( x = w-1; x >= 0; x-- ) {
x1 = clamp(x+1, 0, w-1);
Neighbor & e = edm[y * w + x];
checkNeighbour(x, y, &e, edm[y * w + x1]);
}
}
// Second pass.
// Bottom to top: each row is scanned right to left using the row below and
// the east neighbor, then left to right using the west neighbor.
for( y = h-1; y >= 0; y-- ) {
for( x = w-1; x >= 0; x-- ) {
x0 = clamp(x-1, 0, w-1);
x1 = clamp(x+1, 0, w-1);
y1 = clamp(y+1, 0, h-1);
Neighbor & e = edm[y * w + x];
checkNeighbour(x, y, &e, edm[y * w + x1]);
checkNeighbour(x, y, &e, edm[y1 * w + x0]);
checkNeighbour(x, y, &e, edm[y1 * w + x]);
checkNeighbour(x, y, &e, edm[y1 * w + x1]);
}
for( x = 0; x < w; x++ ) {
x0 = clamp(x-1, 0, w-1);
Neighbor & e = edm[y * w + x];
checkNeighbour(x, y, &e, edm[y * w + x0]);
}
}
// Fill empty holes.
// NOTE(review): if no mask bit is set at all, every entry keeps the initial
// (w, h) source and the check below fails - confirm callers never pass an
// empty mask.
for( y = 0; y < h; y++ ) {
for( x = 0; x < w; x++ ) {
const int sx = edm[y * w + x].x;
const int sy = edm[y * w + x].y;
nvDebugCheck(sx < w && sy < h);
if( sx != x || sy != y ) {
for(int c = 0; c < count; c++ ) {
img->setPixel(img->pixel(sx, sy, c), x, y, c);
}
}
}
}
}
// Hole filling by blurring. Not implemented yet: only validates img and
// leaves the image untouched.
void nv::fillBlur(FloatImage * img, const BitMap & bmap)
{
nvCheck(img != NULL);
// @@ Apply a 3x3 kernel.
}
// Build the next coarser level of a masked image.
//
// Returns false, leaving the outputs untouched, when the source has no holes
// or is too small to be halved. Otherwise allocates a half-size image and
// mask, stores them in *_dst and *_dstMask (owned by the caller) and returns
// true. A destination texel is valid when at least one of its four source
// texels is valid, and its value is the average of the valid ones.
static bool downsample(const FloatImage * src, const BitMap * srcMask, const FloatImage ** _dst, const BitMap ** _dstMask)
{
    const uint w = src->width();
    const uint h = src->height();
    const uint count = src->componentNum();

    // count holes in srcMask, return false if fully filled.
    uint holes = 0;
    for(uint y = 0; y < h; y++) {
        for(uint x = 0; x < w; x++) {
            holes += srcMask->bitAt(x, y) == 0;
        }
    }

    // Stop when no holes or when the texture is very small. Extents below 2
    // must stop as well: halving them would allocate a zero-sized level that
    // cannot be sampled.
    if (holes == 0 || w <= 2 || h <= 2) {
        return false;
    }

    // Apply box filter to image and mask and return true.
    const uint nw = w / 2;
    const uint nh = h / 2;

    FloatImage * dst = new FloatImage();
    dst->allocate(count, nw, nh);

    // NOTE(review): relies on BitMap(nw, nh) starting with all bits clear - confirm.
    BitMap * dstMask = new BitMap(nw, nh);

    for(uint c = 0; c < count; c++) {
        for(uint y = 0; y < nh; y++) {
            for(uint x = 0; x < nw; x++) {

                // The 2x2 source footprint of this destination texel.
                const uint x0 = 2 * x + 0;
                const uint x1 = 2 * x + 1;
                const uint y0 = 2 * y + 0;
                const uint y1 = 2 * y + 1;

                const float f0 = src->pixel(x0, y0, c);
                const float f1 = src->pixel(x1, y0, c);
                const float f2 = src->pixel(x0, y1, c);
                const float f3 = src->pixel(x1, y1, c);

                const bool b0 = srcMask->bitAt(x0, y0);
                const bool b1 = srcMask->bitAt(x1, y0);
                const bool b2 = srcMask->bitAt(x0, y1);
                const bool b3 = srcMask->bitAt(x1, y1);

                if (b0 || b1 || b2 || b3) {

                    // Set bit mask.
                    dstMask->setBitAt(x, y);

                    // Set pixel: average of the valid source texels only.
                    float value = 0.0f;
                    int total = 0;
                    if (b0) { value += f0; total++; }
                    if (b1) { value += f1; total++; }
                    if (b2) { value += f2; total++; }
                    if (b3) { value += f3; total++; }
                    dst->setPixel(value / total, x, y, c);
                }
            }
        }
    }

    *_dst = dst;
    *_dstMask = dstMask;

    return true;
}
// This is the filter used in the Lumigraph paper. The Unreal engine uses something similar.
//
// Builds a chain of progressively halved images and masks, then, for every
// texel, walks the chain from fine to coarse and copies the first valid
// sample it finds into img.
void nv::fillPullPush(FloatImage * img, const BitMap & bmap)
{
    nvCheck(img != NULL);

    const uint count = img->componentNum();
    const uint w = img->width();
    const uint h = img->height();
    const uint num = log2(max(w,h));

    // Build mipmap chain. Level 0 is the caller's image and mask.
    Array<const FloatImage *> mipmaps(num);
    Array<const BitMap *> mipmapMasks(num);

    mipmaps.append(img);
    mipmapMasks.append(&bmap);

    const FloatImage * current;
    const BitMap * currentMask;

    // Compute mipmap chain.
    while(downsample(mipmaps.back(), mipmapMasks.back(), &current, &currentMask))
    {
        mipmaps.append(current);
        mipmapMasks.append(currentMask);
    }

    // Sample mipmaps until non-hole is found.
    // NOTE(review): for odd extents sx / 2 can reach the width of the coarser
    // level (e.g. width 5 -> level width 2, sx 4 -> 2) - confirm inputs are
    // power-of-two sized or clamp here.
    const uint levelCount = mipmaps.count();
    for(uint y = 0; y < h; y++) {
        for(uint x = 0; x < w; x++) {

            uint sx = x;
            uint sy = y;

            for(uint l = 0; l < levelCount; l++) {
                if (mipmapMasks[l]->bitAt(sx, sy))
                {
                    // Sample mipmaps[l](sx, sy) and copy to img(x, y)
                    for(uint c = 0; c < count; c++) {
                        img->setPixel(mipmaps[l]->pixel(sx, sy, c), x, y, c);
                    }
                    break;
                }

                // Same position one level coarser.
                sx /= 2;
                sy /= 2;
            }
        }
    }

    // Level 0 belongs to the caller: drop it from the arrays so that only the
    // levels allocated by downsample() are deleted below.
    mipmaps[0] = NULL;
    mipmapMasks[0] = NULL;

    deleteAll(mipmaps);
    deleteAll(mipmapMasks);
}
/*
void nv::fillSeamFix(FloatImage * img, const BitMap & bmap)
{
}
*/
#if 0 // Code below is under the BPL license.
/**
DoPixelSeamFix
10-20-02
Looks in the 5x5 local neighborhood (LocalPixels) of the desired pixel to fill.
It tries to build a quadratic model of the neighborhood surface to use in
extrapolating. You need 5 pixels to establish a 2d quadratic curve.
This is really just a nice generic way to extrapolate pixels. It also happens
to work great for seam-fixing.
Note that I'm working on normals, but I treat them just as 3 scalars and normalize
at the end. To be more correct, I would work on the surface of a sphere, but that
just seems like way too much work.
**/
struct LocalPixels
{
// 5x5 neighborhood
// the center is at result
// index [y][x]
bool fill[5][5];
float data[5][5];
mutable float result;
mutable float weight;
bool Quad3SubH(gVec4 * pQ,int row) const
{
const bool * pFill = fill[row];
const float * pDat = data[row];
if ( pFill[1] && pFill[2] && pFill[3] )
{
// good row
*pQ = pDat[1] - 2.f * pDat[2] + pDat[3];
return true;
}
else if ( pFill[0] && pFill[1] && pFill[2] )
{
// good row
*pQ = pDat[0] - 2.f * pDat[1] + pDat[2];
return true;
}
else if ( pFill[2] && pFill[3] && pFill[4] )
{
// good row
*pQ = pDat[2] - 2.f * pDat[3] + pDat[4];
return true;
}
return false;
}
// improve result with a horizontal quad in row 1 and/or
bool Quad3SubV(gVec4 * pQ,int col) const
{
if ( fill[1][col] && fill[2][col] && fill[3][col] )
{
// good row
*pQ = data[1][col] - 2.f * data[2][col] + data[3][col];
return true;
}
else if ( fill[0][col] && fill[1][col] && fill[2][col] )
{
// good row
*pQ = data[0][col] - 2.f * data[1][col] + data[2][col];
return true;
}
else if ( fill[2][col] && fill[3][col] && fill[4][col] )
{
// good row
*pQ = data[2][col] - 2.f * data[3][col] + data[4][col];
return true;
}
return false;
}
bool Quad3H(gVec4 * pQ) const
{
if ( ! Quad3SubH(pQ,1) )
{
return Quad3SubH(pQ,3);
}
gVec4 q(0,0,0,0); // initializer not needed, just make it shut up
if ( Quad3SubH(&q,3) )
{
// got q and pQ
*pQ = (*pQ+q)*0.5f;
}
return true;
}
bool Quad3V(gVec4 * pQ) const
{
if ( ! Quad3SubV(pQ,1) )
{
return Quad3SubV(pQ,3);
}
gVec4 q(0,0,0,0); // initializer not needed, just make it shut up
if ( Quad3SubV(&q,3) )
{
// got q and pQ
*pQ = (*pQ+q)*0.5f;
}
return true;
}
// Quad returns ([0]+[2] - 2.f*[1])
// a common want is [1] - ([0]+[2])*0.5f ;
// so use -0.5f*Quad
bool TryQuads() const
{
bool res = false;
// look for a pair that straddles the middle:
if ( fill[2][1] && fill[2][3] )
{
// got horizontal straddle
gVec4 q;
if ( Quad3H(&q) )
{
result += (data[2][1] + data[2][3] - q) * 0.5f;
weight += 1.f;
res = true;
}
}
if ( fill[1][2] && fill[3][2] )
{
// got vertical straddle
gVec4 q;
if ( Quad3V(&q) )
{
result += (data[1][2] + data[3][2] - q) * 0.5f;
weight += 1.f;
res = true;
}
}
// look for pairs that lead into the middle :
if ( fill[2][0] && fill[2][1] )
{
// got left-side pair
gVec4 q;
if ( Quad3H(&q) )
{
result += data[2][1]*2.f - data[2][0] + q;
weight += 1.f;
res = true;
}
}
if ( fill[2][3] && fill[2][4] )
{
// got right-side pair
gVec4 q;
if ( Quad3H(&q) )
{
result += data[2][3]*2.f - data[2][4] + q;
weight += 1.f;
res = true;
}
}
if ( fill[0][2] && fill[1][2] )
{
// got left-side pair
gVec4 q;
if ( Quad3V(&q) )
{
result += data[1][2]*2.f - data[0][2] + q;
weight += 1.f;
res = true;
}
}
if ( fill[3][2] && fill[4][2] )
{
// got right-side pair
gVec4 q;
if ( Quad3V(&q) )
{
result += data[3][2]*2.f - data[4][2] + q;
weight += 1.f;
res = true;
}
}
return res;
}
// First-order (planar) estimates of the centre sample from the four 2x2
// corners that touch it: for each corner whose three samples are all filled,
// edgeA + edgeB - diagonal is added to `result` and 1 to `weight`.
// Returns true when at least one corner contributed.
bool TryPlanar() const
{
    // per case: { edgeA row,col,  edgeB row,col,  diagonal row,col }
    const int corner[4][6] =
    {
        { 2,1, 1,2, 1,1 },
        { 2,1, 3,2, 3,1 },
        { 2,3, 1,2, 1,3 },
        { 2,3, 3,2, 3,3 }
    };
    bool found = false;
    for ( int k = 0; k < 4; k++ )
    {
        const int * c = corner[k];
        const bool complete = fill[ c[0] ][ c[1] ]
                           && fill[ c[2] ][ c[3] ]
                           && fill[ c[4] ][ c[5] ];
        if ( complete )
        {
            result += data[ c[0] ][ c[1] ] + data[ c[2] ][ c[3] ] - data[ c[4] ][ c[5] ];
            weight += 1.f;
            found = true;
        }
    }
    return found;
}
bool TryTwos() const
{
bool res = false;
if ( fill[2][1] && fill[2][3] )
{
result += (data[2][1] + data[2][3]) * 0.5f;
weight += 1.f;
res = true;
}
if ( fill[1][2] && fill[3][2] )
{
result += (data[1][2] + data[3][2]) * 0.5f;
weight += 1.f;
res = true;
}
// four side-rotates :
const int indices[] =
{
2,1, 2,0,
2,3, 2,4,
1,2, 0,2,
3,2, 4,2,
};
for(int i=0;i<4;i++)
{
const int * I = indices + i*4;
if ( ! fill[ I[0] ][ I[1] ] )
continue;
if ( ! fill[ I[2] ][ I[3] ] )
continue;
result += data[ I[0] ][ I[1] ]*2.f - data[ I[2] ][ I[3] ];
weight += 1.f;
res = true;
}
return res;
}
// Resets the accumulators and tries the estimators from highest to lowest
// order (quadratic, planar, two-sample), stopping at the first one that
// produces anything.  On success `result` holds the sum of the estimates and
// `weight` their count.
bool DoLocalPixelFill() const
{
    result = gVec4::zero;
    weight = 0.f;
    // short-circuit evaluation keeps the "first success wins" order
    return TryQuads() || TryPlanar() || TryTwos();
}
}; // LocalPixels -----------------------------------------------
// Fills pixels whose state is eNull or eEdge from their filled 5x5
// neighbourhood, repeating NUM_SEAMFIX_PASSES times.  Each pass:
//   1. for every eNull/eEdge pixel, gathers the neighbouring pixels that are
//      neither eNull nor eEdge into a LocalPixels and asks it for an estimate;
//   2. afterwards stamps every eNull/eEdge pixel that is no longer IsNull()
//      as eSeamFixed, so it counts as a filled neighbour in the next pass.
// Progress dots are written to the log while running.
void gNormalMap::DoPixelSeamFix()
{
gLog::Printf("gNormalMap::DoPixelSeamFix..");
// aim for roughly desiredTicks progress dots over all passes
const int desiredTicks = 30;
const int heightPerTick = NUM_SEAMFIX_PASSES * m_height / desiredTicks;
int tick = 0;
for(int pass=0;pass<NUM_SEAMFIX_PASSES;pass++)
{
for(int yb=0;yb<m_height;yb++)
{
gVec4 * pRow = m_normals + m_width * yb;
const EState * pStateRow = m_states + m_width * yb;
for(int xb=0;xb<m_width;xb++)
{
if ( pStateRow[xb] != eNull && pStateRow[xb] != eEdge )
{
ASSERT( ! IsNull(pRow[xb]) );
continue; // it's got a pixel
}
// can be non-null, if it wasn't actually inside any tri,
// but got the anti-aliased edge effect of a tri
// replace edge pixels with seam-fix here
//ASSERT( IsNull(pRow[xb]) );
// make the local neighborhood:
int numFill = 0;
LocalPixels lp;
for(int ny=0;ny<5;ny++)
{
int y = (yb + ny - 2);
if ( y < 0 || y >= m_height )
{
// out of range
for(int i=0;i<5;i++)
{
lp.fill[ny][i] = false;
}
continue;
}
// NOTE: these two locals shadow the outer pRow / pStateRow for the rest of
// this ny iteration; the outer ones are visible again after the loop
gVec4 * pRow = m_normals + m_width * y;
const EState * pStateRow = m_states + m_width * y;
for(int nx=0;nx<5;nx++)
{
int x = (xb + nx - 2);
if ( x < 0 || x >= m_width )
{
lp.fill[ny][nx] = false;
}
else if ( pStateRow[x] == eNull || pStateRow[x] == eEdge )
{
lp.fill[ny][nx] = false;
}
else
{
lp.fill[ny][nx] = true;
lp.data[ny][nx] = pRow[x];
numFill++;
}
}
}
// skip pixels with fewer than 2 filled neighbours
// NOTE(review): this comment used to say "need at least 3", which does not
// match the `< 2` test below -- confirm which threshold is intended
if ( numFill < 2 )
continue;
ASSERT(lp.fill[2][2] == false);
if ( lp.DoLocalPixelFill() )
{
// the xyz part is normalized; only component [3] is divided by the weight
if ( lp.result.MutableVec3().NormalizeSafe() )
{
pRow[xb] = lp.result;
pRow[xb][3] /= lp.weight;
}
}
}
if ( ++tick == heightPerTick )
{
tick = 0;
gLog::Printf(".");
}
}
// now run back over and stamp anything that's not null as being ok
for(int y=0;y<m_height;y++)
{
const gVec4 * pRow = m_normals + m_width * y;
EState * pStateRow = m_states + m_width * y;
for(int x=0;x<m_width;x++)
{
if ( ( pStateRow[x] == eNull || pStateRow[x] == eEdge ) && ! IsNull(pRow[x]) )
{
pStateRow[x] = eSeamFixed;
}
}
}
}
gLog::Printf("done\n");
}
#endif // 0

96
src/nvimage/HoleFilling.h Normal file
View File

@ -0,0 +1,96 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_HOLEFILLING_H
#define NV_IMAGE_HOLEFILLING_H
#include <nvcore/BitArray.h>
#include <nvimage/nvimage.h>
namespace nv
{
class FloatImage;
/// Bit mask.
class BitMap
{
public:
BitMap(uint w, uint h) :
m_width(w), m_height(h), m_bitArray(w*h)
{
}
const uint width() const { return m_width; }
const uint height() const { return m_height; }
bool bitAt(uint x, uint y) const
{
nvDebugCheck(x < m_width && y < m_height);
return m_bitArray.bitAt(y * m_width + x);
}
bool bitAt(uint idx) const
{
return m_bitArray.bitAt(idx);
}
void setBitAt(uint x, uint y)
{
nvDebugCheck(x < m_width && y < m_height);
m_bitArray.setBitAt(y * m_width + x);
}
void setBitAt(uint idx)
{
m_bitArray.setBitAt(idx);
}
void clearBitAt(uint x, uint y)
{
nvDebugCheck(x < m_width && y < m_height);
m_bitArray.clearBitAt(y * m_width + x);
}
void clearBitAt(uint idx)
{
m_bitArray.clearBitAt(idx);
}
void clearAll()
{
m_bitArray.clearAll();
}
void setAll()
{
m_bitArray.setAll();
}
void toggleAll()
{
m_bitArray.toggleAll();
}
friend void swap(BitMap & a, BitMap & b)
{
nvCheck(a.m_width == b.m_width);
nvCheck(a.m_height == b.m_height);
//swap(const_cast<uint &>(a.m_width), const_cast<uint &>(b.m_width));
//swap(const_cast<uint &>(a.m_height), const_cast<uint &>(b.m_height));
swap(a.m_bitArray, b.m_bitArray);
}
private:
const uint m_width;
const uint m_height;
BitArray m_bitArray;
};
// Hole-filling entry points.  Only the declarations live in this header; the
// implementations are elsewhere.
// The first three take the mask by const reference.
NVIMAGE_API void fillVoronoi(FloatImage * img, const BitMap & bmap);
NVIMAGE_API void fillBlur(FloatImage * img, const BitMap & bmap);
NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap & bmap);
// The extrapolate variants take the mask by non-const pointer --
// presumably they update it as holes get filled; confirm in the implementation.
NVIMAGE_API void fillExtrapolateOnce(FloatImage * img, BitMap * bmap);
NVIMAGE_API void fillExtrapolateNTimes(FloatImage * img, BitMap * bmap, int n);
} // nv namespace
#endif // NV_IMAGE_HOLEFILLING_H

125
src/nvimage/Image.cpp Normal file
View File

@ -0,0 +1,125 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Debug.h>
#include <nvcore/Ptr.h>
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/ImageIO.h>
using namespace nv;
/// Creates an empty image: zero size, Format_RGB, no pixel storage.
Image::Image() : m_width(0), m_height(0), m_format(Format_RGB), m_data(NULL)
{
}
/// Releases the pixel storage via free().
Image::~Image()
{
free();
}
/// Discards the current pixels and allocates storage for a w x h image.
/// The new pixels are default-constructed Color32 values.
void Image::allocate(uint w, uint h)
{
    free();

    m_data = new Color32[w*h];
    m_height = h;
    m_width = w;
}
bool Image::load(const char * name)
{
free();
AutoPtr<Image> img(ImageIO::load(name));
if (img == NULL) {
return false;
}
swap(m_width, img->m_width);
swap(m_height, img->m_height);
swap(m_format, img->m_format);
swap(m_data, img->m_data);
return true;
}
/// Makes the image refer to an existing w x h pixel buffer after releasing
/// the current one.  The pointer is stored as-is, without copying.
/// NOTE(review): free() (and therefore the destructor) deletes m_data, so a
/// caller that keeps ownership of `data` presumably has to call unwrap()
/// first -- confirm against the callers.
void Image::wrap(void * data, uint w, uint h)
{
    free();

    m_width = w;
    m_height = h;
    m_data = (Color32 *)data;
}
/// Detaches the image from its pixel buffer without deleting it and resets
/// the size to 0 x 0.
void Image::unwrap()
{
    m_width = 0;
    m_height = 0;
    m_data = NULL;
}
/// Releases the pixel storage and leaves the image without data.
/// Width, height and format are not modified.
void Image::free()
{
    // The storage is created with `new Color32[w*h]` in allocate(), so it
    // must be released with the array form of delete.
    delete [] m_data;
    m_data = NULL;
}
/// Image width in pixels.
uint Image::width() const
{
return m_width;
}
/// Image height in pixels.
uint Image::height() const
{
return m_height;
}
/// Pointer to the first pixel of row h (read-only).  The row index is checked
/// in debug builds only.
const Color32 * Image::scanline(uint h) const
{
    nvDebugCheck(h < m_height);
    const uint firstPixel = h * m_width;
    return m_data + firstPixel;
}
/// Pointer to the first pixel of row h (writable).  The row index is checked
/// in debug builds only.
Color32 * Image::scanline(uint h)
{
    nvDebugCheck(h < m_height);
    const uint firstPixel = h * m_width;
    return m_data + firstPixel;
}
/// Pointer to the pixel storage (read-only); NULL when the image has no data.
const Color32 * Image::pixels() const
{
return m_data;
}
/// Pointer to the pixel storage (writable); NULL when the image has no data.
Color32 * Image::pixels()
{
return m_data;
}
/// Pixel at linear index idx (read-only).  The index is checked against
/// width*height in debug builds only.
const Color32 & Image::pixel(uint idx) const
{
    nvDebugCheck(idx < m_width * m_height);
    return *(m_data + idx);
}
/// Pixel at linear index idx (writable).  The index is checked against
/// width*height in debug builds only.
Color32 & Image::pixel(uint idx)
{
    nvDebugCheck(idx < m_width * m_height);
    return *(m_data + idx);
}
/// Returns the stored format tag.
Image::Format Image::format() const
{
return m_format;
}
/// Sets the format tag only; the pixel data is not touched.
void Image::setFormat(Image::Format f)
{
m_format = f;
}

77
src/nvimage/Image.h Normal file
View File

@ -0,0 +1,77 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_IMAGE_H
#define NV_IMAGE_IMAGE_H
#include <nvcore/Debug.h>
#include <nvimage/nvimage.h>
namespace nv
{
class Color32;
/// 32 bit RGBA image.
/// Stores width*height Color32 pixels in one contiguous block, row by row.
/// NOTE(review): no copy constructor or assignment operator is declared here,
/// so a compiler-generated copy would share m_data with the original --
/// confirm that images are never copied by value.
class Image
{
public:
/// Tag stored alongside the pixels; see format() / setFormat().
enum Format
{
Format_RGB,
Format_ARGB,
};
NVIMAGE_API Image();
NVIMAGE_API ~Image();
// Storage management.
NVIMAGE_API void allocate(uint w, uint h);
NVIMAGE_API bool load(const char * name);
NVIMAGE_API void wrap(void * data, uint w, uint h);
NVIMAGE_API void unwrap();
// Dimensions.
NVIMAGE_API uint width() const;
NVIMAGE_API uint height() const;
// Pixel access: by row, as a whole, by linear index, and by (x, y).
NVIMAGE_API const Color32 * scanline(uint h) const;
NVIMAGE_API Color32 * scanline(uint h);
NVIMAGE_API const Color32 * pixels() const;
NVIMAGE_API Color32 * pixels();
NVIMAGE_API const Color32 & pixel(uint idx) const;
NVIMAGE_API Color32 & pixel(uint idx);
// The (x, y) overloads are defined inline below the class.
const Color32 & pixel(uint x, uint y) const;
Color32 & pixel(uint x, uint y);
NVIMAGE_API Format format() const;
NVIMAGE_API void setFormat(Format f);
private:
void free();
private:
uint m_width;
uint m_height;
Format m_format;
Color32 * m_data;
};
/// Pixel at column x, row y (read-only); coordinates are checked in debug
/// builds only.
inline const Color32 & Image::pixel(uint x, uint y) const
{
    nvDebugCheck(x < width() && y < height());
    const uint idx = y * width() + x;
    return pixel(idx);
}
/// Pixel at column x, row y (writable); coordinates are checked in debug
/// builds only.
inline Color32 & Image::pixel(uint x, uint y)
{
    nvDebugCheck(x < width() && y < height());
    const uint idx = y * width() + x;
    return pixel(idx);
}
} // nv namespace
#endif // NV_IMAGE_IMAGE_H

863
src/nvimage/ImageIO.cpp Normal file
View File

@ -0,0 +1,863 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Ptr.h>
#include <nvcore/Containers.h>
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvmath/Color.h>
#include "ImageIO.h"
#include "Image.h"
#include "FloatImage.h"
#include "TgaFile.h"
// Extern
#if defined(HAVE_JPEG)
extern "C" {
# include <jpeglib.h>
}
#endif
#if defined(HAVE_PNG)
# include <png.h>
#endif
#if defined(HAVE_TIFF)
# define _TIFF_DATA_TYPEDEFS_
# include <tiffio.h>
#endif
using namespace nv;
namespace {
// Array of image load plugins.
// static HashMap<String, ImageInput_Plugin> s_plugin_load_map;
// Array of image save plugins.
// static HashMap<String, ImageOutput_Plugin> s_plugin_save_map;
// 16 bit pixel with three 5-bit channels, declared in b, g, r order; the TGA
// loader reinterprets raw 16 bit pixel data as this struct.
// NOTE(review): bit-field layout is implementation-defined, so this relies on
// the compiler placing `b` in the low bits -- confirm for each target.
struct Color555 {
uint16 b : 5;
uint16 g : 5;
uint16 r : 5;
};
} // namespace
/// Opens the file `name` and loads it with the stream overload of load().
/// Returns NULL when the file cannot be opened or the format is not handled;
/// otherwise a newly allocated Image.
Image * nv::ImageIO::load(const char * name)
{
    StdInputStream stream(name);

    if (stream.isError()) {
        // The function returns a pointer: report failure with NULL.  (`false`
        // is no longer accepted as a null pointer constant by current C++.)
        return NULL;
    }

    return load(name, stream);
}
/// Picks a loader from the (case-insensitive) file extension of `name` and
/// runs it on the stream.  TGA is always available; JPEG and PNG only when the
/// corresponding HAVE_* macro is defined.  Returns NULL for any other
/// extension.
Image * nv::ImageIO::load(const char * name, Stream & s)
{
    const char * ext = Path::extension(name);

    const bool isTga = (strCaseCmp(ext, ".tga") == 0);
    if (isTga) {
        return loadTGA(s);
    }

#if defined(HAVE_JPEG)
    const bool isJpeg = (strCaseCmp(ext, ".jpg") == 0 || strCaseCmp(ext, ".jpeg") == 0);
    if (isJpeg) {
        return loadJPG(s);
    }
#endif

#if defined(HAVE_PNG)
    const bool isPng = (strCaseCmp(ext, ".png") == 0);
    if (isPng) {
        return loadPNG(s);
    }
#endif

    // @@ use image plugins?
    return NULL;
}
/// Load TGA image.
///
/// Handles indexed (24 bit palette), RGB (16/24/32 bit) and grey images, both
/// raw and RLE compressed.  Returns a newly allocated Image, or NULL when the
/// header describes something this loader does not support.
/// Grey and 32 bit images are tagged Format_ARGB; the others keep the default
/// format.
Image * nv::ImageIO::loadTGA(Stream & s)
{
    nvCheck(!s.isError());

    TgaHeader tga;
    s << tga;
    s.seek(TgaHeader::Size + tga.id_length);

    // Get header info.
    bool rle = false;
    bool pal = false;
    bool grey = false;

    switch( tga.image_type ) {
        case TGA_TYPE_RLE_INDEXED:
            rle = true;
            // no break is intended!
        case TGA_TYPE_INDEXED:
            if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) {
                nvDebug( "*** ImageIO::loadTGA: Error, only 24bit paletted images are supported.\n" );
                return NULL;
            }
            pal = true;
            break;

        case TGA_TYPE_RLE_RGB:
            rle = true;
            // no break is intended!
        case TGA_TYPE_RGB:
            // neither paletted nor grey: handled by the final branch below
            break;

        case TGA_TYPE_RLE_GREY:
            rle = true;
            // no break is intended!
        case TGA_TYPE_GREY:
            grey = true;
            break;

        default:
            nvDebug( "*** ImageIO::loadTGA: Error, unsupported image type.\n" );
            return NULL;
    }

    // Bytes per pixel.  This sizes the RLE scratch pixel below, so it has to
    // be validated in release builds too, not only with a debug check.
    const uint pixel_size = (tga.pixel_size/8);
    if (pixel_size == 0 || pixel_size > 4) {
        nvDebug( "*** ImageIO::loadTGA: Error, unsupported pixel size.\n" );
        return NULL;
    }

    const uint size = tga.width * tga.height * pixel_size;

    // Read palette.  Zero-initialised so that indices beyond colormap_length
    // read black instead of uninitialised memory.
    uint8 palette[768] = { 0 };
    if( pal ) {
        // 256 entries fit in the buffer and are accepted by the check above.
        nvDebugCheck(tga.colormap_length <= 256);
        s.serialize(palette, 3 * tga.colormap_length);
    }

    // Decode image.
    uint8 * mem = new uint8[size];
    if( rle ) {
        // Decompress image into mem.
        uint8 * dst = mem;
        uint remaining = size;  // bytes still to be produced; always a multiple of pixel_size

        while (remaining > 0) {
            // Get packet header
            uint8 c = 0;
            s << c;

            uint count = (c & 0x7f) + 1;

            // A malformed packet must not run past the end of the buffer.
            if (count * pixel_size > remaining) {
                count = remaining / pixel_size;
            }
            const uint bytes = count * pixel_size;
            remaining -= bytes;

            if (c & 0x80) {
                // RLE pixels: one pixel value repeated `count` times.
                uint8 pixel[4];
                s.serialize( pixel, pixel_size );
                do {
                    memcpy(dst, pixel, pixel_size);
                    dst += pixel_size;
                } while (--count);
            }
            else {
                // Raw pixels.
                s.serialize(dst, bytes);
                dst += bytes;
            }
        }
    }
    else {
        s.serialize(mem, size);
    }

    // Allocate image.
    AutoPtr<Image> img(new Image());
    img->allocate(tga.width, tga.height);

    // Rows are written top-down or bottom-up depending on the origin flag.
    int lstep;
    Color32 * dst;
    if( tga.flags & TGA_ORIGIN_UPPER ) {
        lstep = tga.width;
        dst = img->pixels();
    }
    else {
        lstep = - tga.width;
        dst = img->pixels() + (tga.height-1) * tga.width;
    }

    // Write image.
    uint8 * src = mem;
    if( pal ) {
        for( int y = 0; y < tga.height; y++ ) {
            for( int x = 0; x < tga.width; x++ ) {
                uint8 idx = *src++;
                dst[x].setBGRA(palette[3*idx+0], palette[3*idx+1], palette[3*idx+2], 0xFF);
            }
            dst += lstep;
        }
    }
    else if( grey ) {
        img->setFormat(Image::Format_ARGB);

        for( int y = 0; y < tga.height; y++ ) {
            for( int x = 0; x < tga.width; x++ ) {
                dst[x].setBGRA(*src, *src, *src, *src);
                src++;
            }
            dst += lstep;
        }
    }
    else {
        if( tga.pixel_size == 16 ) {
            for( int y = 0; y < tga.height; y++ ) {
                for( int x = 0; x < tga.width; x++ ) {
                    Color555 c = *reinterpret_cast<Color555 *>(src);
                    // expand 5 bit channels to 8 bits by replicating the top bits
                    uint8 b = (c.b << 3) | (c.b >> 2);
                    uint8 g = (c.g << 3) | (c.g >> 2);
                    uint8 r = (c.r << 3) | (c.r >> 2);
                    dst[x].setBGRA(b, g, r, 0xFF);
                    src += 2;
                }
                dst += lstep;
            }
        }
        else if( tga.pixel_size == 24 ) {
            for( int y = 0; y < tga.height; y++ ) {
                for( int x = 0; x < tga.width; x++ ) {
                    dst[x].setBGRA(src[0], src[1], src[2], 0xFF);
                    src += 3;
                }
                dst += lstep;
            }
        }
        else if( tga.pixel_size == 32 ) {
            img->setFormat(Image::Format_ARGB);

            for( int y = 0; y < tga.height; y++ ) {
                for( int x = 0; x < tga.width; x++ ) {
                    dst[x].setBGRA(src[0], src[1], src[2], src[3]);
                    src += 4;
                }
                dst += lstep;
            }
        }
    }

    // free uncompressed data.
    delete [] mem;

    return img.release();
}
/// Save TGA image.
///
/// Writes `img` as an uncompressed, top-down TGA: 32 bit BGRA when the image
/// format is Format_ARGB, 24 bit BGR otherwise.  Always returns true.
bool nv::ImageIO::saveTGA(Stream & s, const Image * img)
{
    nvCheck(!s.isError());
    nvCheck(img != NULL);
    nvCheck(img->pixels() != NULL);

    const bool hasAlpha = (img->format() == Image::Format_ARGB);
    const uint bytesPerPixel = hasAlpha ? 4 : 3;

    TgaFile tga;
    tga.head.id_length = 0;
    tga.head.colormap_type = 0;
    tga.head.image_type = TGA_TYPE_RGB;

    tga.head.colormap_index = 0;
    tga.head.colormap_length = 0;
    tga.head.colormap_size = 0;

    tga.head.x_origin = 0;
    tga.head.y_origin = 0;
    tga.head.width = img->width();
    tga.head.height = img->height();
    tga.head.pixel_size = hasAlpha ? 32 : 24;
    tga.head.flags = TGA_ORIGIN_UPPER;

    // @@ Serialize directly.
    tga.allocate();

    const uint pixelCount = img->width() * img->height();
    for (uint i = 0; i < pixelCount; i++) {
        const Color32 color = img->pixel(i);
        const uint base = bytesPerPixel * i;
        tga.mem[base + 0] = color.b;
        tga.mem[base + 1] = color.g;
        tga.mem[base + 2] = color.r;
        if (hasAlpha) {
            tga.mem[base + 3] = color.a;
        }
    }

    s << tga;

    tga.free();

    return true;
}
#if defined(HAVE_PNG)
/// libpng read callback: pulls `length` bytes from the Stream that was
/// registered with png_set_read_fn and raises a libpng error when the stream
/// reports a failure.
static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
{
    nvDebugCheck(png_ptr != NULL);

    // Use the accessor instead of reaching into the png struct, whose members
    // are not part of libpng's public interface.
    Stream * s = (Stream *)png_get_io_ptr(png_ptr);
    s->serialize(data, (int)length);

    if (s->isError()) {
        png_error(png_ptr, "Read Error");
    }
}
/// Loads a PNG from the stream into a newly allocated 32 bit image.
/// Every input is expanded to 8 bit per channel RGBA (missing alpha is filled
/// with 0xFF) and then reordered to BGRA.  The image is tagged Format_ARGB
/// when the decoded data has an alpha channel.
/// Returns NULL when libpng cannot be initialised or reports a read error.
/// NOTE(review): the error path is a longjmp back to the setjmp below; `img`
/// and `row_data` are created after that point, so they are presumably leaked
/// when libpng fails late -- confirm and restructure if that matters.
Image * nv::ImageIO::loadPNG(Stream & s)
{
    nvCheck(!s.isError());

    // Set up a read buffer and check the library version
    png_structp png_ptr;
    png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
    if (png_ptr == NULL) {
        // nvDebug( "*** LoadPNG: Error allocating read buffer in file '%s'.\n", name );
        return NULL;
    }

    // Allocate/initialize a memory block for the image information
    png_infop info_ptr = png_create_info_struct(png_ptr);
    if (info_ptr == NULL) {
        png_destroy_read_struct(&png_ptr, NULL, NULL);
        // nvDebug( "*** LoadPNG: Error allocating image information for '%s'.\n", name );
        return NULL;
    }

    // Set up the error handling
    if (setjmp(png_jmpbuf(png_ptr))) {
        png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
        // nvDebug( "*** LoadPNG: Error reading png file '%s'.\n", name );
        return NULL;
    }

    // Set up the I/O functions.
    png_set_read_fn(png_ptr, (void*)&s, user_read_data);

    // Retrieve the image header information
    png_uint_32 width, height;
    int bit_depth, color_type, interlace_type;
    png_read_info(png_ptr, info_ptr);
    png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL);

    if (color_type == PNG_COLOR_TYPE_PALETTE && bit_depth <= 8) {
        // Convert indexed images to RGB.
        png_set_expand(png_ptr);
    }
    else if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) {
        // Convert grayscale to RGB.
        png_set_expand(png_ptr);
    }
    else if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) {
        // Expand images with transparency to full alpha channels
        // so the data will be available as RGBA quartets.
        png_set_expand(png_ptr);
    }
    else if (bit_depth < 8) {
        // If we have < 8 scale it up to 8.
        //png_set_expand(png_ptr);
        png_set_packing(png_ptr);
    }

    // Reduce bit depth.
    if (bit_depth == 16) {
        png_set_strip_16(png_ptr);
    }

    // Represent gray as RGB
    if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
        png_set_gray_to_rgb(png_ptr);
    }

    // Convert to RGBA filling alpha with 0xFF.
    if (!(color_type & PNG_COLOR_MASK_ALPHA)) {
        png_set_filler(png_ptr, 0xFF, PNG_FILLER_AFTER);
    }

    // @todo Choose gamma according to the platform?
    double screen_gamma = 2.2;
    int intent;
    if (png_get_sRGB(png_ptr, info_ptr, &intent)) {
        png_set_gamma(png_ptr, screen_gamma, 0.45455);
    }
    else {
        double image_gamma;
        if (png_get_gAMA(png_ptr, info_ptr, &image_gamma)) {
            png_set_gamma(png_ptr, screen_gamma, image_gamma);
        }
        else {
            png_set_gamma(png_ptr, screen_gamma, 0.45455);
        }
    }

    // Perform the selected transforms.
    png_read_update_info(png_ptr, info_ptr);

    png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL);

    AutoPtr<Image> img(new Image());
    img->allocate(width, height);

    // Set internal format flags.
    if(color_type & PNG_COLOR_MASK_COLOR) {
        //img->flags |= PI_IF_HAS_COLOR;
    }
    if(color_type & PNG_COLOR_MASK_ALPHA) {
        //img->flags |= PI_IF_HAS_ALPHA;
        img->setFormat(Image::Format_ARGB);
    }

    // Read the image: one row pointer per scanline, each pointing straight
    // into the image's pixel storage (4 bytes per pixel).
    uint8 * pixels = (uint8 *)img->pixels();
    png_bytep * row_data = new png_bytep[height];
    for (uint i = 0; i < height; i++) {
        row_data[i] = &(pixels[width * 4 * i]);
    }

    png_read_image(png_ptr, row_data);
    delete [] row_data;

    // Finish things up
    png_read_end(png_ptr, info_ptr);
    png_destroy_read_struct(&png_ptr, &info_ptr, NULL);

    // RGBA to BGRA.
    uint num = width * height;
    for(uint i = 0; i < num; i++)
    {
        Color32 c = img->pixel(i);
        img->pixel(i) = Color32(c.b, c.g, c.r, c.a);
    }

    // Compute alpha channel if needed.
    /*if( img->flags & PI_IU_BUMPMAP || img->flags & PI_IU_ALPHAMAP ) {
        if( img->flags & PI_IF_HAS_COLOR && !(img->flags & PI_IF_HAS_ALPHA)) {
            img->ComputeAlphaFromColor();
        }
    }*/

    return img.release();
}
/// Not implemented: ignores the stream and always returns NULL.
FloatImage * nv::ImageIO::loadFloatPNG(Stream & s)
{
return NULL;
}
#endif // defined(HAVE_PNG)
#if defined(HAVE_JPEG)
// libjpeg source-manager callback; intentionally empty because loadJPG sets
// up the whole in-memory buffer before decompression starts.
static void init_source (j_decompress_ptr /*cinfo*/){
}
// libjpeg source-manager callback, invoked when the decoder wants more input.
// loadJPG hands the complete file to libjpeg up front, so getting here means
// the data ended early: log it and feed a fake end-of-image marker.
static boolean fill_input_buffer (j_decompress_ptr cinfo){
    static JOCTET FakeEOI[] = { 0xFF, JPEG_EOI };

    // Generate warning
    nvDebug("jpeglib: Premature end of file\n");

    // Insert a fake EOI marker
    cinfo->src->next_input_byte = FakeEOI;
    cinfo->src->bytes_in_buffer = 2;

    return TRUE;
}
// libjpeg source-manager callback: skips num_bytes of input.
// A zero or negative count is a no-op.  Skipping up to or past the end of the
// buffer switches to the fake end-of-image marker from fill_input_buffer.
static void skip_input_data (j_decompress_ptr cinfo, long num_bytes) {
    if (num_bytes <= 0) {
        // Without this guard a negative count would be subtracted from the
        // unsigned bytes_in_buffer and grow it beyond the real buffer.
        return;
    }

    struct jpeg_source_mgr * src = cinfo->src;

    if(num_bytes >= (long)src->bytes_in_buffer) {
        fill_input_buffer(cinfo);
        return;
    }

    src->bytes_in_buffer -= num_bytes;
    src->next_input_byte += num_bytes;
}
// libjpeg source-manager callback; nothing to release because the input
// buffer is owned by loadJPG.
static void term_source (j_decompress_ptr /*cinfo*/){
// no work necessary here
}
/// Loads a JPEG from the stream into a newly allocated image.
/// The whole stream is read into memory and decoded through a custom libjpeg
/// source manager.  Three-component output becomes Format_RGB; anything else
/// is read as one byte per pixel, replicated into all four channels and
/// tagged Format_ARGB.
Image * nv::ImageIO::loadJPG(Stream & s)
{
    nvCheck(!s.isError());

    // Read the entire file.
    Array<uint8> fileBytes;
    fileBytes.resize(s.size());
    s.serialize(fileBytes.unsecureBuffer(), s.size());

    jpeg_decompress_struct cinfo;
    jpeg_error_mgr jerr;

    cinfo.err = jpeg_std_error(&jerr);
    jpeg_create_decompress(&cinfo);

    // Install a source manager that serves the in-memory copy of the file.
    cinfo.src = (struct jpeg_source_mgr *) (*cinfo.mem->alloc_small)
        ((j_common_ptr) &cinfo, JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr));
    cinfo.src->init_source = init_source;
    cinfo.src->fill_input_buffer = fill_input_buffer;
    cinfo.src->skip_input_data = skip_input_data;
    cinfo.src->resync_to_restart = jpeg_resync_to_restart; // use default method
    cinfo.src->term_source = term_source;
    cinfo.src->bytes_in_buffer = fileBytes.size();
    cinfo.src->next_input_byte = fileBytes.buffer();

    jpeg_read_header(&cinfo, TRUE);
    jpeg_start_decompress(&cinfo);

    /*
    cinfo.do_fancy_upsampling = FALSE; // fast decompression
    cinfo.dct_method = JDCT_FLOAT; // Choose floating point DCT method.
    */

    // Decode row by row into a tightly packed temporary buffer.
    uint8 * decoded = new uint8 [cinfo.output_width * cinfo.output_height * cinfo.num_components];
    uint8 * row = decoded;

    while( cinfo.output_scanline < cinfo.output_height ){
        int rowsRead = jpeg_read_scanlines (&cinfo, &row, 1);
        row += rowsRead * cinfo.output_width * cinfo.num_components;
    }

    jpeg_finish_decompress(&cinfo);

    AutoPtr<Image> img(new Image());
    img->allocate(cinfo.output_width, cinfo.output_height);

    Color32 * dst = img->pixels();
    const int size = img->height() * img->width();
    const uint8 * src = decoded;

    if( cinfo.num_components == 3 ) {
        img->setFormat(Image::Format_RGB);
        for( int i = 0; i < size; i++ ) {
            const uint8 * p = src + 3 * i;
            dst[i] = Color32(p[0], p[1], p[2]);
        }
    }
    else {
        img->setFormat(Image::Format_ARGB);
        for( int i = 0; i < size; i++ ) {
            const uint8 v = src[i];
            dst[i] = Color32(v, v, v, v);
        }
    }

    delete [] decoded;
    jpeg_destroy_decompress (&cinfo);

    return img.release();
}
#endif // defined(HAVE_JPEG)
#if defined(HAVE_TIFF)
/// Not implemented: checks that the stream is usable and always returns NULL.
FloatImage * nv::ImageIO::loadFloatTIFF(Stream & s)
{
nvCheck(!s.isError());
return NULL;
}
/// Loads a single-channel TIFF file into a newly allocated FloatImage.
/// 8, 16 and 32 bit samples are supported and scaled to the [0, 1] range.
/// Returns NULL when the file cannot be opened, has missing or zero
/// dimensions, uses an unsupported layout, or a scanline cannot be read.
FloatImage * nv::ImageIO::loadFloatTIFF(const char * fileName)
{
    TIFF * tif = TIFFOpen(fileName, "r");

    if (!tif)
    {
        nvDebug("Can't open '%s' for reading\n", fileName);
        return NULL;
    }

    // Initialise everything: TIFFGetField leaves its output untouched when a
    // tag is absent.
    ::uint16 spp = 1, bpp = 1;
    ::uint32 width = 0, height = 0;

    const bool hasHeight = TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height) != 0;
    const bool hasWidth = TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &width) != 0;
    // These two tags have defaults defined by the TIFF specification.
    TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bpp);
    TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp);

    if (!hasHeight || !hasWidth || width == 0 || height == 0) {
        nvDebug("Can't load '%s', missing or invalid image dimensions\n", fileName);
        TIFFClose(tif);
        return NULL;
    }

    if (spp != 1 || (bpp != 8 && bpp != 16 && bpp != 32)) {
        nvDebug("Can't load '%s', only 1 sample per pixel with 8, 16 or 32 bits supported\n", fileName);
        TIFFClose(tif);
        return NULL;
    }

    FloatImage * fimage = new FloatImage();
    fimage->allocate(spp, width, height);

    int linesize = TIFFScanlineSize(tif);
    tdata_t buf = (::uint8 *)nv::mem::malloc(linesize);

    for (uint y = 0; y < height; y++) {
        if (TIFFReadScanline(tif, buf, y, 0) < 0) {
            // Do not hand back an image built from a stale scanline buffer.
            nvDebug("Can't load '%s', error reading scanline\n", fileName);
            nv::mem::free(buf);
            TIFFClose(tif);
            delete fimage;
            return NULL;
        }

        float * dst = fimage->scanline(y, 0);

        if (bpp == 8) {
            for(uint x = 0; x < width; x++) {
                dst[x] = float(((::uint8 *)buf)[x]) / float(0xFF);
            }
        }
        else if (bpp == 16) {
            for(uint x = 0; x < width; x++) {
                dst[x] = float(((::uint16 *)buf)[x]) / float(0xFFFF);
            }
        }
        else /*if (bpp == 32)*/ {
            // Mantissa has only 24 bits, so drop 8 bits.
            for(uint x = 0; x < width; x++) {
                dst[x] = float(((::uint32 *)buf)[x] >> 8) / float(0xFFFFFF);
            }
        }
    }

    nv::mem::free(buf);

    TIFFClose(tif);

    return fimage;
}
#endif
#if 0
/** Save PNG.
 * Disabled (inside `#if 0`) and unfinished: it validates the arguments,
 * rejects cubemaps and DDS surfaces and opens a file writer, but nothing is
 * written.  The rest of the body is a commented-out ActionScript PNG encoder
 * kept as reference, and the function has no return statement on the success
 * path.
 */
static bool SavePNG(const PiImage * img, const char * name) {
nvCheck( img != NULL );
nvCheck( img->mem != NULL );
if( piStrCmp(piExtension(name), ".png" ) != 0 ) {
return false;
}
if( img->flags & PI_IT_CUBEMAP ) {
nvDebug("*** Cannot save cubemaps as PNG.");
return false;
}
if( img->flags & PI_IT_DDS ) {
nvDebug("*** Cannot save DDS surface as PNG.");
return false;
}
nvDebug( "--- Saving '%s'.\n", name );
PiAutoPtr<PiStream> ar( PiFileSystem::CreateFileWriter( name ) );
if( ar == NULL ) {
nvDebug( "*** SavePNG: Error, cannot save file '%s'.\n", name );
return false;
}
/*
public class PNGEnc {
public static function encode(img:BitmapData):ByteArray {
// Create output byte array
var png:ByteArray = new ByteArray();
// Write PNG signature
png.writeUnsignedInt(0x89504e47);
png.writeUnsignedInt(0x0D0A1A0A);
// Build IHDR chunk
var IHDR:ByteArray = new ByteArray();
IHDR.writeInt(img.width);
IHDR.writeInt(img.height);
IHDR.writeUnsignedInt(0x08060000); // 32bit RGBA
IHDR.writeByte(0);
writeChunk(png,0x49484452,IHDR);
// Build IDAT chunk
var IDAT:ByteArray= new ByteArray();
for(var i:int=0;i < img.height;i++) {
// no filter
IDAT.writeByte(0);
var p:uint;
if ( !img.transparent ) {
for(var j:int=0;j < img.width;j++) {
p = img.getPixel(j,i);
IDAT.writeUnsignedInt(
uint(((p&0xFFFFFF) << 8)|0xFF));
}
} else {
for(var j:int=0;j < img.width;j++) {
p = img.getPixel32(j,i);
IDAT.writeUnsignedInt(
uint(((p&0xFFFFFF) << 8)|
(shr(p,24))));
}
}
}
IDAT.compress();
writeChunk(png,0x49444154,IDAT);
// Build IEND chunk
writeChunk(png,0x49454E44,null);
// return PNG
return png;
}
private static var crcTable:Array;
private static var crcTableComputed:Boolean = false;
private static function writeChunk(png:ByteArray,
type:uint, data:ByteArray) {
if (!crcTableComputed) {
crcTableComputed = true;
crcTable = [];
for (var n:uint = 0; n < 256; n++) {
var c:uint = n;
for (var k:uint = 0; k < 8; k++) {
if (c & 1) {
c = uint(uint(0xedb88320) ^
uint(c >>> 1));
} else {
c = uint(c >>> 1);
}
}
crcTable[n] = c;
}
}
var len:uint = 0;
if (data != null) {
len = data.length;
}
png.writeUnsignedInt(len);
var p:uint = png.position;
png.writeUnsignedInt(type);
if ( data != null ) {
png.writeBytes(data);
}
var e:uint = png.position;
png.position = p;
var c:uint = 0xffffffff;
for (var i:int = 0; i < (e-p); i++) {
c = uint(crcTable[
(c ^ png.readUnsignedByte()) &
uint(0xff)] ^ uint(c >>> 8));
}
c = uint(c^uint(0xffffffff));
png.position = e;
png.writeUnsignedInt(c);
}
}
*/
}
#endif // 0
#if 0
// Disabled (inside `#if 0`) plugin registry: loaders and savers are looked up
// by file extension (without the leading dot) in s_plugin_load_map /
// s_plugin_save_map.  The declarations of those maps are commented out near
// the top of this file, and the code uses the Pi* types rather than the nv
// ones used by the active loaders.
namespace ImageIO {
/** Init ImageIO plugins. */
void InitPlugins() {
// AddInputPlugin( "", LoadANY );
AddInputPlugin( "tga", LoadTGA );
#if HAVE_PNG
AddInputPlugin( "png", LoadPNG );
#endif
#if HAVE_JPEG
AddInputPlugin( "jpg", LoadJPG );
#endif
AddInputPlugin( "dds", LoadDDS );
AddOutputPlugin( "tga", SaveTGA );
}
/** Reset ImageIO plugins. */
void ResetPlugins() {
s_plugin_load_map.Clear();
s_plugin_save_map.Clear();
}
/** Add an input plugin. */
void AddInputPlugin( const char * ext, ImageInput_Plugin plugin ) {
s_plugin_load_map.Add(ext, plugin);
}
/** Add an output plugin. */
void AddOutputPlugin( const char * ext, ImageOutput_Plugin plugin ) {
s_plugin_save_map.Add(ext, plugin);
}
/** Load an image with the input plugin registered for the extension of name; returns false when none is registered. */
bool Load(PiImage * img, const char * name, PiStream & stream) {
// Get name extension.
const char * extension = piExtension(name);
// Skip the dot.
if( *extension == '.' ) {
extension++;
}
// Lookup plugin in the map.
ImageInput_Plugin plugin = NULL;
if( s_plugin_load_map.Get(extension, &plugin) ) {
return plugin(img, stream);
}
/*foreach(i, s_plugin_load_map) {
nvDebug("%s %s %d\n", s_plugin_load_map[i].key.GetStr(), extension, 0 == strcmp(extension, s_plugin_load_map[i].key));
}
nvDebug("No plugin found for '%s' %d.\n", extension, s_plugin_load_map.Size());*/
return false;
}
/** Save an image with the output plugin registered for the extension of name; returns false when none is registered. */
bool Save(const PiImage * img, const char * name, PiStream & stream) {
// Get name extension.
const char * extension = piExtension(name);
// Skip the dot.
if( *extension == '.' ) {
extension++;
}
// Lookup plugin in the map.
ImageOutput_Plugin plugin = NULL;
if( s_plugin_save_map.Get(extension, &plugin) ) {
return plugin(img, stream);
}
return false;
}
} // ImageIO
#endif // 0

43
src/nvimage/ImageIO.h Normal file
View File

@ -0,0 +1,43 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_IMAGEIO_H
#define NV_IMAGE_IMAGEIO_H
#include <nvimage/nvimage.h>
namespace nv
{
class Image;
class FloatImage;
class Stream;
/// Image file loading and saving.  The loaders return a newly allocated
/// image, or NULL on failure.
namespace ImageIO
{
// Generic loaders: the format is chosen from the file extension of `name`.
NVIMAGE_API Image * load(const char * name);
NVIMAGE_API Image * load(const char * name, Stream & s);
// TGA support is always available.
NVIMAGE_API Image * loadTGA(Stream & s);
NVIMAGE_API bool saveTGA(Stream & s, const Image * img);
// The remaining loaders exist only when the matching HAVE_* macro is defined.
#if defined(HAVE_PNG)
NVIMAGE_API Image * loadPNG(Stream & s);
NVIMAGE_API FloatImage * loadFloatPNG(Stream & s);
#endif
#if defined(HAVE_JPEG)
NVIMAGE_API Image * loadJPG(Stream & s);
#endif
#if defined(HAVE_TIFF)
// Hack!
NVIMAGE_API FloatImage * loadFloatTIFF(const char * fileName);
NVIMAGE_API FloatImage * loadFloatTIFF(Stream & s);
#endif
} // ImageIO namespace
} // nv namespace
#endif // NV_IMAGE_IMAGEIO_H

138
src/nvimage/NormalMap.cpp Normal file
View File

@ -0,0 +1,138 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Ptr.h>
#include <nvmath/Color.h>
#include <nvimage/NormalMap.h>
#include <nvimage/Filter.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Image.h>
using namespace nv;
// Create normal map using the given kernels.
//
// Builds a 4-channel float image from `img`:
//  - channel 3 receives the height, dot(pixel colour, heightWeights);
//  - channels 0..2 receive the normal (du, dv, heightScale), normalized and
//    remapped from [-1, 1] to [0, 1], where du and dv are the responses of the
//    two kernels applied to the height channel with wrap mode `wm`.
// The kernels are only read; ownership stays with the caller.
static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, const Kernel2 * kdu, const Kernel2 * kdv)
{
    nvCheck(kdu != NULL);
    nvCheck(kdv != NULL);
    nvCheck(img != NULL);

    const uint w = img->width();
    const uint h = img->height();

    AutoPtr<FloatImage> normalMap(new FloatImage());
    normalMap->allocate(4, w, h);

    // Compute height and store in alpha channel:
    float * heights = normalMap->channel(3);
    const uint pixelCount = w*h;
    for (uint i = 0; i < pixelCount; i++)
    {
        const Vector4 color = toVector4(img->pixel(i));
        heights[i] = dot(color, heightWeights);
    }

    const float heightScale = 1.0f / 16.0f; // @@ Use a user defined factor.

    for (uint y = 0; y < h; y++)
    {
        for (uint x = 0; x < w; x++)
        {
            const float du = normalMap->applyKernel(kdu, x, y, 3, wm);
            const float dv = normalMap->applyKernel(kdv, x, y, 3, wm);

            const Vector3 n = normalize(Vector3(du, dv, heightScale));

            normalMap->setPixel(0.5f * n.x() + 0.5f, x, y, 0);
            normalMap->setPixel(0.5f * n.y() + 0.5f, x, y, 1);
            normalMap->setPixel(0.5f * n.z() + 0.5f, x, y, 2);
        }
    }

    return normalMap.release();
}
/// Create normal map using the given filter.
///
/// Builds a normalized Sobel kernel of the size selected by `filter` plus its
/// transpose, and runs the kernel-based createNormalMap with them.
/// Returns a newly allocated FloatImage, or NULL for an unknown filter value.
FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter /*= Sobel3x3*/)
{
    nvCheck(img != NULL);

    // Init the kernels.
    Kernel2 * kdu = NULL;
    Kernel2 * kdv = NULL;

    switch(filter)
    {
        case NormalMapFilter_Sobel3x3:
            kdu = new Kernel2(3);
            break;
        case NormalMapFilter_Sobel5x5:
            kdu = new Kernel2(5);
            break;
        case NormalMapFilter_Sobel7x7:
            kdu = new Kernel2(7);
            break;
        case NormalMapFilter_Sobel9x9:
            kdu = new Kernel2(9);
            break;
        default:
            nvDebugCheck(false);
            // In builds where the check above compiles away, kdu is still
            // NULL here; bail out instead of dereferencing it.
            return NULL;
    }

    kdu->initSobel();
    kdu->normalize();

    kdv = new Kernel2(*kdu);
    kdv->transpose();

    FloatImage * normalMap = ::createNormalMap(img, wm, heightWeights, kdu, kdv);

    // The kernels are only needed while the map is computed.
    delete kdv;
    delete kdu;

    return normalMap;
}
/// Create normal map combining multiple sobel filters.
///
/// Builds a normalized 9x9 kernel from a blend of Sobel filters weighted by
/// `filterWeights`, plus its transpose, and runs the kernel-based
/// createNormalMap with them.  Returns a newly allocated FloatImage.
FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights)
{
    nvCheck(img != NULL);

    Kernel2 * kdu = new Kernel2(9);
    kdu->initBlendedSobel(filterWeights);
    kdu->normalize();

    Kernel2 * kdv = new Kernel2(*kdu);
    kdv->transpose();

    FloatImage * normalMap = ::createNormalMap(img, wm, heightWeights, kdu, kdv);

    // The kernels are only needed while the map is computed.
    delete kdv;
    delete kdu;

    return normalMap;
}
/// Normalize the given image in place.
/// Forwards to FloatImage::normalize with argument 0 -- presumably the index
/// of the first channel of the normal; confirm against FloatImage.
void nv::normalize(FloatImage * img)
{
nvCheck(img != NULL);
img->normalize(0);
}

55
src/nvimage/NormalMap.h Normal file
View File

@ -0,0 +1,55 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_IMAGE_NORMALMAP_H
#define NV_IMAGE_NORMALMAP_H
#include <nvmath/Vector.h>
#include <nvimage/nvimage.h>
#include <nvimage/FloatImage.h>
namespace nv
{
class Image;
/// Selects the size of the sobel kernel used to build a normal map.
enum NormalMapFilter
{
NormalMapFilter_Sobel3x3, // fine detail (3x3 kernel)
NormalMapFilter_Sobel5x5, // medium detail (5x5 kernel)
NormalMapFilter_Sobel7x7, // large detail (7x7 kernel)
NormalMapFilter_Sobel9x9, // very large (9x9 kernel)
};
/// Create a normal map from img using a single sobel filter of the given size.
FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3);
/// Create a normal map from img using a 9x9 kernel that blends several sobel filters according to filterWeights.
FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights);
/// Normalize the given image in place.
void normalize(FloatImage * img);
// @@ Add generation of DU/DV maps.
} // nv namespace
#endif // NV_IMAGE_NORMALMAP_H

234
src/nvimage/Quantize.cpp Normal file
View File

@ -0,0 +1,234 @@
// This code is in the public domain -- castanyo@yahoo.es
/*
http://www.visgraf.impa.br/Courses/ip00/proj/Dithering1/floyd_steinberg_dithering.html
http://www.gamedev.net/reference/articles/article341.asp
@@ Look at LPS: http://www.cs.rit.edu/~pga/pics2000/i.html
This is a really nice guide to dithering algorithms:
http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
@@ This code needs to be reviewed, I'm not sure it's correct.
*/
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/Quantize.h>
using namespace nv;
// Threshold the alpha channel: alpha above alpha_threshold becomes 255,
// everything else becomes 0. Color channels are left untouched.
void nv::Quantize::BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ )
{
    nvCheck(image != NULL);

    const uint width = image->width();
    const uint height = image->height();

    for (uint row = 0; row < height; row++) {
        for (uint col = 0; col < width; col++) {
            Color32 texel = image->pixel(col, row);

            // Binarize alpha.
            texel.a = (texel.a > alpha_threshold) ? 255 : 0;

            image->pixel(col, row) = texel;
        }
    }
}
// Quantize every pixel by converting it to a 16 bit color and back to 32 bits.
void nv::Quantize::RGB16( Image * image )
{
    nvCheck(image != NULL);

    const uint width = image->width();
    const uint height = image->height();

    for (uint row = 0; row < height; row++) {
        for (uint col = 0; col < width; col++) {
            Color32 original = image->pixel(col, row);

            // Round trip through the 16 bit representation (regular bit expansion).
            const Color32 quantized = toColor32( toColor16(original) );

            image->pixel(col, row) = quantized;
        }
    }
}
// Quantize the alpha channel to 4 bits, expanded back to 8 bits by
// replicating the high nibble into the low nibble.
void nv::Quantize::Alpha4( Image * image )
{
    nvCheck(image != NULL);

    const uint width = image->width();
    const uint height = image->height();

    for (uint row = 0; row < height; row++) {
        for (uint col = 0; col < width; col++) {
            Color32 texel = image->pixel(col, row);

            // Keep the high nibble and copy it into the low one.
            const uint highNibble = texel.a & 0xF0;
            texel.a = highNibble | (highNibble >> 4);

            image->pixel(col, row) = texel;
        }
    }
}
// Floyd-Steinberg error diffusion while quantizing RGB to 16 bits.
// Two error rows are kept: one for the scanline being processed and one for
// the scanline below it. Each row has one extra cell on both sides so the
// x-1 / x+1 writes never leave the buffer.
void nv::Quantize::FloydSteinberg_RGB16( Image * image )
{
    nvCheck(image != NULL);

    const uint width = image->width();
    const uint height = image->height();
    const uint cellCount = width + 2;

    // @@ Use fixed point?
    Vector3 * current = new Vector3[cellCount];
    Vector3 * below = new Vector3[cellCount];
    memset(current, 0, sizeof(Vector3) * cellCount);
    memset(below, 0, sizeof(Vector3) * cellCount);

    for (uint y = 0; y < height; y++) {
        for (uint x = 0; x < width; x++) {
            const uint cell = x + 1;    // skip the left guard cell

            Color32 source = image->pixel(x, y);

            // Add the accumulated error. // @@ We shouldn't clamp here!
            source.r = clamp(int(source.r) + int(current[cell].x()), 0, 255);
            source.g = clamp(int(source.g) + int(current[cell].y()), 0, 255);
            source.b = clamp(int(source.b) + int(current[cell].z()), 0, 255);

            // Quantize to 16 bits and expand back. @@ Use regular clamp?
            Color32 quantized = toColor32( toColor16(source) );
            image->pixel(x, y) = quantized;

            // Quantization error of this pixel.
            Vector3 error(float(source.r - quantized.r), float(source.g - quantized.g), float(source.b - quantized.b));

            // Distribute it to the right neighbour and the three pixels below.
            current[cell + 1] += 7.0f / 16.0f * error;
            below[cell - 1] += 3.0f / 16.0f * error;
            below[cell] += 5.0f / 16.0f * error;
            below[cell + 1] += 1.0f / 16.0f * error;
        }

        // The row below becomes the current one; start the next one from zero.
        swap(current, below);
        memset(below, 0, sizeof(Vector3) * cellCount);
    }

    delete [] current;
    delete [] below;
}
// Floyd-Steinberg error diffusion while thresholding alpha to 0 or 255.
// Uses two padded error rows (current scanline and the one below it).
void nv::Quantize::FloydSteinberg_BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ )
{
    nvCheck(image != NULL);

    const uint width = image->width();
    const uint height = image->height();
    const uint cellCount = width + 2;

    // @@ Use fixed point?
    float * current = new float[cellCount];
    float * below = new float[cellCount];
    memset(current, 0, sizeof(float) * cellCount);
    memset(below, 0, sizeof(float) * cellCount);

    for (uint y = 0; y < height; y++) {
        for (uint x = 0; x < width; x++) {
            const uint cell = x + 1;    // skip the left guard cell

            Color32 texel = image->pixel(x, y);

            // Alpha plus the error diffused into this pixel so far.
            const int alpha = int(texel.a) + int(current[cell]);

            // Threshold.
            texel.a = (alpha > alpha_threshold) ? 255 : 0;
            image->pixel(x, y) = texel;

            // Quantization error of this pixel.
            const float error = float(alpha - texel.a);

            // Distribute it to the right neighbour and the three pixels below.
            current[cell + 1] += 7.0f / 16.0f * error;
            below[cell - 1] += 3.0f / 16.0f * error;
            below[cell] += 5.0f / 16.0f * error;
            below[cell + 1] += 1.0f / 16.0f * error;
        }

        // The row below becomes the current one; start the next one from zero.
        swap(current, below);
        memset(below, 0, sizeof(float) * cellCount);
    }

    delete [] current;
    delete [] below;
}
// Error diffusion. Floyd Steinberg.
// Quantizes the alpha channel to 4 bits (expanded back to 8 by nibble
// replication) and diffuses the quantization error to neighbouring pixels.
// Two padded error rows are kept: the current scanline and the one below.
void nv::Quantize::FloydSteinberg_Alpha4( Image * image )
{
    nvCheck(image != NULL);

    const uint w = image->width();
    const uint h = image->height();

    // @@ Use fixed point?
    float * row0 = new float[(w+2)];
    float * row1 = new float[(w+2)];
    memset(row0, 0, sizeof(float)*(w+2));
    memset(row1, 0, sizeof(float)*(w+2));

    for(uint y = 0; y < h; y++) {
        for(uint x = 0; x < w; x++) {

            Color32 pixel = image->pixel(x, y);

            // Add error. Clamp so the compensated value still fits in 8 bits,
            // as FloydSteinberg_RGB16 does for the color channels.
            const int alpha = clamp(int(pixel.a) + int(row0[1+x]), 0, 255);

            // Convert to 4 bit using regular bit expansion. Quantize the
            // error-compensated value, not the original alpha; otherwise the
            // diffused error never influences the output.
            pixel.a = (alpha & 0xF0) | ((alpha & 0xF0) >> 4);

            // Store color.
            image->pixel(x, y) = pixel;

            // Compute new error.
            float diff = float(alpha - pixel.a);

            // Propagate new error.
            row0[1+x+1] += 7.0f / 16.0f * diff;
            row1[1+x-1] += 3.0f / 16.0f * diff;
            row1[1+x+0] += 5.0f / 16.0f * diff;
            row1[1+x+1] += 1.0f / 16.0f * diff;
        }

        swap(row0, row1);
        memset(row1, 0, sizeof(float)*(w+2));
    }

    delete [] row0;
    delete [] row1;
}

25
src/nvimage/Quantize.h Normal file
View File

@ -0,0 +1,25 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_QUANTIZE_H
#define NV_IMAGE_QUANTIZE_H
namespace nv
{
class Image;
/// In-place quantization routines operating on an Image.
namespace Quantize
{
/// Convert every pixel to a 16 bit color and back to 32 bits.
void RGB16(Image * img);
/// Set alpha to 255 when it is above alpha_threshold, 0 otherwise.
void BinaryAlpha(Image * img, int alpha_threshold = 127);
/// Quantize alpha to 4 bits, replicating the high nibble into the low one.
void Alpha4(Image * img);
/// Same targets as the three functions above, with Floyd-Steinberg error diffusion.
void FloydSteinberg_RGB16(Image * img);
void FloydSteinberg_BinaryAlpha(Image * img, int alpha_threshold = 127);
void FloydSteinberg_Alpha4(Image * img);
// @@ Add palette quantization algorithms!
}
}
#endif // NV_IMAGE_QUANTIZE_H

103
src/nvimage/TgaFile.h Normal file
View File

@ -0,0 +1,103 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_TGAFILE_H
#define NV_IMAGE_TGAFILE_H
#include <nvcore/Stream.h>
namespace nv
{
// TGA types
// Image type codes; presumably the values stored in TgaHeader::image_type
// (verify against the loader). The RLE variants are the base value plus 8.
enum TGAType {
TGA_TYPE_INDEXED = 1,
TGA_TYPE_RGB = 2,
TGA_TYPE_GREY = 3,
TGA_TYPE_RLE_INDEXED = 9,
TGA_TYPE_RLE_RGB = 10,
TGA_TYPE_RLE_GREY = 11
};
// Bit masks and values for a flags byte (presumably TgaHeader::flags; confirm
// against the loader). Bits 6-7 select the interleave mode.
#define TGA_INTERLEAVE_MASK 0xc0
#define TGA_INTERLEAVE_NONE 0x00
#define TGA_INTERLEAVE_2WAY 0x40
#define TGA_INTERLEAVE_4WAY 0x80
// Bits 4-5 select the image origin: bit 4 is left/right, bit 5 is lower/upper.
#define TGA_ORIGIN_MASK 0x30
#define TGA_ORIGIN_LEFT 0x00
#define TGA_ORIGIN_RIGHT 0x10
#define TGA_ORIGIN_LOWER 0x00
#define TGA_ORIGIN_UPPER 0x20
/// Tga Header.
/// Fields are listed in the order in which operator<< serializes them.
struct TgaHeader {
uint8 id_length;
uint8 colormap_type;
uint8 image_type;
uint16 colormap_index;
uint16 colormap_length;
uint8 colormap_size;
uint16 x_origin;
uint16 y_origin;
uint16 width;
uint16 height;
uint8 pixel_size;       // bits per pixel (TgaFile::size() divides it by 8)
uint8 flags;
// Sum of the field sizes above in bytes. This may differ from
// sizeof(TgaHeader) because of padding.
enum { Size = 18 }; //const static int SIZE = 18;
};
/// Tga File.
/// Header plus an owned buffer holding the raw pixel data.
/// NOTE: the destructor deletes `mem` and no copy constructor or assignment
/// operator is defined, so copying a TgaFile would free the buffer twice.
struct TgaFile {
    TgaFile() {
        mem = NULL;
    }
    ~TgaFile() {
        free();
    }

    /// Size in bytes of the pixel data described by the header.
    uint size() const {
        // Round the bit depth up to whole bytes so that depths that are not a
        // multiple of 8 (e.g. 15) are not truncated.
        const uint bytesPerPixel = (uint(head.pixel_size) + 7) / 8;
        // Multiply as unsigned: uint16 operands promote to int, and
        // width * height can overflow a signed int.
        return uint(head.width) * uint(head.height) * bytesPerPixel;
    }

    /// Allocate size() bytes; the buffer must not be allocated already.
    void allocate() {
        nvCheck( mem == NULL );
        mem = new uint8[size()];
    }

    /// Release the pixel buffer. Safe to call when nothing is allocated.
    void free() {
        delete [] mem;
        mem = NULL;
    }

    TgaHeader head;
    uint8 * mem;
};
/// Serialize a TGA header field by field, in declaration order.
inline Stream & operator<< (Stream & s, TgaHeader & head)
{
    s << head.id_length
      << head.colormap_type
      << head.image_type
      << head.colormap_index
      << head.colormap_length
      << head.colormap_size
      << head.x_origin
      << head.y_origin
      << head.width
      << head.height
      << head.pixel_size
      << head.flags;
    return s;
}
/// Serialize a TGA file: the header first, then size() bytes of pixel data.
/// When the stream is loading, the pixel buffer is allocated after the header
/// has been read, so its size comes from the header just loaded.
inline Stream & operator<< (Stream & s, TgaFile & tga)
{
    s << tga.head;

    const bool loading = s.isLoading();
    if (loading) {
        tga.allocate();
    }

    s.serialize( tga.mem, tga.size() );
    return s;
}
} // nv namespace
#endif // NV_IMAGE_TGAFILE_H

22
src/nvimage/nvimage.h Normal file
View File

@ -0,0 +1,22 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_IMAGE_H
#define NV_IMAGE_H
#include <nvcore/nvcore.h>
// Function linkage
// NVIMAGE_API / NVIMAGE_CLASS expand to the export macros when NVIMAGE_SHARED
// is set and NVIMAGE_EXPORTS is defined, to the import macros when
// NVIMAGE_SHARED is set without NVIMAGE_EXPORTS, and to nothing otherwise.
#if NVIMAGE_SHARED
#ifdef NVIMAGE_EXPORTS
#define NVIMAGE_API DLL_EXPORT
#define NVIMAGE_CLASS DLL_EXPORT_CLASS
#else
#define NVIMAGE_API DLL_IMPORT
#define NVIMAGE_CLASS DLL_IMPORT
#endif
#else
#define NVIMAGE_API
#define NVIMAGE_CLASS
#endif
#endif // NV_IMAGE_H

View File

@ -0,0 +1,553 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvimage/ColorBlock.h>
#include "BlockDXT.h"
using namespace nv;
/*----------------------------------------------------------------------------
BlockDXT1
----------------------------------------------------------------------------*/
/// Evaluate the four palette entries of the block.
/// The mode is chosen by comparing the two packed endpoints: col0 > col1
/// selects the four color palette, anything else the three color palette
/// whose last entry is all zeros.
/// @return Number of colors in the palette (4 or 3).
uint BlockDXT1::evaluatePalette(Color32 color_array[4]) const
{
    // Both helpers expand the endpoints into entries 0 and 1 (bit expansion
    // before interpolation) and then derive entries 2 and 3.
    if (col0.u > col1.u) {
        evaluatePalette4(color_array);
        return 4;
    }

    evaluatePalette3(color_array);
    return 3;
}
// Evaluate palette assuming 3 color block.
// Entries 0 and 1 are the expanded endpoints, entry 2 their average and
// entry 3 is all zeros (including alpha).
void BlockDXT1::evaluatePalette3(Color32 color_array[4]) const
{
    Color32 & first = color_array[0];
    Color32 & second = color_array[1];
    Color32 & middle = color_array[2];
    Color32 & last = color_array[3];

    // Expand each endpoint to 8 bits per channel by replicating its high bits.
    first.r = (col0.r << 3) | (col0.r >> 2);
    first.g = (col0.g << 2) | (col0.g >> 4);
    first.b = (col0.b << 3) | (col0.b >> 2);
    first.a = 0xFF;

    second.r = (col1.r << 3) | (col1.r >> 2);
    second.g = (col1.g << 2) | (col1.g >> 4);
    second.b = (col1.b << 3) | (col1.b >> 2);
    second.a = 0xFF;

    // Third color: average of the two endpoints.
    middle.r = (first.r + second.r) / 2;
    middle.g = (first.g + second.g) / 2;
    middle.b = (first.b + second.b) / 2;
    middle.a = 0xFF;

    // Set all components to 0 to match DXT specs.
    last.r = 0x00;
    last.g = 0x00;
    last.b = 0x00;
    last.a = 0x00;
}
// Evaluate palette assuming 4 color block.
// Entries 0 and 1 are the expanded endpoints; entries 2 and 3 lie one third
// and two thirds of the way from entry 0 to entry 1.
void BlockDXT1::evaluatePalette4(Color32 color_array[4]) const
{
    Color32 & first = color_array[0];
    Color32 & second = color_array[1];
    Color32 & nearFirst = color_array[2];
    Color32 & nearSecond = color_array[3];

    // Expand each endpoint to 8 bits per channel by replicating its high bits.
    first.r = (col0.r << 3) | (col0.r >> 2);
    first.g = (col0.g << 2) | (col0.g >> 4);
    first.b = (col0.b << 3) | (col0.b >> 2);
    first.a = 0xFF;

    second.r = (col1.r << 3) | (col1.r >> 2);
    second.g = (col1.g << 2) | (col1.g >> 4);
    second.b = (col1.b << 3) | (col1.b >> 2);
    second.a = 0xFF;

    // Two interpolated colors, weighted 2:1 towards each endpoint.
    nearFirst.r = (2 * first.r + second.r) / 3;
    nearFirst.g = (2 * first.g + second.g) / 3;
    nearFirst.b = (2 * first.b + second.b) / 3;
    nearFirst.a = 0xFF;

    nearSecond.r = (2 * second.r + first.r) / 3;
    nearSecond.g = (2 * second.g + first.g) / 3;
    nearSecond.b = (2 * second.b + first.b) / 3;
    nearSecond.a = 0xFF;
}
/* Jason Dorie's code.
// ----------------------------------------------------------------------------
// Build palette for a 3 color + transparent black block
// ----------------------------------------------------------------------------
void DXTCGen::BuildCodes3(cbVector *pVects, cbVector &v1, cbVector &v2)
{
//pVects[0] = v1;
//pVects[2] = v2;
//pVects[1][0] = v1[0];
//pVects[1][1] = (BYTE)( ((long)v1[1] + (long)v2[1]) / 2 );
//pVects[1][2] = (BYTE)( ((long)v1[2] + (long)v2[2]) / 2 );
//pVects[1][3] = (BYTE)( ((long)v1[3] + (long)v2[3]) / 2 );
__asm {
mov ecx, dword ptr pVects
mov eax, dword ptr v1
mov ebx, dword ptr v2
movd mm0, [eax]
movd mm1, [ebx]
pxor mm2, mm2
nop
movd [ecx], mm0
movd [ecx+8], mm1
punpcklbw mm0, mm2
punpcklbw mm1, mm2
paddw mm0, mm1
psrlw mm0, 1
packuswb mm0, mm0
movd [ecx+4], mm0
}
// *(long *)&pVects[1] = r1;
}
__int64 ScaleOneThird = 0x5500550055005500;
// ----------------------------------------------------------------------------
// Build palette for a 4 color block
// ----------------------------------------------------------------------------
void DXTCGen::BuildCodes4(cbVector *pVects, cbVector &v1, cbVector &v2)
{
// pVects[0] = v1;
// pVects[3] = v2;
//
// pVects[1][0] = v1[0];
// pVects[1][1] = (BYTE)( ((long)v1[1] * 2 + (long)v2[1]) / 3 );
// pVects[1][2] = (BYTE)( ((long)v1[2] * 2 + (long)v2[2]) / 3 );
// pVects[1][3] = (BYTE)( ((long)v1[3] * 2 + (long)v2[3]) / 3 );
//
// pVects[2][0] = v1[0];
// pVects[2][1] = (BYTE)( ((long)v2[1] * 2 + (long)v1[1]) / 3 );
// pVects[2][2] = (BYTE)( ((long)v2[2] * 2 + (long)v1[2]) / 3 );
// pVects[2][3] = (BYTE)( ((long)v2[3] * 2 + (long)v1[3]) / 3 );
__asm {
mov ecx, dword ptr pVects
mov eax, dword ptr v1
mov ebx, dword ptr v2
movd mm0, [eax]
movd mm1, [ebx]
pxor mm2, mm2
movd [ecx], mm0
movd [ecx+12], mm1
punpcklbw mm0, mm2
punpcklbw mm1, mm2
movq mm3, mm0 // mm3 = v0
paddw mm0, mm1 // mm0 = v0 + v1
paddw mm3, mm3 // mm3 = v0*2
paddw mm0, mm1 // mm0 = v0 + v1*2
paddw mm1, mm3 // mm1 = v0*2 + v1
pmulhw mm0, ScaleOneThird
pmulhw mm1, ScaleOneThird
packuswb mm1, mm0
movq [ecx+4], mm1
}
}
*/
/// Decode the block into 4x4 colors.
/// Each byte of `row` holds four 2 bit palette indices, lowest bits first.
void BlockDXT1::decodeBlock(ColorBlock * block) const
{
    nvDebugCheck(block != NULL);

    // Build the palette once.
    Color32 palette[4];
    evaluatePalette(palette);

    // Look up every texel.
    for (uint y = 0; y < 4; y++) {
        const uint rowBits = row[y];
        for (uint x = 0; x < 4; x++) {
            const uint selector = (rowBits >> (2 * x)) & 3;
            block->color(x, y) = palette[selector];
        }
    }
}
void BlockDXT1::setIndices(int * idx)
{
indices = 0;
for(uint i = 0; i < 16; i++) {
indices |= (idx[i] & 3) << (2 * i);
}
}
/// Flip DXT1 block vertically.
/// Swaps index rows 0 <-> 3 and 1 <-> 2; the endpoints are unchanged.
// Not `inline`: the header declares this member without a definition, so an
// inline definition here would not be available to other translation units.
void BlockDXT1::flip4()
{
    swap(row[0], row[3]);
    swap(row[1], row[2]);
}
/// Flip half DXT1 block vertically.
/// Swaps index rows 0 and 1 only.
// Not `inline`: the header declares this member without a definition, so an
// inline definition here would not be available to other translation units.
void BlockDXT1::flip2()
{
    swap(row[0], row[1]);
}
/*----------------------------------------------------------------------------
BlockDXT3
----------------------------------------------------------------------------*/
void BlockDXT3::decodeBlock(ColorBlock * block) const
{
nvDebugCheck(block != NULL);
// Decode color.
color.decodeBlock(block);
// Decode alpha.
block->color(0x0).a = (alpha.alpha0 << 4) | alpha.alpha0;
block->color(0x1).a = (alpha.alpha1 << 4) | alpha.alpha1;
block->color(0x2).a = (alpha.alpha2 << 4) | alpha.alpha2;
block->color(0x3).a = (alpha.alpha3 << 4) | alpha.alpha3;
block->color(0x4).a = (alpha.alpha4 << 4) | alpha.alpha4;
block->color(0x5).a = (alpha.alpha5 << 4) | alpha.alpha5;
block->color(0x6).a = (alpha.alpha6 << 4) | alpha.alpha6;
block->color(0x7).a = (alpha.alpha7 << 4) | alpha.alpha7;
block->color(0x8).a = (alpha.alpha8 << 4) | alpha.alpha8;
block->color(0x9).a = (alpha.alpha9 << 4) | alpha.alpha9;
block->color(0xA).a = (alpha.alphaA << 4) | alpha.alphaA;
block->color(0xB).a = (alpha.alphaB << 4) | alpha.alphaB;
block->color(0xC).a = (alpha.alphaC << 4) | alpha.alphaC;
block->color(0xD).a = (alpha.alphaD << 4) | alpha.alphaD;
block->color(0xE).a = (alpha.alphaE << 4) | alpha.alphaE;
block->color(0xF).a = (alpha.alphaF << 4) | alpha.alphaF;
}
/// Flip DXT3 alpha block vertically: reverse the order of the four rows.
void AlphaBlockDXT3::flip4()
{
    // Inner pair first, then the outer pair; the two swaps are independent.
    swap(row[1], row[2]);
    swap(row[0], row[3]);
}
/// Flip half DXT3 alpha block vertically.
void AlphaBlockDXT3::flip2()
{
swap(row[0], row[1]);
}
/// Flip DXT3 block vertically by flipping its color and alpha parts.
void BlockDXT3::flip4()
{
    // The two parts are independent of each other.
    color.flip4();
    alpha.flip4();
}
/// Flip half DXT3 block vertically by flipping its color and alpha parts.
void BlockDXT3::flip2()
{
    // The two parts are independent of each other.
    color.flip2();
    alpha.flip2();
}
/*----------------------------------------------------------------------------
BlockDXT5
----------------------------------------------------------------------------*/
/// Evaluate the eight alpha palette entries.
/// alpha0 > alpha1 selects the 8 value palette, anything else the 6 value one.
void AlphaBlockDXT5::evaluatePalette(uint8 alpha[8]) const
{
    if (alpha0 > alpha1) {
        evaluatePalette8(alpha);
        return;
    }

    evaluatePalette6(alpha);
}
/// 8-alpha block: entries 0 and 1 are the endpoints, entries 2..7 (bit codes
/// 010..111) are interpolated in steps of one seventh from alpha0 to alpha1.
void AlphaBlockDXT5::evaluatePalette8(uint8 alpha[8]) const
{
    const uint first = alpha0;
    const uint second = alpha1;

    // Bit code 000 = alpha0, 001 = alpha1.
    alpha[0] = uint8(first);
    alpha[1] = uint8(second);

    // Entry 1 + step = ((7 - step) * alpha0 + step * alpha1) / 7.
    for (uint step = 1; step <= 6; step++) {
        alpha[1 + step] = uint8(((7 - step) * first + step * second) / 7);
    }
}
/// 6-alpha block: entries 0 and 1 are the endpoints, entries 2..5 (bit codes
/// 010..101) are interpolated in steps of one fifth, and the last two entries
/// are the constants 0x00 and 0xFF.
void AlphaBlockDXT5::evaluatePalette6(uint8 alpha[8]) const
{
    const uint first = alpha0;
    const uint second = alpha1;

    // Bit code 000 = alpha0, 001 = alpha1.
    alpha[0] = uint8(first);
    alpha[1] = uint8(second);

    // Entry 1 + step = ((5 - step) * alpha0 + step * alpha1) / 5.
    for (uint step = 1; step <= 4; step++) {
        alpha[1 + step] = uint8(((5 - step) * first + step * second) / 5);
    }

    alpha[6] = 0x00;    // Bit code 110
    alpha[7] = 0xFF;    // Bit code 111
}
/// Copy the sixteen 3 bit palette indices into index_array, one per element.
/// Reads the named bit-fields bits0..bitsF directly, so it does not depend on
/// how they are laid out inside the 64 bit word.
void AlphaBlockDXT5::indices(uint8 index_array[16]) const
{
index_array[0x0] = bits0;
index_array[0x1] = bits1;
index_array[0x2] = bits2;
index_array[0x3] = bits3;
index_array[0x4] = bits4;
index_array[0x5] = bits5;
index_array[0x6] = bits6;
index_array[0x7] = bits7;
index_array[0x8] = bits8;
index_array[0x9] = bits9;
index_array[0xA] = bitsA;
index_array[0xB] = bitsB;
index_array[0xC] = bitsC;
index_array[0xD] = bitsD;
index_array[0xE] = bitsE;
index_array[0xF] = bitsF;
/*
// Disabled alternative that extracts the indices with shifts.
// @@ misaligned reads might be very expensive on some hardware.
uint b = (uint &) bits[0];
for(int i = 0; i < 8; i++) {
index_array[i] = uint8(b & 0x07);
b >>= 3;
}
b = (uint &) bits[3];
for(int i = 0; i < 8; i++) {
index_array[8+i] = uint8(b & 0x07);
b >>= 3;
}
*/
}
/// Return the 3 bit palette index of texel `index` (0..15).
/// Index i is read from bits [16 + 3*i, 18 + 3*i] of the 64 bit word `u`,
/// i.e. right after the two 8 bit endpoints. This is the shift based
/// equivalent of reading the matching bitsN bit-field.
uint AlphaBlockDXT5::index(uint index) const
{
    nvDebugCheck(index < 16);

    const uint shift = 16 + 3 * index;
    const uint64 aligned = this->u >> shift;
    return uint(aligned & 0x7);
}
/// Set the 3 bit palette index of texel `index` (0..15) to `value` (0..7).
/// Index i occupies bits [16 + 3*i, 18 + 3*i] of the 64 bit word `u`. This is
/// the shift based equivalent of assigning the matching bitsN bit-field.
void AlphaBlockDXT5::setIndex(uint index, uint value)
{
    nvDebugCheck(index < 16);
    nvDebugCheck(value < 8);

    const uint shift = 16 + 3 * index;

    // Clear the three target bits, then merge the new value in.
    const uint64 cleared = this->u & ~(uint64(0x7) << shift);
    this->u = cleared | (uint64(value) << shift);
}
void AlphaBlockDXT5::flip4()
{
uint64 * b = (uint64 *)this;
// @@ The masks might have to be byte swapped.
uint64 tmp = (*b & POSH_U64(0x000000000000FFFF));
tmp |= (*b & POSH_U64(0x000000000FFF0000)) << 36;
tmp |= (*b & POSH_U64(0x000000FFF0000000)) << 12;
tmp |= (*b & POSH_U64(0x000FFF0000000000)) >> 12;
tmp |= (*b & POSH_U64(0xFFF0000000000000)) >> 36;
*b = tmp;
}
void AlphaBlockDXT5::flip2()
{
uint * b = (uint *)this;
// @@ The masks might have to be byte swapped.
uint tmp = (*b & 0xFF000000);
tmp |= (*b & 0x00000FFF) << 12;
tmp |= (*b & 0x00FFF000) >> 12;
*b = tmp;
}
/// Decode the block: colors come from the embedded DXT1 block, alpha from the
/// interpolated alpha palette selected by the per-texel indices.
void BlockDXT5::decodeBlock(ColorBlock * block) const
{
    nvDebugCheck(block != NULL);

    // Decode color.
    color.decodeBlock(block);

    // Decode alpha: build the palette and fetch the sixteen selectors.
    uint8 palette[8];
    alpha.evaluatePalette(palette);

    uint8 selectors[16];
    alpha.indices(selectors);

    for (uint texel = 0; texel < 16; texel++) {
        const uint8 selector = selectors[texel];
        block->color(texel).a = palette[selector];
    }
}
/// Flip DXT5 block vertically by flipping its color and alpha parts.
void BlockDXT5::flip4()
{
    // The two parts are independent of each other.
    color.flip4();
    alpha.flip4();
}
/// Flip half DXT5 block vertically by flipping its color and alpha parts.
void BlockDXT5::flip2()
{
    // The two parts are independent of each other.
    color.flip2();
    alpha.flip2();
}
/// Decode 3DC block.
/// Not implemented yet: the body is empty, so `block` is left untouched.
void Block3DC::decodeBlock(ColorBlock * block) const
{
// @@ TBD
}
/// Flip 3DC block vertically by flipping both of its channel blocks.
void Block3DC::flip4()
{
    // The two channel blocks are independent of each other.
    x.flip4();
    y.flip4();
}
/// Flip half 3DC block vertically by flipping both of its channel blocks.
void Block3DC::flip2()
{
    // The two channel blocks are independent of each other.
    x.flip2();
    y.flip2();
}

176
src/nvimage/nvtt/BlockDXT.h Normal file
View File

@ -0,0 +1,176 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_BLOCKDXT_H
#define NV_TT_BLOCKDXT_H
#include <nvmath/Color.h>
#include "nvtt.h"
namespace nv
{
struct ColorBlock;
/// DXT1 block.
/// Two 16 bit endpoint colors followed by sixteen 2 bit palette indices.
struct BlockDXT1
{
Color16 col0;
Color16 col1;
// The indices, viewed either as one byte per row of four texels or as a
// single 32 bit word.
union {
uint8 row[4];
uint indices;
};
bool isFourColorMode() const;
// Fill color_array with the palette; returns the number of colors (4 or 3).
uint evaluatePalette(Color32 color_array[4]) const;
uint evaluatePaletteFast(Color32 color_array[4]) const;
// Palette evaluation that assumes the three / four color mode respectively.
void evaluatePalette3(Color32 color_array[4]) const;
void evaluatePalette4(Color32 color_array[4]) const;
// Decode the block into 4x4 colors.
void decodeBlock(ColorBlock * block) const;
// Pack sixteen indices (low 2 bits of each entry) into the block.
void setIndices(int * idx);
// Flip all four rows / only the first two rows vertically.
void flip4();
void flip2();
};
/// Return true if the block uses four color mode, false otherwise.
inline bool BlockDXT1::isFourColorMode() const
{
    // Strictly greater: equal endpoints select the three color mode, which is
    // the same test BlockDXT1::evaluatePalette() uses to pick the palette.
    return col0.u > col1.u;
}
/// DXT3 alpha block with explicit alpha.
/// Sixteen 4 bit alpha values, also accessible as four 16 bit rows.
struct AlphaBlockDXT3
{
union {
// One 4 bit alpha per texel, alpha0 .. alphaF.
struct {
uint alpha0 : 4;
uint alpha1 : 4;
uint alpha2 : 4;
uint alpha3 : 4;
uint alpha4 : 4;
uint alpha5 : 4;
uint alpha6 : 4;
uint alpha7 : 4;
uint alpha8 : 4;
uint alpha9 : 4;
uint alphaA : 4;
uint alphaB : 4;
uint alphaC : 4;
uint alphaD : 4;
uint alphaE : 4;
uint alphaF : 4;
};
// Row view used by flip4() / flip2().
uint16 row[4];
};
// Flip all four rows / only the first two rows vertically.
void flip4();
void flip2();
};
/// DXT3 block.
/// An explicit alpha block followed by a DXT1 color block.
struct BlockDXT3
{
AlphaBlockDXT3 alpha;
BlockDXT1 color;
// Decode color and alpha into the given 4x4 block.
void decodeBlock(ColorBlock * block) const;
// Flip all four rows / only the first two rows vertically.
void flip4();
void flip2();
};
/// DXT5 alpha block.
/// Two 8 bit alpha endpoints followed by sixteen 3 bit palette indices, 64
/// bits in total. `u` views the same storage as one 64 bit word; index(),
/// setIndex() and the flip functions shift `u` by 16 + 3*i, which assumes
/// index i starts at that bit of the word.
struct AlphaBlockDXT5
{
union {
// Trailing comments: index bits used so far - total bits used so far.
struct {
uint64 alpha0 : 8; // 8
uint64 alpha1 : 8; // 16
uint64 bits0 : 3; // 3 - 19
uint64 bits1 : 3; // 6 - 22
uint64 bits2 : 3; // 9 - 25
uint64 bits3 : 3; // 12 - 28
uint64 bits4 : 3; // 15 - 31
uint64 bits5 : 3; // 18 - 34
uint64 bits6 : 3; // 21 - 37
uint64 bits7 : 3; // 24 - 40
uint64 bits8 : 3; // 27 - 43
uint64 bits9 : 3; // 30 - 46
uint64 bitsA : 3; // 33 - 49
uint64 bitsB : 3; // 36 - 52
uint64 bitsC : 3; // 39 - 55
uint64 bitsD : 3; // 42 - 58
uint64 bitsE : 3; // 45 - 61
uint64 bitsF : 3; // 48 - 64
};
uint64 u;
};
// Fill alpha[] with the palette; picks the 8 or 6 value variant by comparing the endpoints.
void evaluatePalette(uint8 alpha[8]) const;
void evaluatePalette8(uint8 alpha[8]) const;
void evaluatePalette6(uint8 alpha[8]) const;
// Copy all sixteen indices into index_array.
void indices(uint8 index_array[16]) const;
// Read / write a single index (0..15); values are 0..7.
uint index(uint index) const;
void setIndex(uint index, uint value);
// Flip all four rows / only the first two rows vertically.
void flip4();
void flip2();
};
/// DXT5 block.
/// An interpolated alpha block followed by a DXT1 color block.
struct BlockDXT5
{
AlphaBlockDXT5 alpha;
BlockDXT1 color;
// Decode color and alpha into the given 4x4 block.
void decodeBlock(ColorBlock * block) const;
// Flip all four rows / only the first two rows vertically.
void flip4();
void flip2();
};
/// 3DC block.
/// Two DXT5 style alpha blocks, one per channel; y is stored before x.
struct Block3DC
{
AlphaBlockDXT5 y;
AlphaBlockDXT5 x;
// Declared for decoding; the definition in BlockDXT.cpp is still an empty stub.
void decodeBlock(ColorBlock * block) const;
// Flip all four rows / only the first two rows vertically.
void flip4();
void flip2();
};
} // nv namespace
#endif // NV_TT_BLOCKDXT_H

View File

@ -0,0 +1,57 @@
# nvtt: texture compression library and the nvcompress command line tool.
PROJECT(nvtt)

ADD_SUBDIRECTORY(squish)

# Sources of the nvtt library.
SET(NVTT_SRCS
	nvtt.h
	CompressDXT.h
	CompressDXT.cpp
	CompressRGB.h
	CompressRGB.cpp
	FastCompressDXT.h
	FastCompressDXT.cpp
	BlockDXT.h
	BlockDXT.cpp
	dxtlib.cpp
	dxtlib_compat.h
	CompressionOptions.h
	CompressionOptions.cpp
	InputOptions.h
	InputOptions.cpp
	OutputOptions.cpp
	cuda/CudaUtils.h
	cuda/CudaUtils.cpp
	cuda/CudaCompressDXT.h
	cuda/CudaCompressDXT.cpp)

# Optional CUDA support: compile the kernel and link the CUDA library.
IF(CUDA_FOUND)
	ADD_DEFINITIONS(-DHAVE_CUDA)
	WRAP_CUDA(CUDA_SRCS cuda/CompressKernel.cu)
	SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS})
	SET(LIBS ${LIBS} ${CUDA_LIBRARY})
	INCLUDE_DIRECTORIES(${CUDA_INCLUDE_PATH})
ENDIF(CUDA_FOUND)

INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

ADD_DEFINITIONS(-DNVTT_EXPORTS)

# Both variants are built from NVTT_SRCS. The shared variant used to reference
# DXT_SRCS, which is never set, so the library had no sources.
IF(NVTT_SHARED)
	ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED)
	ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED)

TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish)

# test executables
ADD_EXECUTABLE(nvcompress compress.cpp)
TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)

INSTALL(TARGETS nvcompress DESTINATION bin)

#ADD_EXECUTABLE(nvtextool nvdxt.cpp configdialog.cpp)

View File

@ -0,0 +1,535 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Memory.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include "nvtt.h"
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "BlockDXT.h"
#include "CompressionOptions.h"
// squish
#include "squish/colourset.h"
//#include "squish/clusterfit.h"
#include "squish/fastclusterfit.h"
#include "squish/weightedclusterfit.h"
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
#include "atitc/ATI_Compress.h"
#endif
//#include <time.h>
using namespace nv;
using namespace nvtt;
void nv::fastCompressDXT1(const Image * image, const OutputOptions & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
compressBlock_BoundsRange(rgba, &block);
// @@ Use iterative optimization.
optimizeEndPoints(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
}
}
}
}
/// Fast DXT3 compression.
/// Walks the image in steps of four texels in x and y, compresses each block
/// with the bounds-range method and hands the result to the output handler
/// when one is set.
void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions & outputOptions)
{
    const uint width = image->width();
    const uint height = image->height();

    ColorBlock texels;
    BlockDXT3 compressed;

    for (uint by = 0; by < height; by += 4) {
        for (uint bx = 0; bx < width; bx += 4) {
            texels.init(image, bx, by);

            compressBlock_BoundsRange(texels, &compressed);

            // Without a handler the block is simply discarded.
            if (outputOptions.outputHandler == NULL) {
                continue;
            }
            outputOptions.outputHandler->writeData(&compressed, sizeof(compressed));
        }
    }
}
/// Fast DXT5 compression.
/// Walks the image in steps of four texels in x and y, compresses each block
/// with the bounds-range method and hands the result to the output handler
/// when one is set.
void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions & outputOptions)
{
    const uint width = image->width();
    const uint height = image->height();

    ColorBlock texels;
    BlockDXT5 compressed;

    for (uint by = 0; by < height; by += 4) {
        for (uint bx = 0; bx < width; bx += 4) {
            texels.init(image, bx, by);

            compressBlock_BoundsRange(texels, &compressed);

            // Without a handler the block is simply discarded.
            if (outputOptions.outputHandler == NULL) {
                continue;
            }
            outputOptions.outputHandler->writeData(&compressed, sizeof(compressed));
        }
    }
}
// Fast DXT5n (swizzled normal map) compressor.
// Every 4x4 tile is swizzled before encoding so that X lands in the green
// channel and Y in the alpha channel, then compressed with the bounding-box
// DXT5 compressor.
void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions & outputOptions)
{
    const uint width = image->width();
    const uint height = image->height();

    ColorBlock tile;
    BlockDXT5 encoded;

    for (uint by = 0; by < height; by += 4)
    {
        for (uint bx = 0; bx < width; bx += 4)
        {
            tile.init(image, bx, by);

            // copy X coordinate to green channel and Y coordinate to alpha channel.
            tile.swizzleDXT5n();

            compressBlock_BoundsRange(tile, &encoded);

            if (outputOptions.outputHandler != NULL)
            {
                outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
            }
        }
    }
}
// Fast BC4 compressor -- NOT IMPLEMENTED.
// Currently a no-op: both arguments are ignored and nothing is written to the
// output handler. The intended behavior (per the TODO) is to compress the red
// channel (X) only.
void nv::fastCompressBC4(const Image * image, const nvtt::OutputOptions & outputOptions)
{
// @@ TODO
// compress red channel (X)
}
// Fast BC5 compressor -- NOT IMPLEMENTED.
// Currently a no-op: both arguments are ignored and nothing is written to the
// output handler. The intended behavior (per the TODO) is to compress the red
// and green channels (X, Y).
void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions & outputOptions)
{
// @@ TODO
// compress red, green channels (X,Y)
}
// Runs squish::FastClusterFit::doPrecomputation() the first time it is called;
// every later call returns immediately.
// NOTE(review): the guard is a plain function-local static bool, so concurrent
// first calls are not synchronized -- confirm callers are single-threaded.
void nv::doPrecomputation()
{
    static bool alreadyRun = false;
    if (alreadyRun)
    {
        return;
    }

    // The flag is raised before the work starts, exactly as before.
    alreadyRun = true;
    squish::FastClusterFit::doPrecomputation();
}
// DXT1 compressor based on squish's FastClusterFit.
// The colour metric comes from compressionOptions.colorWeight; each encoded
// 4x4 block is forwarded to the output handler when one is set.
void nv::compressDXT1(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    const uint width = image->width();
    const uint height = image->height();
    const Vector3 & metric = compressionOptions.colorWeight;

    ColorBlock tile;
    BlockDXT1 encoded;

    // FastClusterFit needs its precomputation to have run once.
    doPrecomputation();

    for (uint by = 0; by < height; by += 4)
    {
        for (uint bx = 0; bx < width; bx += 4)
        {
            tile.init(image, bx, by);

            // Compress color.
            // (squish::WeightedClusterFit and squish::ClusterFit are the
            // alternative fitters that were tried here.)
            squish::ColourSet colours((uint8 *)tile.colors(), 0);
            squish::FastClusterFit fit(&colours, squish::kDxt1);
            fit.setMetric(metric.x(), metric.y(), metric.z());
            fit.Compress(&encoded);

            // @@ Use iterative cluster fit algorithm to improve error in highest quality mode.

            if (outputOptions.outputHandler == NULL)
            {
                continue;
            }
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
// DXT3 compressor.
// Alpha is stored with the explicit-alpha encoder; colour is fitted with
// squish's WeightedClusterFit over an alpha-weighted colour set, using
// compressionOptions.colorWeight as the metric.
void nv::compressDXT3(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    const uint width = image->width();
    const uint height = image->height();
    const Vector3 & metric = compressionOptions.colorWeight;

    ColorBlock tile;
    BlockDXT3 encoded;

    for (uint by = 0; by < height; by += 4)
    {
        for (uint bx = 0; bx < width; bx += 4)
        {
            tile.init(image, bx, by);

            // Compress explicit alpha.
            compressBlock(tile, &encoded.alpha);

            // Compress color.
            squish::ColourSet colours((uint8 *)tile.colors(), squish::kWeightColourByAlpha);
            squish::WeightedClusterFit fit(&colours, 0);
            fit.setMetric(metric.x(), metric.y(), metric.z());
            fit.Compress(&encoded.color);

            if (outputOptions.outputHandler != NULL)
            {
                outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
            }
        }
    }
}
// DXT5 compressor.
// Alpha uses the brute-force encoder at Quality_Highest and the iterative
// encoder otherwise (the error they return is not used here). Colour is
// fitted with squish's WeightedClusterFit over an alpha-weighted colour set.
void nv::compressDXT5(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    const uint width = image->width();
    const uint height = image->height();
    const Vector3 & metric = compressionOptions.colorWeight;

    ColorBlock tile;
    BlockDXT5 encoded;

    for (uint by = 0; by < height; by += 4)
    {
        for (uint bx = 0; bx < width; bx += 4)
        {
            tile.init(image, bx, by);

            // Compress alpha.
            if (compressionOptions.quality != Quality_Highest)
            {
                compressBlock_Iterative(tile, &encoded.alpha);
            }
            else
            {
                compressBlock_BruteForce(tile, &encoded.alpha);
            }

            // Compress color.
            squish::ColourSet colours((uint8 *)tile.colors(), squish::kWeightColourByAlpha);
            squish::WeightedClusterFit fit(&colours, 0);
            fit.setMetric(metric.x(), metric.y(), metric.z());
            fit.Compress(&encoded.color);

            if (outputOptions.outputHandler != NULL)
            {
                outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
            }
        }
    }
}
// DXT5n (swizzled normal map) compressor.
// After the DXT5n swizzle, Y lives in the alpha channel and X in green. Y is
// encoded with the iterative alpha encoder, and re-encoded with the
// brute-force encoder at Quality_Highest; X is fitted with squish's
// FastClusterFit using a green-only metric.
void nv::compressDXT5n(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    const uint width = image->width();
    const uint height = image->height();
    const bool bruteForceAlpha = (compressionOptions.quality == Quality_Highest);

    ColorBlock tile;
    BlockDXT5 encoded;

    // FastClusterFit needs its precomputation to have run once.
    doPrecomputation();

    for (uint by = 0; by < height; by += 4)
    {
        for (uint bx = 0; bx < width; bx += 4)
        {
            tile.init(image, bx, by);

            // copy X coordinate to green channel and Y coordinate to alpha channel.
            tile.swizzleDXT5n();

            // Compress Y. The iterative pass always runs first; the
            // brute-force pass then overwrites its result when requested.
            compressBlock_Iterative(tile, &encoded.alpha);
            if (bruteForceAlpha)
            {
                compressBlock_BruteForce(tile, &encoded.alpha);
            }

            // Compress X.
            squish::ColourSet colours((uint8 *)tile.colors(), 0);
            squish::FastClusterFit fit(&colours, 0);
            fit.setMetric(0, 1, 0);
            fit.Compress(&encoded.color);

            if (outputOptions.outputHandler != NULL)
            {
                outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
            }
        }
    }
}
// BC4 compressor.
// Every 4x4 tile is first encoded with the iterative alpha encoder. The
// brute-force encoder is tried afterwards when quality is Quality_Highest, or
// when quality is Quality_Production and the iterative error exceeds
// errorThreshold * 256. The accumulated error divided by the pixel count is
// printed to stdout at the end.
void nv::compressBC4(const Image * image, const nvtt::OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    const uint width = image->width();
    const uint height = image->height();

    // threshold should be from [0 - 1] but may also be higher...
    const uint threshold = uint(compressionOptions.errorThreshold * 256);

    const bool alwaysBruteForce = (compressionOptions.quality == Quality_Highest);
    const bool bruteForceOnError = (compressionOptions.quality == Quality_Production);

    ColorBlock tile;
    AlphaBlockDXT5 encoded;

    uint errorSum = 0;

    for (uint by = 0; by < height; by += 4)
    {
        for (uint bx = 0; bx < width; bx += 4)
        {
            tile.init(image, bx, by);

            // (compressBlock_BoundsRange is the cheaper alternative here.)
            uint blockErr = compressBlock_Iterative(tile, &encoded);

            if (alwaysBruteForce || (bruteForceOnError && blockErr > threshold))
            {
                // Try brute force algorithm.
                blockErr = compressBlock_BruteForce(tile, &encoded);
            }

            errorSum += blockErr;

            if (outputOptions.outputHandler != NULL)
            {
                outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
            }
        }
    }

    // @@ All the compressors should work like this.
    // Effect of adjusting threshold:
    // (threshold: error - time)
    //   0: 4.29 - 1.83
    //  32: 4.32 - 1.77
    //  48: 4.37 - 1.72
    //  64: 4.43 - 1.45
    //  74: 4.45 - 1.35
    //  92: 4.54 - 1.15
    // 128: 4.67 - 0.79
    // 256: 4.92 - 0.20
    // inf: 4.98 - 0.09
    printf("Alpha error: %f\n", float(errorSum) / (width*height));
}
// BC5 compressor.
// For every 4x4 tile, one ColorBlock is splatted with X and another with Y;
// each is encoded into its half of the Block3DC with the brute-force encoder
// (Quality_Highest) or the iterative encoder (any other quality). The errors
// returned by the encoders are not used here.
void nv::compressBC5(const Image * image, const nvtt::OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    const uint width = image->width();
    const uint height = image->height();

    ColorBlock xTile;
    ColorBlock yTile;

    Block3DC encoded;

    for (uint by = 0; by < height; by += 4)
    {
        for (uint bx = 0; bx < width; bx += 4)
        {
            xTile.init(image, bx, by);
            xTile.splatX();

            yTile.init(image, bx, by);
            yTile.splatY();

            // @@ Compute normal error, instead of separate xy errors.
            if (compressionOptions.quality != Quality_Highest)
            {
                compressBlock_Iterative(xTile, &encoded.x);
                compressBlock_Iterative(yTile, &encoded.y);
            }
            else
            {
                compressBlock_BruteForce(xTile, &encoded.x);
                compressBlock_BruteForce(yTile, &encoded.y);
            }

            if (outputOptions.outputHandler != NULL)
            {
                outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
            }
        }
    }
}
#if defined(HAVE_S3QUANT)

// DXT1 compressor backed by the external s3_quant coder.
// Each 4x4 tile is coded twice with CodeRGBBlock -- once at inLevel 4 and
// once at inLevel 3 -- and whichever result has the lower blockError() is
// written to the output handler. The average error per block is printed to
// stdout at the end.
void nv::s3CompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions)
{
    const uint w = image->width();
    const uint h = image->height();

    float error = 0.0f;

    BlockDXT1 dxtBlock3;
    BlockDXT1 dxtBlock4;
    ColorBlock block;

    for (uint y = 0; y < h; y += 4) {
        for (uint x = 0; x < w; x += 4) {
            block.init(image, x, y);

            // Init rgb block: 16 colors, each channel scaled to [0, 1].
            RGBBlock rgbBlock;
            rgbBlock.n = 16;
            for (uint i = 0; i < 16; i++) {
                rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f);
                rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f);
                rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f);
            }
            rgbBlock.weight[0] = 1.0f;
            rgbBlock.weight[1] = 1.0f;
            rgbBlock.weight[2] = 1.0f;

            // First pass: inLevel 4.
            rgbBlock.inLevel = 4;
            CodeRGBBlock(&rgbBlock);

            // Copy results to DXT block.
            dxtBlock4.col0.r = rgbBlock.endPoint[0][0];
            dxtBlock4.col0.g = rgbBlock.endPoint[0][1];
            dxtBlock4.col0.b = rgbBlock.endPoint[0][2];

            dxtBlock4.col1.r = rgbBlock.endPoint[1][0];
            dxtBlock4.col1.g = rgbBlock.endPoint[1][1];
            dxtBlock4.col1.b = rgbBlock.endPoint[1][2];

            dxtBlock4.setIndices(rgbBlock.index);

            // This block is stored with col0 >= col1. Swapping the end points
            // means indices 0<->1 and 2<->3 trade places, i.e. the low bit of
            // every 2-bit index flips.
            if (dxtBlock4.col0.u < dxtBlock4.col1.u) {
                swap(dxtBlock4.col0.u, dxtBlock4.col1.u);
                dxtBlock4.indices ^= 0x55555555;
            }

            uint error4 = blockError(block, dxtBlock4);

            // Second pass: inLevel 3.
            rgbBlock.inLevel = 3;
            CodeRGBBlock(&rgbBlock);

            // Copy results to DXT block.
            dxtBlock3.col0.r = rgbBlock.endPoint[0][0];
            dxtBlock3.col0.g = rgbBlock.endPoint[0][1];
            dxtBlock3.col0.b = rgbBlock.endPoint[0][2];

            dxtBlock3.col1.r = rgbBlock.endPoint[1][0];
            dxtBlock3.col1.g = rgbBlock.endPoint[1][1];
            dxtBlock3.col1.b = rgbBlock.endPoint[1][2];

            dxtBlock3.setIndices(rgbBlock.index);

            // This block is stored with col0 <= col1. After swapping the end
            // points only indices 0 and 1 trade places: the low bit is flipped
            // wherever the high bit of the 2-bit index is clear.
            if (dxtBlock3.col0.u > dxtBlock3.col1.u) {
                swap(dxtBlock3.col0.u, dxtBlock3.col1.u);
                dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555;
            }

            uint error3 = blockError(block, dxtBlock3);

            // Keep whichever encoding is closer to the source tile.
            if (error3 < error4) {
                error += error3;

                if (outputOptions.outputHandler != NULL) {
                    outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3));
                }
            }
            else {
                error += error4;

                if (outputOptions.outputHandler != NULL) {
                    outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4));
                }
            }
        }
    }

    // Number of 4x4 blocks visited by the loops above. Each factor needs its
    // own parentheses: `(w+3)/4 * (h+3)/4` parses as `((w+3)/4 * (h+3)) / 4`,
    // which is wrong whenever h is not a multiple of 4.
    const uint blockCount = ((w + 3) / 4) * ((h + 3) / 4);

    // An empty image has no blocks; report zero instead of dividing by zero.
    printf("error = %f\n", (blockCount > 0) ? error / blockCount : 0.0f);
}

#endif // defined(HAVE_S3QUANT)
#if defined(HAVE_ATITC)

// DXT1 compressor backed by the external ATI_Compress library.
// The whole image is converted in a single ATI_TC_ConvertTexture call into a
// temporary buffer, which is written to the output handler (if any) and then
// released.
void nv::atiCompressDXT1(const Image * image, const OutputOptions & outputOptions)
{
    // Init source texture
    ATI_TC_Texture srcTexture;
    srcTexture.dwSize = sizeof(srcTexture);
    srcTexture.dwWidth = image->width();
    srcTexture.dwHeight = image->height();
    srcTexture.dwPitch = image->width() * 4;
    srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
    srcTexture.pData = (ATI_TC_BYTE*) image->pixels();

    // Init dest texture
    ATI_TC_Texture destTexture;
    destTexture.dwSize = sizeof(destTexture);
    destTexture.dwWidth = image->width();
    destTexture.dwHeight = image->height();
    destTexture.dwPitch = 0;
    destTexture.format = ATI_TC_FORMAT_DXT1;
    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);

    // Without a destination buffer there is nothing to convert into.
    if (destTexture.pData == NULL) {
        return;
    }

    // Compress
    ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);

    if (outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
    }

    // The destination buffer is owned by this function; release it so that
    // every call does not leak a full compressed image.
    mem::free(destTexture.pData);
}

#endif // defined(HAVE_ATITC)

View File

@ -0,0 +1,65 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSDXT_H
#define NV_TT_COMPRESSDXT_H
#include <nvimage/nvimage.h>
#include "nvtt.h"
// Entry points of the block compressors implemented in CompressDXT.cpp.
// Each function walks the image in 4x4 tiles and writes every encoded block
// to outputOptions.outputHandler when a handler is set.
namespace nv
{
class Image;
class FloatImage;
// Runs the squish FastClusterFit precomputation once; later calls are no-ops.
void doPrecomputation();
// Fast compressors.
// These take no compression options; the DXT variants use the bounding-box
// block compressors.
void fastCompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT3(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT5(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT5n(const Image * image, const nvtt::OutputOptions & outputOptions);
// NOTE: the fast BC4/BC5 compressors are currently empty stubs (TODO in the .cpp).
void fastCompressBC4(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressBC5(const Image * image, const nvtt::OutputOptions & outputOptions);
// Normal compressors.
// These read quality, colorWeight and errorThreshold from the compression options.
void compressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5n(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC4(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC5(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
// External compressors.
// Only declared when the corresponding third-party library is enabled at build time.
#if defined(HAVE_S3QUANT)
void s3CompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions);
#endif
#if defined(HAVE_ATITC)
void atiCompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions);
#endif
} // nv namespace
#endif // NV_TT_COMPRESSDXT_H

View File

@ -0,0 +1,153 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <string.h>
#include <nvcore/Debug.h>
#include <nvimage/Image.h>
#include "CompressRGB.h"
#include "CompressionOptions.h"
using namespace nv;
using namespace nvtt;
// File-local helpers for the pixel format converter.
namespace
{
    // Size in bytes of one scanline holding `w` pixels of `bitsize` bits each.
    // The pixel size is rounded up to whole bytes and the scanline is padded
    // to a multiple of 4 bytes.
    inline uint computePitch(uint w, uint bitsize)
    {
        uint p = w * ((bitsize + 7) / 8);

        // Align to 32 bits.
        return ((p + 3) / 4) * 4;
    }

    // Convert `w` pixels from `src` to RGBA8888 in `dst`. Not implemented yet.
    // The source is const, like in the sibling converters, so that it accepts
    // the const scanline pointer handed out by Image.
    static void convert_to_rgba8888(const void * src, void * dst, uint w)
    {
        // @@ TODO
    }

    // Copy `w` pixels of 4 bytes each from `src` to `dst` unchanged.
    static void convert_to_bgra8888(const void * src, void * dst, uint w)
    {
        memcpy(dst, src, 4 * w);
    }

    // Convert `w` pixels from `src` to RGB888 in `dst`. Not implemented yet.
    static void convert_to_rgb888(const void * src, void * dst, uint w)
    {
        // @@ TODO
    }

    // Reduce an `inbits`-wide component to `outbits` bits by dropping its low
    // bits. Requires inbits > outbits.
    static uint truncate(uint c, uint inbits, uint outbits)
    {
        nvDebugCheck(inbits > outbits);
        c >>= inbits - outbits;
        return c; // the original fell off the end without returning a value
    }

    // Widen an `inbits`-wide component to `outbits` bits by replicating its
    // bit pattern from the most significant bit downward (for example the
    // 5-bit value 10101 becomes the 8-bit value 10101101), so that zero maps
    // to zero and the maximum input maps to the maximum output.
    // `c` must not have bits set above `inbits`. Requires
    // 0 < inbits <= outbits <= 32; out-of-contract widths return `c` unchanged.
    static uint bitexpand(uint c, uint inbits, uint outbits)
    {
        nvDebugCheck(inbits != 0 && inbits <= outbits && outbits <= 32);

        if (inbits == 0 || inbits >= outbits) {
            return c;
        }

        uint result = 0;
        uint filled = 0;
        while (filled < outbits) {
            const uint remaining = outbits - filled;
            if (remaining >= inbits) {
                // Room for another full copy of the pattern.
                result = (result << inbits) | c;
                filled += inbits;
            }
            else {
                // Only the top `remaining` bits of the pattern still fit.
                result = (result << remaining) | (c >> (inbits - remaining));
                filled = outbits;
            }
        }
        return result;
    }

    // Split a channel bit mask into the position of its lowest set bit
    // (`shift`) and the length of the run of set bits starting there (`size`).
    // A zero mask (channel not present) yields shift = 0 and size = 0.
    static void maskShiftAndSize(uint mask, uint & shift, uint & size)
    {
        shift = 0;
        size = 0; // was left uninitialized before being incremented

        // With no bit set the first loop below would never terminate.
        if (mask == 0) {
            return;
        }

        while ((mask & 1) == 0) {
            shift++;
            mask >>= 1;
        }

        while ((mask & 1) == 1) {
            size++;
            mask >>= 1;
        }
    }

} // namespace
// Pixel format converter.
// Writes the image to the output handler one scanline at a time, `pitch`
// bytes per scanline, where the pitch is derived from
// compressionOptions.bitcount.
// NOTE: only the straight 4-bytes-per-pixel copy is implemented so far; the
// channel masks are decoded but not applied yet, so any pixel format other
// than 32-bit BGRA does not produce correctly converted output.
void nv::compressRGB(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    nvCheck(image != NULL);

    const uint w = image->width();
    const uint h = image->height();

    // Position and width of every channel in the destination format. Not
    // consumed yet; they feed the generic conversion sketched below.
    uint rshift, rsize;
    maskShiftAndSize(compressionOptions.rmask, rshift, rsize);

    uint gshift, gsize;
    maskShiftAndSize(compressionOptions.gmask, gshift, gsize);

    uint bshift, bsize;
    maskShiftAndSize(compressionOptions.bmask, bshift, bsize);

    uint ashift, asize;
    maskShiftAndSize(compressionOptions.amask, ashift, asize);

    // Determine pitch.
    const uint pitch = computePitch(w, compressionOptions.bitcount);

    // convert_to_bgra8888 always stores 4 bytes per pixel, so the scanline
    // buffer has to hold 4 * w bytes even when the requested format is
    // narrower than 32 bits; sizing it by `pitch` alone overflowed the heap.
    const uint srcBytes = 4 * w;
    const uint bufferSize = (pitch > srcBytes) ? pitch : srcBytes;

    void * dst = malloc(bufferSize);
    if (dst == NULL)
    {
        // Out of memory (or an empty scanline): nothing can be written.
        return;
    }

    // Formats wider than 32 bits leave bytes past 4 * w untouched by the
    // conversion; clear them so no uninitialized memory reaches the output.
    memset(dst, 0, bufferSize);

    for (uint y = 0; y < h; y++)
    {
        const Color32 * src = image->scanline(y);

        convert_to_bgra8888(src, dst, w);

        // Sketch of the generic per-pixel conversion (not enabled yet):
        //   uint c = 0;
        //   c |= (src[i].r >> (8 - rsize)) << rshift;
        //   c |= (src[i].g >> (8 - gsize)) << gshift;
        //   c |= (src[i].b >> (8 - bsize)) << bshift;

        /*
        if (rmask == 0xFF000000 && gmask == 0xFF0000 && bmask == 0xFF00 && amask == 0xFF)
        {
            convert_to_rgba8888(src, dst, w);
        }
        else if (rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF && amask == 0)
        {
            convert_to_rgb888(src, dst, w);
        }
        else
        {
            // @@ Not supported.
        }
        */

        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(dst, pitch);
        }
    }

    free(dst);
}

View File

@ -0,0 +1,39 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSRGB_H
#define NV_TT_COMPRESSRGB_H
#include "nvtt.h"
namespace nv
{
class Image;
// Pixel format converter.
// Writes `image` to outputOptions.outputHandler scanline by scanline, using
// the bit count and channel masks stored in `compressionOptions`.
void compressRGB(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
} // nv namespace
#endif // NV_TT_COMPRESSRGB_H

View File

@ -0,0 +1,113 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "nvtt.h"
#include "CompressionOptions.h"
using namespace nv;
using namespace nvtt;
/// Constructor. Sets compression options to the default values.
/// The private state is heap-allocated here and bound to the reference member
/// `m`; the destructor releases it.
CompressionOptions::CompressionOptions() : m(*new CompressionOptions::Private())
{
reset();
}
/// Destructor. Frees the private state allocated by the constructor.
CompressionOptions::~CompressionOptions()
{
// `m` is a reference to a heap object, hence the address-of.
delete &m;
}
/// Set default compression options: DXT1 at normal quality with uniform color
/// weights, hardware compression enabled, and a 32-bit pixel format with blue
/// in the low byte and alpha in the high byte.
void CompressionOptions::reset()
{
    m.format = Format_DXT1;
    m.quality = Quality_Normal;

    // Same default as the errorThreshold argument of setQuality(). It used to
    // be left untouched here, so it was never given a defined default and a
    // reset() after setQuality() kept the stale value. compressBC4 reads it.
    m.errorThreshold = 0.5f;

    m.colorWeight.set(1.0f, 1.0f, 1.0f);
    m.useCuda = true;

    m.bitcount = 32;
    m.bmask = 0x000000FF;
    m.gmask = 0x0000FF00;
    m.rmask = 0x00FF0000;
    m.amask = 0xFF000000;

    // NOTE(review): externalCompressor is not reset here -- confirm whether
    // reset() is expected to clear it as well.
}
/// Set desired compression format.
/// @param format  Output format to store in the options.
void CompressionOptions::setFormat(Format format)
{
m.format = format;
}
/// Set compression quality settings.
/// @param quality         Quality level to use.
/// @param errorThreshold  Error threshold stored alongside the quality level;
///                        compressBC4 compares the per-block error against it
///                        (scaled by 256) at Quality_Production.
void CompressionOptions::setQuality(Quality quality, float errorThreshold /*= 0.5f*/)
{
    m.errorThreshold = errorThreshold;
    m.quality = quality;
}
/// Set the weights of each color channel.
/// The choice for these values is subjective. In many cases uniform color weights
/// (1.0, 1.0, 1.0) work very well. A popular choice is to use the NTSC luma encoding
/// weights (0.2126, 0.7152, 0.0722), but I think that blue contributes to our
/// perception more than 7%. A better choice in my opinion is (3, 4, 2). Ideally
/// the compressor should use a non linear colour metric as described here:
/// http://www.compuphase.com/cmetric.htm
///
/// The weights are normalized so that they sum to one. The first stored
/// component is the blue weight, the second the green weight, and the third
/// the remainder.
/// NOTE(review): blue-first storage presumably matches a BGRA channel order in
/// the pixel data handed to squish -- confirm. A zero sum is not guarded and
/// divides by zero.
void CompressionOptions::setColorWeights(float red, float green, float blue)
{
    const float sum = red + green + blue;

    const float blueShare = blue / sum;
    const float greenShare = green / sum;
    const float remainder = 1.0f - blueShare - greenShare;

    m.colorWeight.set(blueShare, greenShare, remainder);
}
/// Enable or disable hardware compression.
/// @param enable  Stored in the `useCuda` flag of the private options.
void CompressionOptions::enableHardwareCompression(bool enable)
{
m.useCuda = enable;
}
/// Set color mask to describe the RGB/RGBA format.
/// @param bitcount  Bits per pixel of the output format.
/// @param rmask     Bit mask of the red channel.
/// @param gmask     Bit mask of the green channel.
/// @param bmask     Bit mask of the blue channel.
/// @param amask     Bit mask of the alpha channel.
/// The values are stored as given; no validation is performed here.
void CompressionOptions::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask)
{
    // Channel layout.
    m.amask = amask;
    m.bmask = bmask;
    m.gmask = gmask;
    m.rmask = rmask;

    // Pixel size.
    m.bitcount = bitcount;
}
/// Use external compressor.
/// @param name  Name of the external compressor; stored in the private
///              options' `externalCompressor` string.
void CompressionOptions::setExternalCompressor(const char * name)
{
m.externalCompressor = name;
}

View File

@ -0,0 +1,57 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSIONOPTIONS_H
#define NV_TT_COMPRESSIONOPTIONS_H
#include <nvcore/StrLib.h>
#include <nvmath/Vector.h>
#include "nvtt.h"
namespace nvtt
{
// Private state behind the public CompressionOptions facade.
// The values are written by the CompressionOptions setters and read directly
// by the compressors.
struct CompressionOptions::Private
{
// Output format selected with setFormat().
Format format;
// Quality level and error threshold selected with setQuality().
Quality quality;
float errorThreshold;
// Per-channel weights, normalized by setColorWeights(); passed to the squish
// colour metric by the DXT compressors.
nv::Vector3 colorWeight;
// Pixel format description used by the RGB converter.
uint bitcount;
uint rmask;
uint gmask;
uint bmask;
uint amask;
// Set by enableHardwareCompression().
bool useCuda;
// Set by setExternalCompressor().
nv::String externalCompressor;
};
} // nvtt namespace
#endif // NV_TT_COMPRESSIONOPTIONS_H

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,81 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_FASTCOMPRESSDXT_H
#define NV_TT_FASTCOMPRESSDXT_H
#include <nvimage/nvimage.h>
// Per-block (4x4 tile) compressors and helpers.
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
// Color compression:
// Compressor that uses the diameter axis.
// NOTE(review): the previous comment here was a copy of the luminance-axis
// one; confirm the exact axis definition against the implementation.
void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the extremes of the luminance axis.
void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the best fit axis.
void compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Simple, but slow compressor that tests all color pairs.
void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force 6d search along the best fit axis.
void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Spatial greedy search.
// Each variant refines the solution already stored in `block`.
void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// Minimize error of the endpoints.
void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
// Error of an encoded block with respect to the source tile.
uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// Alpha compression:
// Explicit (DXT3) alpha.
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
// Bounding-box compressors for whole DXT3/DXT5 blocks.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block);
// DXT5 alpha block compressors; the returned value is the error of the
// encoded block (callers accumulate it and compare it against a threshold).
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
} // nv namespace
#endif // NV_TT_FASTCOMPRESSDXT_H

View File

@ -0,0 +1,250 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <string.h> // memcpy
#include <nvcore/Memory.h>
#include "nvtt.h"
#include "InputOptions.h"
using namespace nv;
using namespace nvtt;
namespace
{
    // Number of levels in a full mipmap chain for a w x h x d texture,
    // counting the base level. Each level halves every dimension (never going
    // below 1) and the chain ends once ALL dimensions have reached 1.
    //
    // The original loop used `w != 1 && h != 1`, which stopped as soon as
    // EITHER dimension hit 1 and therefore truncated the chain of non-square
    // textures (e.g. 256x4 produced 3 levels instead of 9).
    static int countMipmaps(int w, int h, int d)
    {
        int mipmap = 0;

        while (w > 1 || h > 1 || d > 1) {
            // Same as max(1, x / 2), written without the helper.
            w = (w > 1) ? w / 2 : 1;
            h = (h > 1) ? h / 2 : 1;
            d = (d > 1) ? d / 2 : 1;
            mipmap++;
        }

        return mipmap + 1;
    }
} // namespace
/// Constructor. Allocates the private state and loads the default options.
/// NOTE(review): reset() does not touch the image table (m.images), so its
/// initial value has to come from Private itself -- confirm in InputOptions.h.
InputOptions::InputOptions() : m(*new InputOptions::Private())
{
reset();
}
/// Destructor. Deletes the image table first, then the private state that was
/// allocated by the constructor.
InputOptions::~InputOptions()
{
resetTextureLayout();
// `m` is a reference to a heap object, hence the address-of.
delete &m;
}
/// Reset input options to their defaults.
/// The texture layout (image table, face and mipmap counts) is not touched;
/// use resetTextureLayout() for that.
void InputOptions::reset()
{
    // Input description.
    m.textureType = TextureType_2D;
    m.inputFormat = InputFormat_BGRA_8UB;
    m.wrapMode = WrapMode_Repeat;
    m.alphaTransparency = true;

    // Gamma.
    m.inputGamma = 2.2f;
    m.outputGamma = 2.2f;

    // Quantization.
    m.enableColorDithering = false;
    m.enableAlphaDithering = false;
    m.binaryAlpha = false;
    m.alphaThreshold = 127;

    // Mipmapping.
    m.generateMipmaps = false;
    m.maxLevel = -1;
    m.mipmapFilter = MipmapFilter_Box;
    m.normalizeMipmaps = false;

    // Normal map conversion: height is read from alpha only, and the four
    // frequency scales are normalized to sum to one.
    m.convertToNormalMap = false;
    m.heightFactors.set(0.0f, 0.0f, 0.0f, 1.0f);
    m.bumpFrequencyScale = Vector4(1.0f, 0.5f, 0.25f, 0.125f) / (1.0f + 0.5f + 0.25f + 0.125f);
}
/// Setup the input image.
/// Discards any previous layout and allocates one image descriptor per face
/// and mipmap level (6 faces for cube maps, 1 otherwise). The descriptors
/// carry the expected dimensions of each level; the pixel data is supplied
/// later through setMipmapData().
/// @param type  Texture type.
/// @param w     Width of the base level; 0 is treated as 1.
/// @param h     Height of the base level; 0 is treated as 1.
/// @param d     Depth of the base level; 0 is treated as 1.
void InputOptions::setTextureLayout(TextureType type, int w, int h, int d /*= 1*/)
{
    // Validate arguments.
    nvCheck(w >= 0);
    nvCheck(h >= 0);
    nvCheck(d >= 0);

    // Correct arguments.
    if (w == 0) w = 1;
    if (h == 0) h = 1;
    if (d == 0) d = 1;

    // Delete previous images.
    resetTextureLayout();

    m.textureType = type;

    // Allocate images.
    m.mipmapCount = countMipmaps(w, h, d);
    m.faceCount = (type == TextureType_Cube) ? 6 : 1;
    m.imageCount = m.mipmapCount * m.faceCount;

    m.images = new Private::Image[m.imageCount];

    for (int f = 0; f < m.faceCount; f++)
    {
        // Every face starts again from the base dimensions. Halving w/h/d
        // themselves left them at 1 after the first face, so faces 1..5 of a
        // cube map were described as 1x1x1 at every level.
        int levelWidth = w;
        int levelHeight = h;
        int levelDepth = d;

        for (int mipLevel = 0; mipLevel < m.mipmapCount; mipLevel++)
        {
            Private::Image & img = m.images[f * m.mipmapCount + mipLevel];
            img.width = levelWidth;
            img.height = levelHeight;
            img.depth = levelDepth;
            img.mipLevel = mipLevel;
            img.face = f;

            img.data = NULL;

            levelWidth = max(1, levelWidth / 2);
            levelHeight = max(1, levelHeight / 2);
            levelDepth = max(1, levelDepth / 2);
        }
    }
}
/// Discard the current texture layout.
/// Deletes the image descriptor array and zeroes the face, mipmap and image
/// counts. Does nothing when no layout has been set.
/// NOTE(review): whether the per-level pixel data is released along with the
/// descriptors depends on how Private::Image owns `data` -- confirm in
/// InputOptions.h.
void InputOptions::resetTextureLayout()
{
    if (m.images == NULL)
    {
        return;
    }

    // Delete image array.
    delete [] m.images;
    m.images = NULL;

    m.imageCount = 0;
    m.mipmapCount = 0;
    m.faceCount = 0;
}
// Copies the data to our internal structures.
bool InputOptions::setMipmapData(const void * data, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/)
{
nvCheck(depth == 1);
const int idx = face * m.mipmapCount + mipLevel;
if (m.images[idx].width != width || m.images[idx].height != height || m.images[idx].depth != depth || m.images[idx].mipLevel != mipLevel || m.images[idx].face != face)
{
// Invalid dimension or index.
return false;
}
m.images[idx].data = new nv::Image();
m.images[idx].data->allocate(width, height);
memcpy(m.images[idx].data->pixels(), data, width * height * 4);
return true;
}
/// Describe the format of the input.
/// @param format             Pixel format of the data passed to setMipmapData().
/// @param alphaTransparency  Stored as-is in the `alphaTransparency` option.
void InputOptions::setFormat(InputFormat format, bool alphaTransparency)
{
    m.alphaTransparency = alphaTransparency;
    m.inputFormat = format;
}
/// Set gamma settings.
/// @param inputGamma   Gamma of the input images.
/// @param outputGamma  Gamma of the output.
/// Both values are stored unchanged.
void InputOptions::setGamma(float inputGamma, float outputGamma)
{
    m.outputGamma = outputGamma;
    m.inputGamma = inputGamma;
}
/// Set texture wrapping mode.
/// @param mode  Wrap mode to store in the options.
void InputOptions::setWrapMode(WrapMode mode)
{
m.wrapMode = mode;
}
/// Set mipmapping options.
/// @param generateMipmaps  Whether mipmaps should be generated.
/// @param filter           Filter used to generate them.
/// @param maxLevel         Highest mipmap level; reset() uses -1 as the default.
void InputOptions::setMipmapping(bool generateMipmaps, MipmapFilter filter/*= MipmapFilter_Kaiser*/, int maxLevel/*= -1*/)
{
    m.maxLevel = maxLevel;
    m.mipmapFilter = filter;
    m.generateMipmaps = generateMipmaps;
}
/// Set quantization options.
/// @warning Do not enable dithering unless you know what you are doing. Quantization
/// introduces errors. It's better to let the compressor quantize the result to
/// minimize the error, instead of quantizing the data before handing it to
/// the compressor.
/// @param colorDithering  Enable dithering of the color channels.
/// @param alphaDithering  Enable dithering of the alpha channel.
/// @param binaryAlpha     Stored in the `binaryAlpha` option.
/// @param alphaThreshold  Stored in the `alphaThreshold` option.
void InputOptions::setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold/*= 127*/)
{
    // Dithering switches.
    m.enableAlphaDithering = alphaDithering;
    m.enableColorDithering = colorDithering;

    // Binary alpha settings.
    m.alphaThreshold = alphaThreshold;
    m.binaryAlpha = binaryAlpha;
}
/// Enable normal map conversion.
/// @param convert  Stored in the `convertToNormalMap` option.
void InputOptions::setConvertToNormalMap(bool convert)
{
m.convertToNormalMap = convert;
}
/// Set height evaluation factors.
/// The four per-channel scales are stored as a single Vector4 in
/// (red, green, blue, alpha) order, exactly as given.
void InputOptions::setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale)
{
// Do not normalize height factors: the scales are deliberately kept as-is
// (the sum below is intentionally not computed).
// float total = redScale + greenScale + blueScale + alphaScale;
m.heightFactors = Vector4(redScale, greenScale, blueScale, alphaScale);
}
/// Set normal map conversion filter.
///
/// The four weights are normalized so that they add up to one before being
/// stored. When the weights add up to zero they cannot be normalized; in that
/// case they are stored unscaled instead of dividing by zero, which would
/// fill the filter with NaN / infinity.
///
/// @param small  Weight of the first (smallest) filter.
/// @param medium Weight of the second filter.
/// @param big    Weight of the third filter.
/// @param large  Weight of the fourth (largest) filter.
void InputOptions::setNormalFilter(float small, float medium, float big, float large)
{
	const float total = small + medium + big + large;

	if (total != 0.0f)
	{
		m.bumpFrequencyScale = Vector4(small, medium, big, large) / total;
	}
	else
	{
		// Degenerate input: keep the raw weights rather than producing NaNs.
		m.bumpFrequencyScale = Vector4(small, medium, big, large);
	}
}
/// Enable or disable mipmap normalization.
/// @param normalize Flag stored verbatim in the options.
void InputOptions::setNormalizeMipmaps(bool normalize)
{
m.normalizeMipmaps = normalize;
}

View File

@ -0,0 +1,91 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_INPUTOPTIONS_H
#define NV_TT_INPUTOPTIONS_H
#include <nvmath/Vector.h>
#include <nvimage/Image.h>
#include "nvtt.h"
namespace nvtt
{
// Private state of InputOptions; the public setters write these fields directly.
struct InputOptions::Private
{
// Only `images` is initialized here. Every other field is left
// uninitialized by this constructor and must be set before it is read.
Private() : images(NULL) {}
WrapMode wrapMode;
TextureType textureType;
InputFormat inputFormat;
// Layout of the image array. Images are addressed as
// face * mipmapCount + mipLevel.
int faceCount;
int mipmapCount;
int imageCount;
struct Image;
// Array allocated with new[] (released with delete[]); NULL when no layout is set.
Image * images;
// Quantization.
bool enableColorDithering;
bool enableAlphaDithering;
bool binaryAlpha;
int alphaThreshold; // reference value used for binary alpha quantization.
bool alphaTransparency; // set to true if alpha is used for transparency.
// Gamma conversion.
float inputGamma;
float outputGamma;
// Mipmap generation options.
bool generateMipmaps;
int maxLevel;
MipmapFilter mipmapFilter;
// Normal map options.
bool normalizeMipmaps;
bool convertToNormalMap;
nv::Vector4 heightFactors; // (red, green, blue, alpha) scales, stored unnormalized.
nv::Vector4 bumpFrequencyScale; // (small, medium, big, large) filter weights divided by their sum.
};
// Internal image structure.
// Describes one face / mipmap level of the input and owns its pixel data.
struct InputOptions::Private::Image
{
	// Initialize every member. In particular `data` must start out as NULL:
	// the destructor deletes it unconditionally, so an uninitialized pointer
	// would be undefined behavior for any slot that never received data.
	Image() : mipLevel(0), face(0), width(0), height(0), depth(0), data(NULL) {}

	// Releases the owned pixel data (deleting NULL is a no-op).
	// NOTE(review): the struct is copyable by default; copying it would make
	// two objects delete the same `data` -- avoid copies.
	~Image() { delete data; }

	int mipLevel;
	int face;
	int width;
	int height;
	int depth;
	nv::Image * data;	// Owned; NULL until pixel data is provided.
};
} // nvtt namespace
#endif // NV_TT_INPUTOPTIONS_H

View File

@ -0,0 +1,32 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "nvtt.h"
using namespace nvtt;
/// Set default output options.
/// Currently a no-op: there are no output settings to restore yet.
void OutputOptions::reset()
{
// endianness = native...
}

View File

@ -0,0 +1,44 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
#ifndef CMDLINE_H
#define CMDLINE_H
#include <nvcore/Debug.h>
#include <stdarg.h>
// Message handler that forwards log messages to stderr.
// It registers itself on construction and unregisters on destruction, so it
// is meant to be used as a scoped object.
struct MyMessageHandler : public nv::MessageHandler {
	MyMessageHandler() {
		nv::debug::setMessageHandler( this );
	}
	~MyMessageHandler() {
		nv::debug::resetMessageHandler();
	}

	// Print a printf-style message to stderr.
	virtual void log( const char * str, va_list arg ) {
		// Work on a copy so that the caller's va_list is left untouched and
		// can still be used after this call. The copy, not the original,
		// must be the one that is consumed.
		va_list val;
		va_copy(val, arg);
		vfprintf(stderr, str, val);
		va_end(val);
	}
};
// Assert handler for the command line tools: reports the failed assertion on
// stderr and terminates the process. Registers itself on construction and
// unregisters on destruction.
struct MyAssertHandler : public nv::AssertHandler {
MyAssertHandler() {
nv::debug::setAssertHandler( this );
}
~MyAssertHandler() {
nv::debug::resetAssertHandler();
}
// Handler method, note that func might be NULL!
// `func` is currently not used in the report. The function never returns to
// the caller: exit(1) ends the process, so no value is returned.
virtual int assert( const char *exp, const char *file, int line, const char *func ) {
fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line);
nv::debug::dumpInfo();
exit(1);
}
};
#endif // CMDLINE_H

View File

@ -0,0 +1,354 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvimage/Image.h>
#include <nvimage/nvtt/nvtt.h>
#include "cmdline.h"
#include <time.h> // clock
// Output handler that writes the compressed data to a file and prints the
// progress as a percentage on stdout.
struct MyOutputHandler : public nvtt::OutputHandler
{
	MyOutputHandler() : total(0), progress(0), percentage(0), stream(NULL) {}
	MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), stream(new nv::StdOutputStream(name)) {}
	virtual ~MyOutputHandler() { delete stream; }

	// Open (or re-open) the output file and reset the progress counters.
	// Returns false and prints a message when the stream reports an error.
	bool open(const char * name)
	{
		// Release a stream from the constructor or a previous open() so it
		// is not leaked.
		delete stream;
		stream = new nv::StdOutputStream(name);
		percentage = progress = 0;
		if (stream->isError()) {
			printf("Error opening '%s' for writting\n", name);
			return false;
		}
		return true;
	}

	// Set the number of bytes that are expected to be written in total.
	virtual void setTotal(int t)
	{
		total = t;
	}

	virtual void mipmap(int size, int width, int height, int depth, int face, int miplevel)
	{
		// ignore.
	}

	// Output data.
	virtual void writeData(const void * data, int size)
	{
		nvDebugCheck(stream != NULL);
		stream->serialize(const_cast<void *>(data), size);
		progress += size;

		// Without a valid total there is no percentage to report (and the
		// division below would be a division by zero).
		if (total <= 0)
		{
			return;
		}

		// Compute in double: `100 * progress` overflows int once more than
		// about 21 MB have been written.
		const int p = int((100.0 * progress) / total);
		if (p != percentage)
		{
			percentage = p;
			printf("\r%d%%", percentage);
			fflush(stdout);
		}
	}

	int total;		// Expected number of bytes.
	int progress;	// Bytes written so far.
	int percentage;	// Last percentage printed.
	nv::StdOutputStream * stream;	// Owned output stream, may be NULL.
};
// Error handler for the command line tool: breaks into the debugger on any
// error. The error code itself is not inspected.
struct MyErrorHandler : public nvtt::ErrorHandler
{
virtual void error(nvtt::Error e)
{
nvDebugBreak();
}
};
// Configure the input options to convert a color image into a normal map:
// conversion on, height from the average of R, G and B, no gamma change
// (1.0 / 1.0) and normalized mipmaps.
void setColorToNormalMap(nvtt::InputOptions & inputOptions)
{
	const float third = 1.0f/3.0f;

	inputOptions.setGamma(1.0f, 1.0f);
	inputOptions.setNormalizeMipmaps(true);
	inputOptions.setConvertToNormalMap(true);
	inputOptions.setHeightEvaluation(third, third, third, 0.0f);

	// Alternative normal filters that were tried:
	//inputOptions.setNormalFilter(1.0f, 0, 0, 0);
	//inputOptions.setNormalFilter(0.0f, 0, 0, 1);
}
// Configure the input options for an image that already is a normal map:
// no conversion, no gamma change (1.0 / 1.0), normalized mipmaps.
void setNormalMap(nvtt::InputOptions & inputOptions)
{
	inputOptions.setNormalizeMipmaps(true);
	inputOptions.setGamma(1.0f, 1.0f);
	inputOptions.setConvertToNormalMap(false);
}
// Configure the input options for a regular color map:
// no normal map conversion, gamma 2.2 / 2.2, mipmaps are not normalized.
void setColorMap(nvtt::InputOptions & inputOptions)
{
	inputOptions.setNormalizeMipmaps(false);
	inputOptions.setGamma(2.2f, 2.2f);
	inputOptions.setConvertToNormalMap(false);
}
// nvcompress entry point.
//
// Parses the command line, loads the input image, configures the input and
// compression options accordingly and writes the compressed texture to the
// output file. Returns 0 on success and 1 on any error (or when the usage
// text is printed because no input file was given).
int main(int argc, char *argv[])
{
	MyAssertHandler assertHandler;
	MyMessageHandler messageHandler;

	bool normal = false;
	bool color2normal = false;
	bool wrapRepeat = false;
	bool noMipmaps = false;
	bool fast = false;
	bool nocuda = false;
	nvtt::Format format = nvtt::Format_BC1;
	const char * externalCompressor = NULL;

	nv::Path input;
	nv::Path output;

	// Parse arguments.
	for (int i = 1; i < argc; i++)
	{
		// Input options.
		if (strcmp("-color", argv[i]) == 0)
		{
			// Default, nothing to do.
		}
		else if (strcmp("-normal", argv[i]) == 0)
		{
			normal = true;
		}
		else if (strcmp("-tonormal", argv[i]) == 0)
		{
			color2normal = true;
		}
		else if (strcmp("-clamp", argv[i]) == 0)
		{
			// Default, nothing to do.
		}
		else if (strcmp("-repeat", argv[i]) == 0)
		{
			wrapRepeat = true;
		}
		else if (strcmp("-nomips", argv[i]) == 0)
		{
			noMipmaps = true;
		}
		// Compression options.
		else if (strcmp("-fast", argv[i]) == 0)
		{
			fast = true;
		}
		else if (strcmp("-nocuda", argv[i]) == 0)
		{
			nocuda = true;
		}
		else if (strcmp("-rgb", argv[i]) == 0)
		{
			format = nvtt::Format_RGB;
		}
		else if (strcmp("-bc1", argv[i]) == 0)
		{
			format = nvtt::Format_BC1;
		}
		else if (strcmp("-bc2", argv[i]) == 0)
		{
			format = nvtt::Format_BC2;
		}
		else if (strcmp("-bc3", argv[i]) == 0)
		{
			format = nvtt::Format_BC3;
		}
		else if (strcmp("-bc3n", argv[i]) == 0)
		{
			format = nvtt::Format_BC3n;
		}
		else if (strcmp("-bc4", argv[i]) == 0)
		{
			format = nvtt::Format_BC4;
		}
		else if (strcmp("-bc5", argv[i]) == 0)
		{
			format = nvtt::Format_BC5;
		}
		// Undocumented option. Mainly used for testing.
		else if (strcmp("-ext", argv[i]) == 0)
		{
			if (i+1 < argc && argv[i+1][0] != '-') {
				externalCompressor = argv[i+1];
				printf("using %s\n", argv[i+1]);
				i++;
			}
		}
		else if (argv[i][0] != '-')
		{
			// First non-option argument is the input file, optionally
			// followed by the output file. Parsing stops here.
			input = argv[i];

			if (i+1 < argc && argv[i+1][0] != '-') {
				output = argv[i+1];
			}
			else
			{
				// Derive the output name from the input name.
				output.copy(input.str());
				output.stripExtension();
				output.append(".dds");
			}

			break;
		}
		else
		{
			// A mistyped switch would otherwise silently fall back to the
			// defaults (e.g. BC1), so tell the user it was not understood.
			printf("Warning: unrecognized option '%s' ignored.\n", argv[i]);
		}
	}

	if (input.empty())
	{
		printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
		printf("usage: nvcompress [options] infile [outfile]\n\n");
		printf("Input options:\n");
		printf(" -color \tThe input image is a color map (default).\n");
		printf(" -normal \tThe input image is a normal map.\n");
		printf(" -tonormal\tConvert input to normal map.\n");
		printf(" -clamp \tClamp wrapping mode (default).\n");
		printf(" -repeat \tRepeat wrapping mode.\n");
		printf(" -nomips \tDisable mipmap generation.\n\n");
		printf("Compression options:\n");
		printf(" -fast \tFast compression.\n");
		printf(" -nocuda \tDo not use cuda compressor.\n");
		printf(" -rgb \tRGBA format\n");
		printf(" -bc1 \tBC1 format (DXT1)\n");
		printf(" -bc2 \tBC2 format (DXT3)\n");
		printf(" -bc3 \tBC3 format (DXT5)\n");
		printf(" -bc3n \tBC3 normal map format (DXT5n/RXGB)\n");
		printf(" -bc4 \tBC4 format (ATI1)\n");
		printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");
		return 1;
	}

	nv::Image image;
	if (!image.load(input))
	{
		printf("The file '%s' is not a supported image type.\n", input.str());
		return 1;
	}

	MyErrorHandler errorHandler;
	MyOutputHandler outputHandler(output);
	if (outputHandler.stream->isError())
	{
		printf("Error opening '%s' for writting\n", output.str());
		return 1;
	}

	// Set input options.
	nvtt::InputOptions inputOptions;
	inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());

	// setMipmapData rejects data that does not match the layout; compressing
	// without input data makes no sense, so stop here instead of going on.
	if (!inputOptions.setMipmapData(image.pixels(), image.width(), image.height()))
	{
		printf("Error setting the input image data.\n");
		return 1;
	}

	if (fast)
	{
		inputOptions.setMipmapping(true, nvtt::MipmapFilter_Box);
	}
	else
	{
		inputOptions.setMipmapping(true, nvtt::MipmapFilter_Kaiser);
	}

	if (wrapRepeat)
	{
		inputOptions.setWrapMode(nvtt::WrapMode_Repeat);
	}
	else
	{
		inputOptions.setWrapMode(nvtt::WrapMode_Clamp);
	}

	if (normal)
	{
		setNormalMap(inputOptions);
	}
	else if (color2normal)
	{
		setColorToNormalMap(inputOptions);
	}
	else
	{
		setColorMap(inputOptions);
	}

	// Applied last so that it overrides the mipmapping setup above.
	if (noMipmaps)
	{
		inputOptions.setMipmapping(false);
	}

	nvtt::CompressionOptions compressionOptions;
	compressionOptions.setFormat(format);
	if (fast)
	{
		compressionOptions.setQuality(nvtt::Quality_Fastest);
	}
	else
	{
		compressionOptions.setQuality(nvtt::Quality_Normal);
		//compressionOptions.setQuality(nvtt::Quality_Production, 0.5f);
		//compressionOptions.setQuality(nvtt::Quality_Highest);
	}
	compressionOptions.enableHardwareCompression(!nocuda);
	compressionOptions.setColorWeights(1, 1, 1);

	if (externalCompressor != NULL)
	{
		compressionOptions.setExternalCompressor(externalCompressor);
	}

	// The estimated size drives the progress percentage of the output handler.
	outputHandler.setTotal(nvtt::estimateSize(inputOptions, compressionOptions));

	nvtt::OutputOptions outputOptions(&outputHandler, &errorHandler);

	// Time the compression only.
	clock_t start = clock();
	nvtt::compress(inputOptions, outputOptions, compressionOptions);
	clock_t end = clock();

	printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);

	return 0;
}

View File

@ -0,0 +1,481 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "CudaMath.h"
#define THREAD_NUM 64 // Number of threads per block.
#if __DEVICE_EMULATION__
#define __debugsync() __syncthreads()
#else
#define __debugsync()
#endif
typedef unsigned short ushort;
typedef unsigned int uint;
// Exchange the contents of two values (device side helper).
template <class T>
__device__ inline void swap(T & a, T & b)
{
	const T saved = b;
	b = a;
	a = saved;
}
__constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f };
////////////////////////////////////////////////////////////////////////////////
// Round color to RGB565 and expand
////////////////////////////////////////////////////////////////////////////////
// Quantize a color to 5-6-5 bits and return the quantized color as floats.
//  v: input color; each component is clamped with __saturatef before scaling.
//  w: receives the packed 16 bit value (x in bits 11-15, y in bits 5-10, z in bits 0-4).
// Returns the rounded components scaled back by the constants below.
inline __device__ float3 roundAndExpand(float3 v, ushort * w)
{
// Scale to the integer range of each channel (5, 6 and 5 bits) and round.
v.x = rintf(__saturatef(v.x) * 31.0f);
v.y = rintf(__saturatef(v.y) * 63.0f);
v.z = rintf(__saturatef(v.z) * 31.0f);
*w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z;
// The factors are close to, but not exactly, 1/31 and 1/63.
// NOTE(review): presumably chosen to mimic how the bits are replicated when
// the 5/6 bit values are expanded to 8 bits -- confirm.
v.x *= 0.03227752766457f; // approximate integer bit expansion.
v.y *= 0.01583151765563f;
v.z *= 0.03227752766457f;
return v;
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate permutations
////////////////////////////////////////////////////////////////////////////////
// Evaluate one index assignment (permutation) for the four color mode.
//  colors:      the 16 colors of the block.
//  permutation: 2 bits per color selecting its palette entry.
//  start, end:  receive the two endpoints rounded to 5-6-5.
// Returns the weighted error of the fit. The value omits the constant sum of
// the squared colors, so it is only meaningful when compared against the
// error of other permutations for the same block.
static __device__ float evalPermutation4(const float3 * colors, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f);
// Compute alpha & beta for this permutation.
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
// Index 0 -> beta 0, index 1 -> beta 1, index 2 -> beta 1/3, index 3 -> beta 2/3.
float beta = (bits & 1);
if (bits & 2) beta = (1 + beta) / 3.0f;
float alpha = 1.0f - beta;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * colors[i];
betax_sum += beta * colors[i];
}
// alpha2, beta2, alphabeta and factor could be precomputed for each permutation, but it's faster to recompute them.
// Inverse of the determinant of the 2x2 normal equations.
// NOTE(review): the determinant is zero when all colors use the same index;
// the resulting inf/NaN error then never compares as smaller in the caller -- confirm this is intended.
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6-5 color and expand...
a = roundAndExpand(a, start);
b = roundAndExpand(b, end);
// compute the error
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return dot(e, kColorMetric);
}
// Evaluate one index assignment (permutation) for the three color mode.
// Same as evalPermutation4, except that index 2 (and 3) maps to the midpoint
// between the two endpoints.
//  colors:      the 16 colors of the block.
//  permutation: 2 bits per color selecting its palette entry.
//  start, end:  receive the two endpoints rounded to 5-6-5.
// Returns the weighted error of the fit, without the constant sum of the
// squared colors.
static __device__ float evalPermutation3(const float3 * colors, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f);
// Compute alpha & beta for this permutation.
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
// Index 0 -> beta 0, index 1 -> beta 1, any index with bit 1 set -> beta 1/2.
float beta = (bits & 1);
if (bits & 2) beta = 0.5f;
float alpha = 1.0f - beta;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * colors[i];
betax_sum += beta * colors[i];
}
// Inverse of the determinant of the 2x2 normal equations (see the note on
// degenerate permutations in evalPermutation4: a zero determinant yields inf/NaN).
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6-5 color and expand...
a = roundAndExpand(a, start);
b = roundAndExpand(b, end);
// compute the error
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return dot(e, kColorMetric);
}
////////////////////////////////////////////////////////////////////////////////
// Sort colors
////////////////////////////////////////////////////////////////////////////////
// Sort the 16 colors of a block by their associated value.
//  values: sort keys, one per color.
//  colors: colors to reorder; sorted in place.
//  xrefs:  receives, for each sorted position, the original index of the color.
// The two build variants differ in what they do to `values`: the emulation
// path sorts `values` as well, the device path leaves `values` untouched and
// looks the keys up through `xrefs`.
__device__ void sortColors(float * values, float3 * colors, int * xrefs)
{
#if __DEVICE_EMULATION__
// Emulation: a single thread does all the work sequentially.
if (threadIdx.x == 0)
{
for( int i = 0; i < 16; ++i )
{
xrefs[i] = i;
}
// Use a sequential sort on emulation.
// Insertion sort of the keys; xrefs follows the same swaps.
for( int i = 0; i < 16; ++i )
{
for( int j = i; j > 0 && values[j] < values[j - 1]; --j )
{
swap( values[j], values[j - 1] );
swap( xrefs[j], xrefs[j - 1] );
// swap( colors[j], colors[j - 1] );
}
}
// Reorder the colors in one pass using the final cross references.
float3 tmp[16];
for( int i = 0; i < 16; ++i )
{
tmp[i] = colors[i];
}
for( int i = 0; i < 16; ++i )
{
int xid = xrefs[i];
colors[i] = tmp[xid];
}
}
#else
// Device: each calling thread handles the element at its own index.
// NOTE(review): there is no barrier between the steps below; presumably this
// relies on the 16 participating threads running in lockstep -- confirm.
int tid = threadIdx.x;
xrefs[tid] = tid;
// Parallel bitonic sort.
for (int k = 2; k <= 16; k *= 2)
{
// bitonic merge:
for (int j = k / 2; j>0; j /= 2)
{
// Only the lower thread of each pair performs the compare and swap.
int ixj = tid ^ j;
if (ixj > tid) {
// @@ Optimize these branches.
// (tid & k) selects the sort direction of the current sub-sequence.
if ((tid & k) == 0) {
if (values[xrefs[tid]] > values[xrefs[ixj]]) {
// swap(values[tid], values[ixj]);
swap(colors[tid], colors[ixj]);
swap(xrefs[tid], xrefs[ixj]);
}
}
else {
if (values[xrefs[tid]] < values[xrefs[ixj]]) {
// swap(values[tid], values[ixj]);
swap(colors[tid], colors[ixj]);
swap(xrefs[tid], xrefs[ixj]);
}
}
}
}
}
#endif
// It would be faster to avoid color swaps during the sort, but there
// are compiler bugs preventing that.
// (Disabled code; note that `tid` is only declared in the device path above.)
#if 0
float3 tmp = colors[xrefs[tid]];
colors[tid] = tmp;
#endif
}
// Rank based sort: every thread counts how many of the 16 values are smaller
// than its own and moves its color to that position.
// This sort is faster, but does not sort correctly elements with the same value
// (equal values get the same rank and therefore the same destination).
//  values: sort keys, one per color.
//  colors: colors to reorder in place.
//  cmp:    receives the rank computed by each thread.
__device__ void sortColors2(float * values, float3 * colors, int * cmp)
{
	const int tid = threadIdx.x;
	const float key = values[tid];

	// Number of elements that are strictly smaller than this thread's key.
	int rank = 0;
	for (int i = 0; i < 16; i++)
	{
		rank += (values[i] < key);
	}
	cmp[tid] = rank;

	// Read the color before scattering it to its sorted position.
	const float3 tmp = colors[tid];
	colors[rank] = tmp;
}
////////////////////////////////////////////////////////////////////////////////
// Find index with minimum error
////////////////////////////////////////////////////////////////////////////////
// Parallel reduction that finds the smallest error of the thread block.
//  errors:  THREAD_NUM entries, one error per thread; used as scratch space.
//  indices: THREAD_NUM entries; on return indices[0] is the index of the
//           thread that had the smallest error and errors[0] that error.
// Must be called by every thread of the block, since it contains barriers.
__device__ void minimizeError(float * errors, int * indices)
{
	const int idx = threadIdx.x;

#if __DEVICE_EMULATION__
	// Emulation: plain tree reduction with a barrier before every step.
	for(int d = THREAD_NUM/2; d > 0; d >>= 1)
	{
		__syncthreads();

		if (idx < d)
		{
			float err0 = errors[idx];
			float err1 = errors[idx + d];

			if (err1 < err0) {
				errors[idx] = err1;
				indices[idx] = indices[idx + d];
			}
		}
	}
#else
	// Steps that involve more than 64 elements need a barrier each.
	// With THREAD_NUM == 64 this loop does not execute at all.
	for(int d = THREAD_NUM/2; d > 32; d >>= 1)
	{
		__syncthreads();

		if (idx < d)
		{
			float err0 = errors[idx];
			float err1 = errors[idx + d];

			if (err1 < err0) {
				errors[idx] = err1;
				indices[idx] = indices[idx + d];
			}
		}
	}

	// The first unrolled step reads entries written by the threads with
	// idx >= 32, so a barrier is required before it. The loop above cannot
	// be relied on for this because it may run zero iterations.
	__syncthreads();

	// unroll last 6 steps
	// Only the first 32 threads take part: thread 32 would read
	// errors[64] / indices[64], which is past the end of the arrays.
	// NOTE(review): the steps below have no barriers in between and rely on
	// the 32 threads executing in lockstep -- revisit for newer hardware.
	if (idx < 32)
	{
		if (errors[idx + 32] < errors[idx]) {
			errors[idx] = errors[idx + 32];
			indices[idx] = indices[idx + 32];
		}
		if (errors[idx + 16] < errors[idx]) {
			errors[idx] = errors[idx + 16];
			indices[idx] = indices[idx + 16];
		}
		if (errors[idx + 8] < errors[idx]) {
			errors[idx] = errors[idx + 8];
			indices[idx] = indices[idx + 8];
		}
		if (errors[idx + 4] < errors[idx]) {
			errors[idx] = errors[idx + 4];
			indices[idx] = indices[idx + 4];
		}
		if (errors[idx + 2] < errors[idx]) {
			errors[idx] = errors[idx + 2];
			indices[idx] = indices[idx + 2];
		}
		if (errors[idx + 1] < errors[idx]) {
			errors[idx] = errors[idx + 1];
			indices[idx] = indices[idx + 1];
		}
	}
#endif
}
////////////////////////////////////////////////////////////////////////////////
// Compress color block
////////////////////////////////////////////////////////////////////////////////
// Compress one 4x4 block per thread block.
//  permutations: table of index permutations to evaluate; each thread reads
//                the entries at idx + THREAD_NUM * i.
//  image:        input colors, 16 consecutive entries per block.
//  result:       output, two uints per block: the endpoints and the indices.
// Every thread evaluates its share of the permutations; a reduction then
// picks the thread with the smallest error, which writes the result.
__global__ void compress(const uint * permutations, const uint * image, uint * result)
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
__shared__ float3 colors[16];
__shared__ float dps[16];
__shared__ int xrefs[16];
// The first 16 threads load and sort the colors of the block. In emulation
// builds the section is split into several `if (idx < 16)` scopes with a
// barrier in between.
// NOTE(review): in emulation builds `c` is declared in a scope that is closed
// before it is used -- confirm that configuration still compiles.
if (idx < 16)
{
// Read color.
uint c = image[(bid) * 16 + idx];
// No need to synchronize, 16 < warp size.
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Copy color to shared mem.
// The lowest byte goes to z, the next to y and the third to x; the top byte is ignored.
colors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f);
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
float3 axis = bestFitLine(colors);
dps[idx] = dot(colors[idx], axis);
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, colors, xrefs);
}
// Best candidate found by this thread.
// NOTE(review): bestStart, bestEnd and bestPermutation stay uninitialized if
// no permutation yields an error below FLT_MAX -- confirm this cannot happen.
ushort bestStart, bestEnd;
uint bestPermutation;
float bestError = FLT_MAX;
__syncthreads();
// Four color mode: each thread evaluates up to 16 table entries; in the last
// round only the first 32 threads take part.
for(int i = 0; i < 16; i++)
{
if (i == 15 && idx >= 32) break;
ushort start, end;
uint permutation = permutations[idx + THREAD_NUM * i];
float error = evalPermutation4(colors, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
// Order the endpoints so that start >= end, swapping indices 0 and 1 (and 2
// and 3) to compensate.
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
// Three color mode: each thread evaluates up to 3 table entries; in the last
// round only the first 32 threads take part.
for(int i = 0; i < 3; i++)
{
if (i == 2 && idx >= 32) break;
ushort start, end;
uint permutation = permutations[idx + THREAD_NUM * i];
float error = evalPermutation3(colors, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
// Here the endpoints are ordered the other way around (start <= end); only
// the indices whose high bit is clear (0 and 1) are flipped.
if (bestStart > bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
}
}
}
// With identical endpoints all indices are equivalent; use index 0 everywhere.
if (bestStart == bestEnd)
{
bestPermutation = 0;
}
__syncthreads();
// Use a parallel reduction to find minimum error.
__shared__ float errors[THREAD_NUM];
__shared__ int indices[THREAD_NUM];
errors[idx] = bestError;
indices[idx] = idx;
minimizeError(errors, indices);
__syncthreads();
// Only write the result of the winner thread.
if (idx == indices[0])
{
// Reorder permutation.
// The indices refer to the sorted colors; xrefs maps each sorted position
// back to the original position within the block.
uint perm = 0;
for(int i = 0; i < 16; i++)
{
int ref = xrefs[i];
perm |= ((bestPermutation >> (2 * i)) & 3) << (2 * ref);
}
// Write endpoints. (bestStart, bestEnd)
result[2 * bid + 0] = (bestEnd << 16) | bestStart;
// Write palette indices (permutation).
result[2 * bid + 1] = perm;
}
}
////////////////////////////////////////////////////////////////////////////////
// Launch kernel
////////////////////////////////////////////////////////////////////////////////
// Host entry point: upload the color weights and launch the compression
// kernel with one thread block of THREAD_NUM threads per 4x4 block.
//  blockNum:  number of blocks to compress.
//  d_data:    device pointer to the input colors.
//  d_result:  device pointer that receives the compressed blocks.
//  d_bitmaps: device pointer to the permutation table.
//  weights:   three color weights copied into kColorMetric.
extern "C" void compressKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps, float weights[3])
{
	// Set constants.
	const size_t weightBytes = sizeof(float) * 3;
	cudaMemcpyToSymbol(kColorMetric, weights, weightBytes, 0);

	// Note the argument order: the kernel takes the permutation table first.
	compress<<<blockNum, THREAD_NUM>>>(d_bitmaps, d_data, d_result);
}

View File

@ -0,0 +1,264 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Containers.h>
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/nvtt/CompressionOptions.h>
#include "CudaCompressDXT.h"
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda_runtime.h>
#endif
using namespace nv;
using namespace nvtt;
#if defined HAVE_CUDA
extern "C" void compressKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps, float weights[3]);
static uint * d_bitmaps = NULL;
// Build the table of 1024 index permutations used by the compression kernel
// and upload it to device memory (d_bitmaps). The table is built only once;
// later calls return immediately.
// Layout: 151 three cluster bitmaps padded to 160 entries, followed by the
// four cluster bitmaps, padded to 1024 entries in total.
static void doPrecomputation()
{
if (d_bitmaps != NULL) {
return;
}
uint bitmaps[1024];
int indices[16];
int num = 0;
// Compute bitmaps with 3 clusters:
// first cluster [0,i) is at the start
for( int m = 0; m < 16; ++m )
{
indices[m] = 0;
}
const int imax = 15;
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is half along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// last cluster [j,k) is at the end
if( j < 16 )
{
indices[j] = 1;
}
// Pack the 16 indices, 2 bits each, lowest element first.
uint bitmap = 0;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
}
bitmaps[num] = bitmap;
num++;
}
}
nvDebugCheck(num == 151);
// Align to 160.
// The padding entries all hold the same fixed bitmap.
for(int i = 0; i < 9; i++)
{
bitmaps[num] = 0x000AA555;
num++;
}
nvDebugCheck(num == 160);
// Append bitmaps with 4 clusters:
// first cluster [0,i) is at the start
for( int m = 0; m < 16; ++m )
{
indices[m] = 0;
}
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < 16; ++m )
{
indices[m] = 3;
}
int kmax = ( j == 0 ) ? 15 : 16;
for( int k = kmax; k >= j; --k )
{
// last cluster [k,n) is at the end
if( k < 16 )
{
indices[k] = 1;
}
uint bitmap = 0;
bool hasThree = false;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
if (indices[p] == 3) hasThree = true;
}
// Bitmaps without index 3 are already part of the three cluster set above.
if (hasThree) {
bitmaps[num] = bitmap;
num++;
}
}
}
}
nvDebugCheck(num == 975);
// Align to 1024.
for(int i = 0; i < 49; i++)
{
bitmaps[num] = 0x00AAFF55;
num++;
}
nvDebugCheck(num == 1024);
// Upload bitmaps.
// NOTE(review): the results of cudaMalloc / cudaMemcpy are not checked; if
// the allocation fails d_bitmaps stays NULL and is passed on to the kernel.
cudaMalloc((void**) &d_bitmaps, 1024 * sizeof(uint));
cudaMemcpy(d_bitmaps, bitmaps, 1024 * sizeof(uint), cudaMemcpyHostToDevice);
// @@ Check for errors.
}
#endif
/// Compress image using CUDA.
///
/// The image is rearranged into 4x4 blocks, uploaded to the device in batches
/// and compressed there; each compressed batch is passed to the output
/// handler. Blocks that extend past the image border repeat the pixels that
/// are available.
///
/// @param image              Image to compress.
/// @param outputOptions      Provides the output handler that receives the
///                           data and the error handler that is notified of
///                           failures; either of them may be NULL.
/// @param compressionOptions Provides the color weights.
///
/// Without HAVE_CUDA the function only reports Error_CudaError.
void nv::cudaCompressDXT1(const Image * image, const OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions)
{
	nvDebugCheck(cuda::isHardwarePresent());

#if defined HAVE_CUDA

	doPrecomputation();

	// Image size in blocks.
	const uint w = (image->width() + 3) / 4;
	const uint h = (image->height() + 3) / 4;

	uint imageSize = w * h * 16 * sizeof(Color32);
	uint * blockLinearImage = (uint *) malloc(imageSize);

	// malloc may legitimately return NULL for an empty image; anything else
	// is an allocation failure that would crash in the loop below.
	if (blockLinearImage == NULL && imageSize != 0)
	{
		nvDebug("Error: could not allocate %u bytes for the block linear image.\n", imageSize);
		if (outputOptions.errorHandler != NULL)
		{
			// No more specific error code is available here.
			outputOptions.errorHandler->error(nvtt::Error_CudaError);
		}
		return;
	}

	// Convert linear image to block linear.
	for(uint by = 0; by < h; by++) {
		for(uint bx = 0; bx < w; bx++) {
			// Size of the part of the block that lies inside the image.
			const uint bw = min(image->width() - bx * 4, 4U);
			const uint bh = min(image->height() - by * 4, 4U);

			for (uint i = 0; i < 16; i++) {
				// Wrap around inside the valid part for border blocks.
				const int x = (i & 3) % bw;
				const int y = (i / 4) % bh;
				blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
			}
		}
	}

	const uint blockNum = w * h;
	const uint compressedSize = blockNum * 8;
	const uint blockMax = 32768; // 65535

	// Allocate image in device memory.
	uint * d_data = NULL;
	const cudaError_t dataErr = cudaMalloc((void**) &d_data, min(imageSize, blockMax * 64U));

	// Allocate result.
	uint * d_result = NULL;
	const cudaError_t resultErr = cudaMalloc((void**) &d_result, min(compressedSize, blockMax * 8U));

	if (dataErr != cudaSuccess || resultErr != cudaSuccess)
	{
		nvDebug("CUDA Error: %s\n", cudaGetErrorString(dataErr != cudaSuccess ? dataErr : resultErr));
		if (outputOptions.errorHandler != NULL)
		{
			outputOptions.errorHandler->error(nvtt::Error_CudaError);
		}

		// Release whatever was allocated; freeing NULL is harmless.
		free(blockLinearImage);
		cudaFree(d_data);
		cudaFree(d_result);
		return;
	}

	// The color weights do not change between batches.
	float weights[3];
	weights[0] = compressionOptions.colorWeight.x();
	weights[1] = compressionOptions.colorWeight.y();
	weights[2] = compressionOptions.colorWeight.z();

	// TODO: Add support for multiple GPUs.
	uint bn = 0;
	while(bn != blockNum)
	{
		// Process at most blockMax blocks per launch.
		uint count = min(blockNum - bn, blockMax);

		cudaMemcpy(d_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);

		// Launch kernel.
		compressKernel(count, d_data, d_result, d_bitmaps, weights);

		// Check for errors.
		cudaError_t err = cudaGetLastError();
		if (err != cudaSuccess)
		{
			nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));

			if (outputOptions.errorHandler != NULL)
			{
				outputOptions.errorHandler->error(nvtt::Error_CudaError);
			}
		}

		// Copy result to host, overwrite swizzled image.
		// The start of the buffer only holds blocks that were already uploaded,
		// and the result of a batch is smaller than its input, so this does not
		// clobber pending input.
		cudaMemcpy(blockLinearImage, d_result, count * 8, cudaMemcpyDeviceToHost);

		// Output result.
		if (outputOptions.outputHandler != NULL)
		{
			outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
		}

		bn += count;
	}

	free(blockLinearImage);
	cudaFree(d_data);
	cudaFree(d_result);

#else
	if (outputOptions.errorHandler != NULL)
	{
		outputOptions.errorHandler->error(Error_CudaError);
	}
#endif
}

View File

@ -0,0 +1,39 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_CUDACOMPRESSDXT_H
#define NV_TT_CUDACOMPRESSDXT_H
#include <nvimage/nvimage.h>
#include <nvimage/nvtt/nvtt.h>
namespace nv
{
// Forward declaration only; this header does not need the full Image definition.
class Image;
// Compress `image` to DXT1 using the CUDA code path.
// Results and errors are communicated through `outputOptions`; the color weights
// and other settings come from `compressionOptions`.
// NOTE(review): the implementation appears to report Error_CudaError through
// outputOptions.errorHandler when CUDA is unavailable or a kernel fails -- confirm.
void cudaCompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
} // nv namespace
#endif // NV_TT_CUDACOMPRESSDXT_H

View File

@ -0,0 +1,214 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// Math functions and operators to be used with vector types.
#ifndef CUDAMATH_H
#define CUDAMATH_H
#include <float.h>
#include <math.h>
// Component-wise product of two vectors.
inline __device__ __host__ float3 operator *(float3 a, float3 b)
{
    float3 product;
    product.x = a.x * b.x;
    product.y = a.y * b.y;
    product.z = a.z * b.z;
    return product;
}
// Scale every component of `v` by the scalar `f` (scalar on the left).
inline __device__ __host__ float3 operator *(float f, float3 v)
{
    float3 scaled;
    scaled.x = v.x * f;
    scaled.y = v.y * f;
    scaled.z = v.z * f;
    return scaled;
}
// Scale every component of `v` by the scalar `f` (scalar on the right).
inline __device__ __host__ float3 operator *(float3 v, float f)
{
    // Same arithmetic as the scalar-on-the-left overload.
    return f * v;
}
// Component-wise sum of two vectors.
inline __device__ __host__ float3 operator +(float3 a, float3 b)
{
    float3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
// Add `a` to `b` in place, component by component.
inline __device__ __host__ void operator +=(float3 & b, float3 a)
{
    b = make_float3(b.x + a.x, b.y + a.y, b.z + a.z);
}
// Component-wise difference of two vectors (a - b).
inline __device__ __host__ float3 operator -(float3 a, float3 b)
{
    float3 difference;
    difference.x = a.x - b.x;
    difference.y = a.y - b.y;
    difference.z = a.z - b.z;
    return difference;
}
// Subtract `a` from `b` in place, component by component.
inline __device__ __host__ void operator -=(float3 & b, float3 a)
{
    b = make_float3(b.x - a.x, b.y - a.y, b.z - a.z);
}
// Divide every component of `v` by `f`.
// The division is done once; the components are multiplied by the reciprocal.
inline __device__ __host__ float3 operator /(float3 v, float f)
{
    const float reciprocal = 1.0f / f;
    return make_float3(v.x * reciprocal, v.y * reciprocal, v.z * reciprocal);
}
// Divide `b` by `f` in place, again by multiplying with the reciprocal.
inline __device__ __host__ void operator /=(float3 & b, float f)
{
    const float reciprocal = 1.0f / f;
    b = make_float3(b.x * reciprocal, b.y * reciprocal, b.z * reciprocal);
}
// Dot product of two 3-component vectors.
inline __device__ __host__ float dot(float3 a, float3 b)
{
    const float xx = a.x * b.x;
    const float yy = a.y * b.y;
    const float zz = a.z * b.z;
    return xx + yy + zz;
}
// Dot product of two 4-component vectors.
inline __device__ __host__ float dot(float4 a, float4 b)
{
    const float xx = a.x * b.x;
    const float yy = a.y * b.y;
    const float zz = a.z * b.z;
    const float ww = a.w * b.w;
    return xx + yy + zz + ww;
}
// Clamp the scalar `f` to the range [a, b].
inline __device__ __host__ float clamp(float f, float a, float b)
{
    const float cappedAbove = min(f, b);
    return max(a, cappedAbove);
}
// Clamp every component of `v` to the same scalar range [a, b].
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
    float3 bounded;
    bounded.x = clamp(v.x, a, b);
    bounded.y = clamp(v.y, a, b);
    bounded.z = clamp(v.z, a, b);
    return bounded;
}
// Clamp every component of `v` to the matching component range [a.c, b.c].
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
    float3 bounded;
    bounded.x = clamp(v.x, a.x, b.x);
    bounded.y = clamp(v.y, a.y, b.y);
    bounded.z = clamp(v.z, a.z, b.z);
    return bounded;
}
// Return `v` scaled to unit length.
// The scale factor is 1 / |v| = 1 / sqrt(dot(v, v)); dividing by dot(v, v) alone
// (the squared length) only yields a unit vector when |v| is already 1.
// A zero-length input is not special-cased: the division by zero propagates
// non-finite components to the result.
inline __device__ __host__ float3 normalize(float3 v)
{
    float invLength = 1.0f / sqrtf(dot(v, v));
    return make_float3(v.x * invLength, v.y * invLength, v.z * invLength);
}
// Estimate the dominant eigenvector of a symmetric 3x3 matrix with the power method.
// `matrix` stores the six unique entries as [xx, xy, xz, yy, yz, zz].
// After every multiplication the vector is rescaled by its largest component.
// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html
inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{
    // 8 iterations seems to be more than enough.
    const int iterationCount = 8;
    float3 v = make_float3(1.0f, 1.0f, 1.0f);
    int step = 0;
    while (step < iterationCount)
    {
        // Multiply the current estimate by the symmetric matrix.
        float nx = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
        float ny = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
        float nz = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
        float largest = max(max(nx, ny), nz);
        float scale = 1.0f / largest;
#if __DEVICE_EMULATION__
        // Emulation builds only: avoid propagating a division by zero.
        if (largest == 0.0f) scale = 0.0f;
#endif
        v = make_float3(nx * scale, ny * scale, nz * scale);
        step++;
    }
    return v;
}
// Compute the direction of the best-fit line through 16 colors: builds the
// covariance matrix of the colors (six unique entries) and returns its first
// eigenvector via firstEigenVector().
// Two implementations are selected at compile time: a sequential one for
// device emulation and a cooperative one that indexes by threadIdx.x.
inline __device__ float3 bestFitLine(const float3 * colors)
{
#if __DEVICE_EMULATION__
// Sequential path: every caller walks all 16 colors on its own.
// Compute covariance matrix of the given colors.
float3 center = make_float3(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
center += colors[i];
}
center /= 16.0f;
// Accumulate the upper triangle: [xx, xy, xz, yy, yz, zz].
float covariance[6] = {0, 0, 0, 0, 0, 0};
for (int i = 0; i < 16; i++)
{
float3 a = colors[i] - center;
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[2] += a.x * a.z;
covariance[3] += a.y * a.y;
covariance[4] += a.y * a.z;
covariance[5] += a.z * a.z;
}
#else
// Cooperative path: each thread handles the color at its own threadIdx.x.
// NOTE(review): assumes threadIdx.x is in [0, 16) -- the shared arrays have 16 slots; confirm launch configuration.
const int idx = threadIdx.x;
__shared__ float3 colorSum[16];
colorSum[idx] = colors[idx];
// Unrolled parallel reduction.
// NOTE(review): there is no __syncthreads() between these shared-memory steps;
// presumably this relies on the 16 threads executing in lockstep -- confirm.
if (idx < 8) {
colorSum[idx] += colorSum[idx + 8];
colorSum[idx] += colorSum[idx + 4];
colorSum[idx] += colorSum[idx + 2];
colorSum[idx] += colorSum[idx + 1];
}
// @@ Eliminate two-way bank conflicts here.
// @@ It seems that doing that and unrolling the reduction doesn't help...
__shared__ float covariance[16*6];
// colorSum[0] holds the sum of all colors; dividing by 16 gives the center.
// Each thread then reuses its colorSum slot for its centered color.
// NOTE(review): thread 0 overwrites colorSum[0] while the other threads read it
// in this same statement; looks like this also depends on lockstep execution -- confirm.
colorSum[idx] = colors[idx] - colorSum[0] / 16.0f;
// Per-thread outer product terms, six floats per thread.
covariance[6 * idx + 0] = colorSum[idx].x * colorSum[idx].x; // 0, 6, 12, 2, 8, 14, 4, 10, 0
covariance[6 * idx + 1] = colorSum[idx].x * colorSum[idx].y;
covariance[6 * idx + 2] = colorSum[idx].x * colorSum[idx].z;
covariance[6 * idx + 3] = colorSum[idx].y * colorSum[idx].y;
covariance[6 * idx + 4] = colorSum[idx].y * colorSum[idx].z;
covariance[6 * idx + 5] = colorSum[idx].z * colorSum[idx].z;
// Halving reduction: after the last pass the totals sit in covariance[0..5].
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[6 * idx + 0] += covariance[6 * (idx+d) + 0];
covariance[6 * idx + 1] += covariance[6 * (idx+d) + 1];
covariance[6 * idx + 2] += covariance[6 * (idx+d) + 2];
covariance[6 * idx + 3] += covariance[6 * (idx+d) + 3];
covariance[6 * idx + 4] += covariance[6 * (idx+d) + 4];
covariance[6 * idx + 5] += covariance[6 * (idx+d) + 5];
}
}
#endif
// Compute first eigen vector.
// In both paths `covariance` names an array whose first six floats are the matrix.
return firstEigenVector(covariance);
}
#endif // CUDAMATH_H

View File

@ -0,0 +1,109 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda_runtime.h>
#endif
using namespace nv;
using namespace cuda;
#if NV_OS_WIN32
// WIN32_LEAN_AND_MEAN is the macro windows.h checks to skip rarely used headers.
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
/// Return true when the OS reports a major version of 6 or higher
/// (Windows Vista and later). Returns false if the version query fails.
static bool isWindowsVista()
{
    OSVERSIONINFO osvi;
    osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
    if (!::GetVersionEx(&osvi))
    {
        // The structure was not filled in; do not read dwMajorVersion.
        return false;
    }
    return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
/// Determine if CUDA is available.
/// Returns false when built without HAVE_CUDA. On Windows the CUDA path is
/// additionally disabled when isWindowsVista() reports Vista or newer.
/// The Windows check is compiled only under NV_OS_WIN32, because the helper
/// is not defined on other platforms.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
    if (isWindowsVista())
    {
        return false;
    }
    // A stricter variant that also required isWow32() was considered here.
#endif
    return deviceCount() > 0;
#else
    return false;
#endif
}
/// Get number of CUDA enabled devices.
/// Returns 0 when built without HAVE_CUDA or when the runtime query fails.
int nv::cuda::deviceCount()
{
#if defined HAVE_CUDA
    int gpuCount = 0;
    if (cudaGetDeviceCount(&gpuCount) != cudaSuccess)
    {
        return 0;
    }
    return gpuCount;
#else
    return 0;
#endif
}
/// Activate the given device.
/// `i` is checked against deviceCount(). Returns true when the CUDA runtime
/// accepts the device, and always false when built without HAVE_CUDA.
bool nv::cuda::setDevice(int i)
{
    nvCheck(i < deviceCount());
#if defined HAVE_CUDA
    return cudaSetDevice(i) == cudaSuccess;
#else
    return false;
#endif
}

View File

@ -0,0 +1,40 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_CUDAUTILS_H
#define NV_TT_CUDAUTILS_H
namespace nv
{
// Small helpers for querying and selecting CUDA devices.
namespace cuda
{
// True when a usable CUDA device is available.
bool isHardwarePresent();
// Number of CUDA enabled devices.
int deviceCount();
// Make device `i` the active one; returns whether that succeeded.
bool setDevice(int i);
};
} // nv namespace
#endif // NV_TT_CUDAUTILS_H

486
src/nvimage/nvtt/dxtlib.cpp Normal file
View File

@ -0,0 +1,486 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Memory.h>
#include <nvcore/Ptr.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Filter.h>
#include <nvimage/Quantize.h>
#include <nvimage/NormalMap.h>
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "CompressRGB.h"
#include "BlockDXT.h"
#include "InputOptions.h"
#include "CompressionOptions.h"
#include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h"
using namespace nv;
using namespace nvtt;
namespace
{
    // Bytes used by one 4x4 block of the given compressed format.
    // Returns 0 for formats that are not block compressed.
    static int blockSize(Format format)
    {
        switch (format)
        {
            case Format_DXT1: /* Format_DXT1a would belong here too. */
            case Format_BC4:
                return 8;
            case Format_DXT3:
            case Format_DXT5:
            case Format_DXT5n:
            case Format_BC5:
                return 16;
            default:
                return 0;
        }
    }
    // Bytes needed to store a w x h image in the given format.
    // Block formats round both dimensions up to a multiple of 4 pixels.
    static int computeImageSize(int w, int h, Format format)
    {
        if (format != Format_RGBA) {
            const int blocksAcross = (w + 3) / 4;
            const int blocksDown = (h + 3) / 4;
            return blocksAcross * blocksDown * blockSize(format);
        }
        // Uncompressed: one Color32 per pixel.
        return w * h * sizeof(Color32);
    }
} // namespace
//
// compress
//
// Write the 128-byte DDS header that describes the texture about to be output.
// Nothing is written when there is no output handler or when the caller
// disabled the header through outputOptions.outputHeader.
// The dimensions come from the first input image; the pixel format or FourCC
// comes from the compression options.
static void outputHeader(const InputOptions::Private & inputOptions, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    // Output DDS header.
    if (outputOptions.outputHandler != NULL && outputOptions.outputHeader)
    {
        DDSHeader header;
        // Only 1 face and 2d textures supported.
        nvCheck(inputOptions.faceCount == 1);
        InputOptions::Private::Image * img = inputOptions.images;
        nvCheck(img != NULL);
        header.setWidth(img->width);
        header.setHeight(img->height);

        // The header count must match what compress() writes: with a level
        // limit it emits levels 0..maxLevel, i.e. maxLevel + 1 images, and it
        // only honours non-negative limits.
        int mipmapCount = inputOptions.mipmapCount;
        if (!inputOptions.generateMipmaps)
        {
            mipmapCount = 0;
        }
        else if (inputOptions.maxLevel >= 0 && inputOptions.maxLevel < mipmapCount - 1)
        {
            mipmapCount = inputOptions.maxLevel + 1;
        }
        header.setMipmapCount(mipmapCount);

        if (inputOptions.textureType == TextureType_2D) {
            header.setTexture2D();
        }
        else if (inputOptions.textureType == TextureType_Cube) {
            header.setTextureCube();
        }
        /*else if (inputOptions.textureType == TextureType_3D) {
            header.setTexture3D();
            header.setDepth(img->depth);
        }*/

        if (compressionOptions.format == Format_RGBA)
        {
            // Uncompressed: describe the pitch and the channel masks.
            header.setPitch(4 * img->width);
            header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
        }
        else
        {
            // Block compressed: describe the top level size and the FourCC.
            header.setLinearSize(computeImageSize(img->width, img->height, compressionOptions.format));
            if (compressionOptions.format == Format_DXT1 /*|| compressionOptions.format == Format_DXT1a*/) {
                header.setFourCC('D', 'X', 'T', '1');
            }
            else if (compressionOptions.format == Format_DXT3) {
                header.setFourCC('D', 'X', 'T', '3');
            }
            else if (compressionOptions.format == Format_DXT5) {
                header.setFourCC('D', 'X', 'T', '5');
            }
            else if (compressionOptions.format == Format_DXT5n) {
                header.setFourCC('R', 'X', 'G', 'B');
            }
            else if (compressionOptions.format == Format_BC4) {
                header.setFourCC('A', 'T', 'I', '1');
            }
            else if (compressionOptions.format == Format_BC5) {
                header.setFourCC('A', 'T', 'I', '2');
            }
        }

        // Swap bytes if necessary.
        header.swapBytes();
        nvStaticCheck(sizeof(DDSHeader) == 128);
        outputOptions.outputHandler->writeData(&header, 128);
        // Revert swap.
        header.swapBytes();
    }
}
// Compress a single image with the compressor selected by
// compressionOptions.format (and, for some formats, compressionOptions.quality).
// The output is delivered through `outputOptions` by the individual compressors.
// Always returns true; a format that matches no branch is silently ignored.
static bool compressMipmap(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
nvDebugCheck(image != NULL);
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{
compressRGB(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_DXT1)
{
// DXT1 has several back ends. The optional external compressors below each
// end in a dangling `else`, so whichever are compiled in chain onto the
// CUDA / CPU selection that follows.
#if defined(HAVE_S3QUANT)
if (compressionOptions.externalCompressor == "s3")
{
s3CompressDXT1(image, outputOptions);
}
else
#endif
#if defined(HAVE_ATITC)
if (compressionOptions.externalCompressor == "ati")
{
// NOTE(review): looks like a leftover debug print -- confirm whether it should stay.
printf("ATI\n");
atiCompressDXT1(image, outputOptions);
}
else
#endif
// CUDA is used only when requested and when hardware is reported present.
if (compressionOptions.useCuda && nv::cuda::isHardwarePresent())
{
cudaCompressDXT1(image, outputOptions, compressionOptions);
}
else
{
// CPU path: Quality_Fastest picks the fast compressor, anything else the regular one.
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT1(image, outputOptions);
}
else
{
compressDXT1(image, outputOptions, compressionOptions);
}
}
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT3(image, outputOptions);
}
else
{
compressDXT3(image, outputOptions, compressionOptions);
}
}
else if (compressionOptions.format == Format_DXT5)
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT5(image, outputOptions);
}
else
{
compressDXT5(image, outputOptions, compressionOptions);
}
}
else if (compressionOptions.format == Format_DXT5n)
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT5n(image, outputOptions);
}
else
{
compressDXT5n(image, outputOptions, compressionOptions);
}
}
// BC4 and BC5 have no fast variant here.
else if (compressionOptions.format == Format_BC4)
{
compressBC4(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_BC5)
{
compressBC5(image, outputOptions, compressionOptions);
}
return true;
}
// Build a floating point copy of `image` in linear space.
// The caller owns the returned FloatImage.
static FloatImage * toFloatImage(const Image * image, const InputOptions::Private & inputOptions)
{
    nvDebugCheck(image != NULL);
    FloatImage * linearImage = new FloatImage(image);
    if (inputOptions.inputGamma == 1.0f)
    {
        // Input is already linear; nothing to convert.
        return linearImage;
    }
    // Convert to linear space.
    linearImage->toLinear(0, 3, inputOptions.inputGamma);
    return linearImage;
}
// Turn a linear float image back into a fixed point image,
// applying the configured output gamma.
static Image * toFixedImage(const FloatImage * floatImage, const InputOptions::Private & inputOptions)
{
    nvDebugCheck(floatImage != NULL);
    Image * fixedImage = floatImage->createImageGammaCorrect(inputOptions.outputGamma);
    return fixedImage;
}
// Produce the next mipmap level from `floatImage` using the configured filter.
// Any filter other than Box or Triangle falls back to the Kaiser filter.
// The result is normalized when inputOptions.normalizeMipmaps is set.
static FloatImage * createMipmap(const FloatImage * floatImage, const InputOptions::Private & inputOptions)
{
    FloatImage * downsampled = NULL;
    switch (inputOptions.mipmapFilter)
    {
        case MipmapFilter_Box:
        {
            // Use fast downsample.
            downsampled = floatImage->fastDownSample();
            break;
        }
        case MipmapFilter_Triangle:
        {
            Kernel1 triangle(4);
            triangle.initFilter(Filter::Triangle);
            downsampled = floatImage->downSample(triangle, (FloatImage::WrapMode)inputOptions.wrapMode);
            break;
        }
        default: /* MipmapFilter_Kaiser */
        {
            Kernel1 kaiser(10);
            kaiser.initKaiser(8.0, 0.75f);
            downsampled = floatImage->downSample(kaiser, (FloatImage::WrapMode)inputOptions.wrapMode);
            break;
        }
    }
    // Normalize mipmap.
    if (inputOptions.normalizeMipmaps)
    {
        normalize(downsampled);
    }
    return downsampled;
}
// Quantize `img` in place to the precision of the output format.
// Color dithering applies to the DXT1..DXT5 range of formats only.
// Alpha is either reduced to binary (dithered or thresholded) or, for DXT3
// with alpha dithering enabled, quantized to 4 bits.
static void quantize(Image * img, const InputOptions::Private & inputOptions, Format format)
{
    const bool isDxtColorFormat = (format >= Format_DXT1 && format <= Format_DXT5);
    if (inputOptions.enableColorDithering && isDxtColorFormat)
    {
        Quantize::FloydSteinberg_RGB16(img);
    }

    if (!inputOptions.binaryAlpha)
    {
        // Non-binary alpha: only DXT3 needs explicit quantization.
        // (A DXT1a format would use Quantize::BinaryAlpha here.)
        if (inputOptions.enableAlphaDithering && format == Format_DXT3)
        {
            Quantize::Alpha4(img);
        }
        return;
    }

    if (inputOptions.enableAlphaDithering)
    {
        Quantize::FloydSteinberg_BinaryAlpha(img, inputOptions.alphaThreshold);
    }
    else
    {
        Quantize::BinaryAlpha(img, inputOptions.alphaThreshold);
    }
}
/// Compress the input texture with the given compression options.
/// Writes the DDS header, then walks every face and mipmap level: each level is
/// either taken from the input or generated from the previous one, quantized,
/// and passed to compressMipmap(). Returns false (after reporting
/// Error_InvalidInput) when the first level of a face has no data.
bool nvtt::compress(const InputOptions & inputOptions, const OutputOptions & outputOptions, const CompressionOptions & compressionOptions)
{
// Make sure enums match.
// The wrap mode is cast between the two enum types below, so the values must agree.
nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);
// Output DDS header.
outputHeader(inputOptions.m, outputOptions, compressionOptions.m);
Format format = compressionOptions.m.format;
for (int f = 0; f < inputOptions.m.faceCount; f++)
{
// Per-face state: the last fixed point image supplied by the caller, and the
// float image the next generated level is derived from.
Image * lastImage = NULL;
AutoPtr<FloatImage> floatImage(NULL);
for (int m = 0; m < inputOptions.m.mipmapCount; m++)
{
// Images are stored face-major: all levels of face 0, then face 1, ...
int idx = f * inputOptions.m.mipmapCount + m;
InputOptions::Private::Image & mipmap = inputOptions.m.images[idx];
// Announce the level before any of its data is written.
if (outputOptions.outputHandler)
{
int size = computeImageSize(mipmap.width, mipmap.height, format);
outputOptions.outputHandler->mipmap(size, mipmap.width, mipmap.height, mipmap.depth, mipmap.face, mipmap.mipLevel);
}
Image * img; // Image to compress.
if (mipmap.data != NULL) // Mipmap provided.
{
// Convert to normal map.
if (inputOptions.m.convertToNormalMap)
{
// NOTE(review): `img` is not assigned in this branch; it is only set by the
// `floatImage != NULL` conversion below. Presumably createNormalMap never
// returns NULL -- confirm, otherwise `img` is used uninitialized.
floatImage = createNormalMap(mipmap.data, (FloatImage::WrapMode)inputOptions.m.wrapMode, inputOptions.m.heightFactors, inputOptions.m.bumpFrequencyScale);
}
else
{
lastImage = img = mipmap.data;
// Delete float image.
floatImage = NULL;
}
}
else // Create mipmap from last.
{
if (m == 0) {
// First mipmap missing.
if (outputOptions.errorHandler != NULL) outputOptions.errorHandler->error(Error_InvalidInput);
return false;
}
// Lazily convert the last provided image to float the first time a level is generated from it.
if (floatImage == NULL)
{
nvDebugCheck(lastImage != NULL);
floatImage = toFloatImage(lastImage, inputOptions.m);
}
// Create mipmap.
floatImage = createMipmap(floatImage.ptr(), inputOptions.m);
}
if (floatImage != NULL)
{
// Convert to fixed.
img = toFixedImage(floatImage.ptr(), inputOptions.m);
}
// NOTE(review): when `img` is the caller-provided mipmap.data, quantize()
// receives that same image and may modify it -- confirm this is intended.
quantize(img, inputOptions.m, format);
compressMipmap(img, outputOptions, compressionOptions.m);
// Only images created in this iteration are deleted; input images are left alone.
if (img != mipmap.data)
{
delete img;
}
// Stop after this level when mipmaps are disabled or the level limit is reached.
if (!inputOptions.m.generateMipmaps || (inputOptions.m.maxLevel >= 0 && m >= inputOptions.m.maxLevel)) {
// continue with next face.
break;
}
}
}
return true;
}
/// Estimate the size of compressing the input with the given options.
int nvtt::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions)
{
Format format = compressionOptions.m.format;
int size = 0;
for (int f = 0; f < inputOptions.m.faceCount; f++)
{
for (int m = 0; m < inputOptions.m.mipmapCount; m++)
{
int idx = f * inputOptions.m.mipmapCount + m;
const InputOptions::Private::Image & img = inputOptions.m.images[idx];
size += computeImageSize(img.width, img.height, format);
if (!inputOptions.m.generateMipmaps || (inputOptions.m.maxLevel >= 0 && m >= inputOptions.m.maxLevel)) {
// continue with next face.
break;
}
}
}
return size;
}
/// Return a string for the given error.
/// Yields NULL for a value that is not one of the known error codes.
const char * nvtt::errorString(Error e)
{
    const char * text = NULL;
    switch(e)
    {
        case Error_InvalidInput:       text = "Invalid input";       break;
        case Error_UserInterruption:   text = "User interruption";   break;
        case Error_UnsupportedFeature: text = "Unsupported feature"; break;
        case Error_CudaError:          text = "CUDA error";          break;
        case Error_Unknown:            text = "Unknown error";       break;
    }
    return text;
}

242
src/nvimage/nvtt/nvtt.h Normal file
View File

@ -0,0 +1,242 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_H
#define NV_TT_H
#include <nvcore/nvcore.h>
// Function linkage
#if NVTT_SHARED
#ifdef NVTT_EXPORTS
#define NVTT_API DLL_EXPORT
#define NVTT_CLASS DLL_EXPORT_CLASS
#else
#define NVTT_API DLL_IMPORT
#define NVTT_CLASS DLL_IMPORT
#endif
#else
#define NVTT_API
#define NVTT_CLASS
#endif
// Public interface.
namespace nvtt
{
/// Supported compression formats.
/// Several enumerators are aliases for the same value (for example
/// Format_RGBA == Format_RGB and Format_BC1 == Format_DXT1).
enum Format
{
// No compression.
Format_RGB,
Format_RGBA = Format_RGB,
// DX9 formats.
Format_DXT1,
// Format_DXT1a, // DXT1 with binary alpha.
Format_DXT3,
Format_DXT5,
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
// DX10 formats.
Format_BC1 = Format_DXT1,
Format_BC2 = Format_DXT3,
Format_BC3 = Format_DXT5,
Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2
// OpenGL formats.
Format_LATC = Format_BC5,
};
/// Quality modes.
enum Quality
{
Quality_Fastest,
Quality_Normal,
Quality_Production,
Quality_Highest,
};
/// Compression options. This class describes the desired compression format and other compression settings.
class CompressionOptions
{
public:
NVTT_API CompressionOptions();
NVTT_API ~CompressionOptions();
// Set default options.
NVTT_API void reset();
NVTT_API void setFormat(Format format);
NVTT_API void setQuality(Quality quality, float errorThreshold = 0.5f);
NVTT_API void setColorWeights(float red, float green, float blue);
NVTT_API void enableHardwareCompression(bool enable);
NVTT_API void setExternalCompressor(const char * name);
// Set color mask to describe the RGB/RGBA format.
NVTT_API void setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask);
//private:
// Implementation details; Private is only declared here and defined elsewhere.
struct Private;
Private & m;
};
/// Wrap modes. // This matches FloatImage::WrapMode.
enum WrapMode
{
WrapMode_Clamp,
WrapMode_Repeat,
WrapMode_Mirror,
};
/// Texture types.
enum TextureType
{
TextureType_2D,
TextureType_Cube,
// TextureType_3D,
};
/// Input formats.
enum InputFormat
{
InputFormat_BGRA_8UB,
// InputFormat_RGBE_8UB,
// InputFormat_BGRA_32F,
};
/// Mipmap downsampling filters.
enum MipmapFilter
{
MipmapFilter_Box, ///< Box filter is quite good and very fast.
MipmapFilter_Triangle, ///< Triangle filter blurs the results too much, but that might be what you want.
MipmapFilter_Kaiser, ///< Kaiser-windowed Sinc filter is the best downsampling filter.
};
/// Input options. Specify format and layout of the input texture.
struct InputOptions
{
NVTT_API InputOptions();
NVTT_API ~InputOptions();
// Set default options.
NVTT_API void reset();
// Setup input layout.
NVTT_API void setTextureLayout(TextureType type, int w, int h, int d = 1);
NVTT_API void resetTextureLayout();
// Set mipmap data. Copies the data.
NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0);
// Describe the format of the input.
NVTT_API void setFormat(InputFormat fmt, bool alphaTransparency);
// Set gamma settings.
NVTT_API void setGamma(float inputGamma, float outputGamma);
// Set texture wrapping mode.
NVTT_API void setWrapMode(WrapMode mode);
// Set mipmapping options.
NVTT_API void setMipmapping(bool generateMipmaps, MipmapFilter filter = MipmapFilter_Kaiser, int maxLevel = -1);
// Set quantization options.
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
// Set normal map options.
NVTT_API void setConvertToNormalMap(bool convert);
NVTT_API void setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale);
NVTT_API void setNormalFilter(float small, float medium, float big, float large);
NVTT_API void setNormalizeMipmaps(bool b);
//private:
// Implementation details; Private is only declared here and defined elsewhere.
struct Private;
Private & m;
};
/// Output handler.
struct OutputHandler
{
virtual ~OutputHandler() {}
/// Indicate the start of a new compressed image that's part of the final texture.
virtual void mipmap(int size, int width, int height, int depth, int face, int miplevel) = 0;
/// Output data. Compressed data is output as soon as it's generated to minimize memory allocations.
virtual void writeData(const void * data, int size) = 0;
};
/// Error codes.
enum Error
{
Error_InvalidInput,
Error_UserInterruption,
Error_UnsupportedFeature,
Error_CudaError,
Error_Unknown,
};
/// Error handler.
struct ErrorHandler
{
virtual ~ErrorHandler() {}
// Signal error.
virtual void error(Error e) = 0;
};
/// Output Options. This class holds pointers to the interfaces that are used to report the output of
/// the compressor to the user.
struct OutputOptions
{
// NOTE(review): the default constructor does not list errorHandler in its
// initializer list, and both constructors call reset(), whose body is not
// visible here -- confirm that reset() initializes errorHandler and does not
// discard the handlers passed to the second constructor.
OutputOptions() : outputHandler(NULL), outputHeader(true) { reset(); }
OutputOptions(OutputHandler * oh, ErrorHandler * eh) : outputHandler(oh), errorHandler(eh), outputHeader(true) { reset(); }
// Set default options.
NVTT_API void reset();
// Receives the compressed data; may be NULL.
OutputHandler * outputHandler;
// Receives error notifications; may be NULL.
ErrorHandler * errorHandler;
// When true, compress() writes the DDS header before the image data.
bool outputHeader;
};
// Main entrypoint of the compression library.
NVTT_API bool compress(const InputOptions & inputOptions, const OutputOptions & outputOptions, const CompressionOptions & compressionOptions);
// Estimate the size of compressing the input with the given options.
NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions);
// Return string for the given error.
NVTT_API const char * errorString(Error e);
} // nvtt namespace
#endif // NV_TT_H

View File

@ -0,0 +1,52 @@
# Build script for the squish DXT compression library, its optional PNG tool
# and its test program.
PROJECT(squish)
ENABLE_TESTING()
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

# Library sources. Headers are listed so that IDE generators show them.
SET(SQUISH_SRCS
	alpha.cpp
	alpha.h
	clusterfit.cpp
	clusterfit.h
	fastclusterfit.cpp
	fastclusterfit.h
	weightedclusterfit.cpp
	weightedclusterfit.h
	colourblock.cpp
	colourblock.h
	colourfit.cpp
	colourfit.h
	colourset.cpp
	colourset.h
	config.h
	maths.cpp
	maths.h
	rangefit.cpp
	rangefit.h
	singlecolourfit.cpp
	singlecolourfit.h
	singlecolourlookup.inl
	squish.cpp
	squish.h
	simd.h
	simd_sse.h
	simd_ve.h)
ADD_LIBRARY(squish STATIC ${SQUISH_SRCS})

# libpng (optional): squishpng is only built when PNG is available.
FIND_PACKAGE(PNG)
IF(PNG_FOUND)
	INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
	ADD_EXECUTABLE(squishpng extra/squishpng.cpp)
	# PNG_LIBRARIES lists everything needed to link against libpng (including
	# zlib); PNG_LIBRARY alone names only the png library itself.
	TARGET_LINK_LIBRARIES(squishpng squish ${PNG_LIBRARIES})
ENDIF(PNG_FOUND)

#ADD_EXECUTABLE(squishgen extra/squishgen.cpp)

# Test program, registered with CTest as SQUISHTEST.
ADD_EXECUTABLE(squishtest extra/squishtest.cpp)
TARGET_LINK_LIBRARIES(squishtest squish)
ADD_TEST(SQUISHTEST squishtest)

View File

@ -0,0 +1,38 @@
1.7
* Fixed floating-point equality issue in clusterfit sort (x86 affected only)
* Implemented proper SSE(2) floor function for 50% speedup on SSE builds
* The range fit implementation now uses the correct colour metric
1.6
* Fixed bug in CompressImage where masked pixels were not skipped over
* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels
* Fixed major DXT1 bug that can generate unexpected transparent pixels
1.5
* Added CompressMasked function to handle incomplete DXT blocks more cleanly
* Added kWeightColourByAlpha flag for better quality images when alpha blending
1.4
* Fixed stack overflow in rangefit
1.3
* Worked around SSE floor implementation bug, proper fix needed!
* This release has visual studio and makefile builds that work
1.2
* Added provably optimal single colour compressor
* Added extra/squishgen.cpp that generates single colour lookup tables
1.1
* Fixed a DXT1 colour output bug
* Changed argument order for Decompress function to match Compress
* Added GetStorageRequirements function
* Added CompressImage function
* Added DecompressImage function
* Moved squishtool.cpp to extra/squishpng.cpp
* Added extra/squishtest.cpp
1.0
* Initial release

View File

@ -0,0 +1,223 @@
# Doxyfile 1.4.6
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = squish
PROJECT_NUMBER = 1.7
OUTPUT_DIRECTORY = docs
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
USE_WINDOWS_ENCODING = NO
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = YES
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
DETAILS_AT_TOP = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 4
ALIASES =
OPTIMIZE_OUTPUT_FOR_C = NO
OPTIMIZE_OUTPUT_JAVA = NO
BUILTIN_STL_SUPPORT = NO
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = YES
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = NO
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_DIRECTORIES = NO
FILE_VERSION_FILTER =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = squish.h
FILE_PATTERNS =
RECURSIVE = NO
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = NO
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_ALIGN_MEMBERS = YES
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
PDF_HYPERLINKS = NO
USE_PDFLATEX = NO
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = YES
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
DOT_PATH = /Applications/Graphviz.app/Contents/MacOS
DOTFILE_DIRS =
MAX_DOT_GRAPH_WIDTH = 1024
MAX_DOT_GRAPH_HEIGHT = 1024
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = NO

View File

@ -0,0 +1,31 @@
# Makefile for the squish static library.
# Build settings are pulled in from the adjacent 'config' file
# (presumably where INSTALL_DIR and the compiler flags are set - confirm).
include config
# Sources compiled into the library, and the matching object files.
SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp
OBJ = $(SRC:%.cpp=%.o)
LIB = libsquish.a
# Default target: build the static library.
all : $(LIB)
# Copy the public header and the library into $(INSTALL_DIR).
install : $(LIB)
install squish.h $(INSTALL_DIR)/include
install libsquish.a $(INSTALL_DIR)/lib
# Remove the files placed by 'install'.
uninstall:
$(RM) $(INSTALL_DIR)/include/squish.h
$(RM) $(INSTALL_DIR)/lib/libsquish.a
# Archive the objects; $? passes only the objects newer than the archive.
$(LIB) : $(OBJ)
$(AR) cr $@ $?
ranlib $@
# Compile each source file, with the current directory on the include path.
%.o : %.cpp
$(CXX) $(CPPFLAGS) -I. $(CXXFLAGS) -o$@ -c $<
# Delete the objects and the library.
clean :
$(RM) $(OBJ) $(LIB)

View File

@ -0,0 +1,35 @@
LICENSE
-------
The squish library is distributed under the terms and conditions of the MIT
license. This license is specified at the top of each source file and must be
preserved in its entirety.
BUILDING AND INSTALLING THE LIBRARY
-----------------------------------
If you are using Visual Studio 2003 or above under Windows then load the Visual
Studio 2003 project in the vs7 folder. By default, the library is built using
SSE optimisations. To change this, either change or remove SQUISH_USE_SSE=1
from the preprocessor symbols.
If you are using a Mac then load the Xcode 2.2 project in the distribution. By
default, the library is built using Altivec optimisations. To change this
either change or remove SQUISH_USE_ALTIVEC=1 from the preprocessor symbols. I
guess I'll have to think about changing this for the new Intel Macs that are
rolling out...
If you are using unix then first edit the config file in the base directory of
the distribution, enabling Altivec or SSE with the USE_ALTIVEC or USE_SSE
variables, and editing the optimisation flags passed to the C++ compiler if
necessary. Then make can be used to build the library, and make install (from
the superuser account) can be used to install (into /usr/local by default).
REPORTING BUGS OR FEATURE REQUESTS
----------------------------------
Feedback can be sent to Simon Brown (the developer) at si@sjbrown.co.uk
New releases are announced on the squish library homepage at
http://sjbrown.co.uk/?code=squish

View File

@ -0,0 +1,326 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "alpha.h"
#include <algorithm>
#include <climits>
namespace squish {
// Converts a float to an integer by adding one half and truncating, then
// clamps the result to the range [0, limit].
static int FloatToInt( float a, int limit )
{
    // the cast truncates towards zero, so the added half gives round-to-nearest
    int const rounded = static_cast< int >( a + 0.5f );

    // below the range: clamp to zero
    if( rounded < 0 )
        return 0;

    // above the range: clamp to the limit
    if( rounded > limit )
        return limit;

    return rounded;
}
// Packs the alpha channel of 16 RGBA pixels (byte 3 of every 4-byte pixel)
// into 8 output bytes: each alpha is quantised to 4 bits and two consecutive
// pixels share one byte, the earlier pixel in the low nibble.
void CompressAlphaDxt3( u8 const* rgba, void* block )
{
    u8* out = reinterpret_cast< u8* >( block );

    // scale factor taking an 8-bit alpha to the 4-bit range
    float const scale = 15.0f/255.0f;

    for( int pair = 0; pair < 8; ++pair )
    {
        // the two pixels handled in this iteration start here
        u8 const* pixels = rgba + 8*pair;

        // quantise both alphas down to [0, 15]
        int const lo = FloatToInt( ( float )pixels[3] * scale, 15 );
        int const hi = FloatToInt( ( float )pixels[7] * scale, 15 );

        // combine the two nibbles into one byte
        out[pair] = ( u8 )( lo | ( hi << 4 ) );
    }
}
// Expands 8 bytes of packed 4-bit alpha values into the alpha channel
// (byte 3 of every 4-byte pixel) of 16 RGBA pixels. Each nibble is
// replicated into both halves of the output byte, so 0x0 maps to 0x00 and
// 0xf maps to 0xff.
void DecompressAlphaDxt3( u8* rgba, void const* block )
{
    u8 const* in = reinterpret_cast< u8 const* >( block );

    for( int pair = 0; pair < 8; ++pair )
    {
        // one input byte carries the alpha of two consecutive pixels
        u8 const packed = in[pair];
        int const first = packed & 0x0f;
        int const second = packed >> 4;

        // widen each nibble to a full byte by duplicating it
        u8* pixels = rgba + 8*pair;
        pixels[3] = ( u8 )( first | ( first << 4 ) );
        pixels[7] = ( u8 )( ( second << 4 ) | second );
    }
}
// Widens the range [min, max] in place until it spans at least `steps`,
// first by raising max (capped at 255) and then, if that was not enough,
// by lowering min (floored at 0).
static void FixRange( int& min, int& max, int steps )
{
    // try to grow the range upwards
    if( max - min < steps )
    {
        int const raised = min + steps;
        max = ( raised < 255 ) ? raised : 255;
    }

    // the cap at 255 may have left it too narrow, so grow downwards
    if( max - min < steps )
    {
        int const lowered = max - steps;
        min = ( lowered > 0 ) ? lowered : 0;
    }
}
// Maps each of the 16 alpha values in `rgba` (byte 3 of every 4-byte pixel)
// to the closest entry of the 8-entry codebook `codes`.
//
// rgba     16 RGBA pixels; only the alpha bytes are read
// codes    8 candidate alpha values
// indices  receives, for each pixel, the index of the chosen code
//
// Returns the sum over all pixels of the squared distance to the chosen code.
// Ties are resolved in favour of the lowest index.
static int FitCodes( u8 const* rgba, u8 const* codes, u8* indices )
{
    int err = 0;
    for( int i = 0; i < 16; ++i )
    {
        int const value = rgba[4*i + 3];

        // INT_MAX comes from <climits>; any squared distance (at most
        // 255*255) is smaller, so the first code always replaces it
        int least = INT_MAX;
        int index = 0;
        for( int j = 0; j < 8; ++j )
        {
            // squared error against this code
            int dist = value - ( int )codes[j];
            dist *= dist;

            // strict comparison keeps the earliest of equally good codes
            if( dist < least )
            {
                least = dist;
                index = j;
            }
        }

        // record the winner and accumulate its error
        indices[i] = ( u8 )index;
        err += least;
    }
    return err;
}
static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
{
u8* bytes = reinterpret_cast< u8* >( block );
// write the first two bytes
bytes[0] = ( u8 )alpha0;
bytes[1] = ( u8 )alpha1;
// pack the indices with 3 bits each
u8* dest = bytes + 2;
u8 const* src = indices;
for( int i = 0; i < 2; ++i )
{
// pack 8 3-bit values
int value = 0;
for( int j = 0; j < 8; ++j )
{
int index = *src++;
value |= ( index << 3*j );
}
// store in 3 bytes
for( int j = 0; j < 3; ++j )
{
int byte = ( value >> 8*j ) & 0xff;
*dest++ = ( u8 )byte;
}
}
}
static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
{
// check the relative values of the endpoints
if( alpha0 > alpha1 )
{
// swap the indices
u8 swapped[16];
for( int i = 0; i < 16; ++i )
{
u8 index = indices[i];
if( index == 0 )
swapped[i] = 1;
else if( index == 1 )
swapped[i] = 0;
else if( index <= 5 )
swapped[i] = 7 - index;
else
swapped[i] = index;
}
// write the block
WriteAlphaBlock( alpha1, alpha0, swapped, block );
}
else
{
// write the block
WriteAlphaBlock( alpha0, alpha1, indices, block );
}
}
static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
{
// check the relative values of the endpoints
if( alpha0 < alpha1 )
{
// swap the indices
u8 swapped[16];
for( int i = 0; i < 16; ++i )
{
u8 index = indices[i];
if( index == 0 )
swapped[i] = 1;
else if( index == 1 )
swapped[i] = 0;
else
swapped[i] = 9 - index;
}
// write the block
WriteAlphaBlock( alpha1, alpha0, swapped, block );
}
else
{
// write the block
WriteAlphaBlock( alpha0, alpha1, indices, block );
}
}
// Compresses the alpha channel of 16 RGBA pixels into an 8-byte block.
// Two codebooks are tried: a 5-alpha one (four interpolated values plus
// explicit 0 and 255) and a 7-alpha one (six interpolated values). Whichever
// gives the lower total squared error is written; ties go to 5-alpha.
void CompressAlphaDxt5( u8 const* rgba, void* block )
{
    // the 7-alpha range covers every value; the 5-alpha range leaves out
    // 0 at the bottom and 255 at the top, which that codebook stores explicitly
    int min5 = 255, max5 = 0;
    int min7 = 255, max7 = 0;
    for( int i = 0; i < 16; ++i )
    {
        int const alpha = rgba[4*i + 3];
        min7 = std::min( min7, alpha );
        max7 = std::max( max7, alpha );
        if( alpha != 0 )
            min5 = std::min( min5, alpha );
        if( alpha != 255 )
            max5 = std::max( max5, alpha );
    }

    // collapse an inverted (empty) range onto its maximum
    if( min5 > max5 )
        min5 = max5;
    if( min7 > max7 )
        min7 = max7;

    // make each range span at least its number of steps
    FixRange( min5, max5, 5 );
    FixRange( min7, max7, 7 );

    // 5-alpha codebook: endpoints, four interpolants, then 0 and 255
    u8 codes5[8];
    codes5[0] = ( u8 )min5;
    codes5[1] = ( u8 )max5;
    for( int slot = 2; slot < 6; ++slot )
        codes5[slot] = ( u8 )( ( ( 6 - slot )*min5 + ( slot - 1 )*max5 )/5 );
    codes5[6] = 0;
    codes5[7] = 255;

    // 7-alpha codebook: endpoints and six interpolants
    u8 codes7[8];
    codes7[0] = ( u8 )min7;
    codes7[1] = ( u8 )max7;
    for( int slot = 2; slot < 8; ++slot )
        codes7[slot] = ( u8 )( ( ( 8 - slot )*min7 + ( slot - 1 )*max7 )/7 );

    // fit the pixels against both codebooks
    u8 indices5[16];
    u8 indices7[16];
    int const err5 = FitCodes( rgba, codes5, indices5 );
    int const err7 = FitCodes( rgba, codes7, indices7 );

    // keep whichever fits better
    if( err7 < err5 )
        WriteAlphaBlock7( min7, max7, indices7, block );
    else
        WriteAlphaBlock5( min5, max5, indices5, block );
}
// Decodes an 8-byte alpha block into the alpha channel (byte 3 of every
// 4-byte pixel) of 16 RGBA pixels. The ordering of the two endpoint bytes
// selects the codebook: alpha0 <= alpha1 means four interpolated values plus
// 0 and 255, otherwise six interpolated values.
void DecompressAlphaDxt5( u8* rgba, void const* block )
{
    u8 const* in = reinterpret_cast< u8 const* >( block );
    int const alpha0 = in[0];
    int const alpha1 = in[1];

    // rebuild the codebook from the endpoints
    u8 codes[8];
    codes[0] = ( u8 )alpha0;
    codes[1] = ( u8 )alpha1;
    int const steps = ( alpha0 <= alpha1 ) ? 5 : 7;
    for( int i = 1; i < steps; ++i )
        codes[1 + i] = ( u8 )( ( ( steps - i )*alpha0 + i*alpha1 )/steps );
    if( steps == 5 )
    {
        // the 5-alpha codebook ends with the explicit extremes
        codes[6] = 0;
        codes[7] = 255;
    }

    // unpack all sixteen 3-bit indices before touching the output
    u8 indices[16];
    for( int group = 0; group < 2; ++group )
    {
        // three bytes, low byte first, hold eight indices
        u8 const* src = in + 2 + 3*group;
        int const packed = src[0] | ( src[1] << 8 ) | ( src[2] << 16 );
        for( int k = 0; k < 8; ++k )
            indices[8*group + k] = ( u8 )( ( packed >> ( 3*k ) ) & 0x7 );
    }

    // look each index up in the codebook
    for( int i = 0; i < 16; ++i )
        rgba[4*i + 3] = codes[indices[i]];
}
} // namespace squish

View File

@ -0,0 +1,41 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_ALPHA_H
#define SQUISH_ALPHA_H
#include <squish.h>
namespace squish {
// Alpha-channel helpers. In every function `rgba` addresses 16 pixels of
// 4 bytes each, of which only the alpha byte (offset 3) is read or written,
// and `block` addresses 8 bytes of compressed alpha data.

// Quantises each alpha to 4 bits and packs two pixels per byte.
void CompressAlphaDxt3( u8 const* rgba, void* block );
// Stores two endpoint bytes followed by sixteen 3-bit codebook indices.
void CompressAlphaDxt5( u8 const* rgba, void* block );
// Expands the packed 4-bit alphas back to bytes.
void DecompressAlphaDxt3( u8* rgba, void const* block );
// Rebuilds the codebook from the endpoints and looks up each index.
void DecompressAlphaDxt5( u8* rgba, void const* block );
} // namespace squish
#endif // ndef SQUISH_ALPHA_H

View File

@ -0,0 +1,499 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "clusterfit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
// Prepares a cluster fit over the given colour set.
//
// The points are ordered by their projection onto the principal component of
// the weighted covariance matrix, and the weighted points plus the sum of
// their squares are cached in that order for the Compress3/Compress4 searches.
//
// colours  the colour set to fit (points and weights are read from it)
// flags    forwarded unchanged to the ColourFit base class
ClusterFit::ClusterFit( ColourSet const* colours, int flags )
  : ColourFit( colours, flags )
{
    // initialise the best error
#if SQUISH_USE_SIMD
    m_besterror = VEC4_CONST( FLT_MAX );
#else
    m_besterror = FLT_MAX;
#endif

    // start from a uniform metric so that SolveLeastSquares never reads an
    // indeterminate m_metric; callers override this through setMetric()
#if SQUISH_USE_SIMD
    m_metric = VEC4_CONST( 1.0f );
#else
    m_metric = Vec3( 1.0f );
#endif

    // cache some values
    int const count = m_colours->GetCount();
    Vec3 const* values = m_colours->GetPoints();

    // get the covariance matrix
    Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );

    // compute the principle component
    Vec3 principle = ComputePrincipleComponent( covariance );

    // project every point onto the principal axis; m_order starts as identity
    float dps[16];
    for( int i = 0; i < count; ++i )
    {
        dps[i] = Dot( values[i], principle );
        m_order[i] = i;
    }

    // insertion sort by projection; the strict comparison keeps equal
    // projections in their original order (stable)
    for( int i = 0; i < count; ++i )
    {
        for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
        {
            std::swap( dps[j], dps[j - 1] );
            std::swap( m_order[j], m_order[j - 1] );
        }
    }

    // weight all the points
#if SQUISH_USE_SIMD
    Vec4 const* unweighted = m_colours->GetPointsSimd();
    Vec4 const* weights = m_colours->GetWeightsSimd();
    m_xxsum = VEC4_CONST( 0.0f );
#else
    Vec3 const* unweighted = m_colours->GetPoints();
    float const* weights = m_colours->GetWeights();
    m_xxsum = Vec3( 0.0f );
#endif
    for( int i = 0; i < count; ++i )
    {
        // p is the original position of the point that sorts into slot i
        int p = m_order[i];
        m_unweighted[i] = unweighted[p];
        m_weights[i] = weights[p];
        m_weighted[i] = weights[p]*unweighted[p];
        m_xxsum += m_weighted[i]*m_weighted[i];
    }
}
void ClusterFit::setMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD
m_metric = Vec4(r, g, b, 0);
#else
m_metric = Vec3(r, g, b);
#endif
}
// Returns the lowest error recorded so far by Compress3/Compress4. In the
// SIMD build the error is held in a vector and its X component is returned.
float ClusterFit::bestError() const
{
#if SQUISH_USE_SIMD
    Vec3 error = m_besterror.GetVec3();
    return error.X();
#else
    return m_besterror;
#endif
}
// Searches for the best 3-colour block (start, end and their midpoint).
//
// The points are already sorted along the principal axis, so every candidate
// is a split of the ordered list into three contiguous clusters: [0,i) uses
// index 0 (start), [i,j) uses index 2 (halfway) and [j,count) uses index 1
// (end). For each split the endpoints are placed by SolveLeastSquares, and
// the block is written only if the best split beats m_besterror.
//
// m_alpha/m_beta are updated incrementally as the loops count down, so the
// statement order inside the loops must not be changed.
void ClusterFit::Compress3( void* block )
{
// declare variables
int const count = m_colours->GetCount();
#if SQUISH_USE_SIMD
Vec4 beststart = VEC4_CONST( 0.0f );
Vec4 bestend = VEC4_CONST( 0.0f );
Vec4 besterror = VEC4_CONST( FLT_MAX );
Vec4 const half = VEC4_CONST( 0.5f );
Vec4 const zero = VEC4_CONST( 0.0f );
#else
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
float besterror = FLT_MAX;
float const half = 0.5f;
float const zero = 0.0f;
#endif
// check all possible clusters for this total order
// (only the first `count` entries of indices are ever assigned)
u8 indices[16];
u8 bestindices[16];
// first cluster [0,i) is at the start
for( int m = 0; m < count; ++m )
{
indices[m] = 0;
m_alpha[m] = m_weights[m];
m_beta[m] = zero;
}
for( int i = count; i >= 0; --i )
{
// second cluster [i,j) is half along
for( int m = i; m < count; ++m )
{
indices[m] = 2;
m_alpha[m] = m_beta[m] = half*m_weights[m];
}
// note: j > i, so this loop body does not run when i == count
for( int j = count; j > i; --j )
{
// last cluster [j,k) is at the end
if( j < count )
{
indices[j] = 1;
m_alpha[j] = zero;
m_beta[j] = m_weights[j];
}
// solve a least squares problem to place the endpoints
#if SQUISH_USE_SIMD
Vec4 start, end;
Vec4 error = SolveLeastSquares( start, end );
#else
Vec3 start, end;
float error = SolveLeastSquares( start, end );
#endif
// keep the solution if it wins
#if SQUISH_USE_SIMD
if( CompareAnyLessThan( error, besterror ) )
#else
if( error < besterror )
#endif
{
beststart = start;
bestend = end;
for( int m = 0; m < 16; ++m ) // TODO: make this faster?
bestindices[m] = indices[m];
besterror = error;
}
}
}
// save the block if necessary
#if SQUISH_USE_SIMD
if( CompareAnyLessThan( besterror, m_besterror ) )
#else
if( besterror < m_besterror )
#endif
{
// remap the indices
// (undo the sort: slot i holds the point originally at m_order[i])
u8 unordered[16];
for( int i = 0; i < count; ++i )
unordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( unordered, bestindices );
// save the block
#if SQUISH_USE_SIMD
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
#else
WriteColourBlock3( beststart, bestend, bestindices, block );
#endif
// save the error
m_besterror = besterror;
}
}
//static int run = 0;
//static bool debug = false;
// Searches for the best 4-colour block (start, end and two interpolants).
//
// Every candidate is a split of the sorted points into four contiguous
// clusters: [0,i) uses index 0 (start), [i,j) index 2 (one third along),
// [j,k) index 3 (two thirds along) and [k,count) index 1 (end). For each
// split the endpoints are placed by SolveLeastSquares. Unlike Compress3 the
// running best error starts from m_besterror, so a candidate has to beat any
// result already recorded before it is kept.
//
// m_alpha/m_beta are updated incrementally as the loops count down, so the
// statement order inside the loops must not be changed.
void ClusterFit::Compress4( void* block )
{
//debug = (run == 1);
//run++;
// declare variables
int const count = m_colours->GetCount();
#if SQUISH_USE_SIMD
Vec4 beststart = VEC4_CONST( 0.0f );
Vec4 bestend = VEC4_CONST( 0.0f );
Vec4 besterror = m_besterror;
Vec4 const twothirds = VEC4_CONST( 2.0f/3.0f );
Vec4 const onethird = VEC4_CONST( 1.0f/3.0f );
Vec4 const zero = VEC4_CONST( 0.0f );
#else
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
float besterror = m_besterror;
float const twothirds = 2.0f/3.0f;
float const onethird = 1.0f/3.0f;
float const zero = 0.0f;
#endif
// check all possible clusters for this total order
// (only the first `count` entries of indices are ever assigned)
u8 indices[16];
u8 bestindices[16];
// first cluster [0,i) is at the start
for( int m = 0; m < count; ++m )
{
indices[m] = 0;
m_alpha[m] = m_weights[m];
m_beta[m] = zero;
}
for( int i = count; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < count; ++m )
{
indices[m] = 2;
m_alpha[m] = twothirds*m_weights[m];
m_beta[m] = onethird*m_weights[m];
}
for( int j = count; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < count; ++m )
{
indices[m] = 3;
m_alpha[m] = onethird*m_weights[m];
m_beta[m] = twothirds*m_weights[m];
}
for( int k = count; k >= j; --k )
{
// skip the final combination (j == k == 0), in which every point
// would be assigned to the end cluster
if (j + k == 0) continue;
// last cluster [k,n) is at the end
if( k < count )
{
indices[k] = 1;
m_alpha[k] = zero;
m_beta[k] = m_weights[k];
}
/*unsigned int permutation = 0;
for(int p = 0; p < 16; p++) {
permutation |= indices[p] << (p * 2);
}
if (debug) printf("%X:\t", permutation);
if (debug && permutation == 0x55FFFFAA) __debugbreak();
*/
// solve a least squares problem to place the endpoints
#if SQUISH_USE_SIMD
Vec4 start, end;
Vec4 error = SolveLeastSquares( start, end );
#else
Vec3 start, end;
float error = SolveLeastSquares( start, end );
#endif
// keep the solution if it wins
#if SQUISH_USE_SIMD
if( CompareAnyLessThan( error, besterror ) )
#else
if( error < besterror )
#endif
{
beststart = start;
bestend = end;
for( int m = 0; m < 16; ++m ) // TODO: make this faster?
bestindices[m] = indices[m];
besterror = error;
}
}
}
}
// save the block if necessary
#if SQUISH_USE_SIMD
if( CompareAnyLessThan( besterror, m_besterror ) )
#else
if( besterror < m_besterror )
#endif
{
// remap the indices
// (undo the sort: slot i holds the point originally at m_order[i])
u8 unordered[16];
for( int i = 0; i < count; ++i )
unordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( unordered, bestindices );
// save the block
#if SQUISH_USE_SIMD
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
#else
WriteColourBlock4( beststart, bestend, bestindices, block );
#endif
// save the error
m_besterror = besterror;
}
}
#if SQUISH_USE_SIMD
// SIMD variant: places the two endpoints for the current cluster assignment
// (held in m_alpha/m_beta) by least squares and returns the resulting error.
//
// start, end  receive the endpoints, clamped to [0, 1] and snapped to the
//             31/63/31 grid
//
// Returns the metric-weighted error, with the same value in every lane.
Vec4 ClusterFit::SolveLeastSquares( Vec4& start, Vec4& end ) const
{
    // accumulate all the quantities we need
    int const count = m_colours->GetCount();
    Vec4 alpha2_sum = VEC4_CONST( 0.0f );
    Vec4 beta2_sum = VEC4_CONST( 0.0f );
    Vec4 alphabeta_sum = VEC4_CONST( 0.0f );
    Vec4 alphax_sum = VEC4_CONST( 0.0f );
    Vec4 betax_sum = VEC4_CONST( 0.0f );
    for( int i = 0; i < count; ++i )
    {
        Vec4 alpha = m_alpha[i];
        Vec4 beta = m_beta[i];
        Vec4 x = m_weighted[i];

        alpha2_sum = MultiplyAdd( alpha, alpha, alpha2_sum );
        beta2_sum = MultiplyAdd( beta, beta, beta2_sum );
        alphabeta_sum = MultiplyAdd( alpha, beta, alphabeta_sum );
        alphax_sum = MultiplyAdd( alpha, x, alphax_sum );
        betax_sum = MultiplyAdd( beta, x, betax_sum );
    }

    // compute every candidate solution without branching; a1/b1 cover the
    // cases where one of the sums is zero, a2/b2 the general case
    Vec4 const zero = VEC4_CONST( 0.0f );
    Vec4 beta2_sum_zero = CompareEqual( beta2_sum, zero );
    Vec4 alpha2_sum_zero = CompareEqual( alpha2_sum, zero );

    Vec4 a1 = alphax_sum*Reciprocal( alpha2_sum );
    Vec4 b1 = betax_sum*Reciprocal( beta2_sum );

    Vec4 factor = Reciprocal( NegativeMultiplySubtract(
        alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum
    ) );
    Vec4 a2 = NegativeMultiplySubtract(
        betax_sum, alphabeta_sum, alphax_sum*beta2_sum
    )*factor;
    Vec4 b2 = NegativeMultiplySubtract(
        alphax_sum, alphabeta_sum, betax_sum*alpha2_sum
    )*factor;

    // select the results (presumably Select( x, y, mask ) yields y where the
    // mask is set - confirm against the simd headers)
    Vec4 a = Select( Select( a2, a1, beta2_sum_zero ), zero, alpha2_sum_zero );
    Vec4 b = Select( Select( b2, b1, alpha2_sum_zero ), zero, beta2_sum_zero );

    // clamp the output to [0, 1]
    Vec4 const one = VEC4_CONST( 1.0f );
    Vec4 const half = VEC4_CONST( 0.5f );
    a = Min( one, Max( zero, a ) );
    b = Min( one, Max( zero, b ) );

    // clamp to the grid
    Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
    // Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
    Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); // IC: use approximate grid fitting.
    a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
    b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;

    // compute the error
    Vec4 const two = VEC4_CONST( 2.0 );
    Vec4 e1 = MultiplyAdd( b*b, beta2_sum, m_xxsum );
    Vec4 e2 = MultiplyAdd( a, alphax_sum, b*betax_sum );
    Vec4 e3 = MultiplyAdd( a*a, alpha2_sum, e1 );
    Vec4 e4 = MultiplyAdd( a*b*alphabeta_sum - e2, two, e3 );

    // apply the metric to the error term and sum the three channels
    Vec4 e5 = e4*m_metric;
    Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();

    // save the start and end
    start = a;
    end = b;
    return error;
}
#else
// Scalar variant: places the two endpoints for the current cluster
// assignment (held in m_alpha/m_beta) by least squares and returns the
// resulting error.
//
// start, end  receive the endpoints, clamped to [0, 1] and snapped to the
//             31/63/31 grid
//
// The returned error leaves out the constant m_xxsum term (it is commented
// out below), so it is only meaningful relative to other values returned by
// this function and may be negative.
float ClusterFit::SolveLeastSquares( Vec3& start, Vec3& end ) const
{
// accumulate all the quantities we need
int const count = m_colours->GetCount();
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
Vec3 alphax_sum( 0.0f );
Vec3 betax_sum( 0.0f );
for( int i = 0; i < count; ++i )
{
float alpha = m_alpha[i];
float beta = m_beta[i];
Vec3 const& x = m_weighted[i];
alpha2_sum += alpha*alpha;
beta2_sum += beta*beta;
alphabeta_sum += alpha*beta;
alphax_sum += alpha*x;
betax_sum += beta*x;
}
//if (debug) printf("%f %f %f", alpha2_sum, beta2_sum, alphabeta_sum);
// zero where non-determinate
// (when either sum is zero, only the other endpoint is solved for)
Vec3 a, b;
if( beta2_sum == 0.0f )
{
a = alphax_sum/alpha2_sum;
b = Vec3( 0.0f );
}
else if( alpha2_sum == 0.0f )
{
a = Vec3( 0.0f );
b = betax_sum/beta2_sum;
}
else
{
// general case: solve the 2x2 system for both endpoints
float factor = 1.0f/( alpha2_sum*beta2_sum - alphabeta_sum*alphabeta_sum );
a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
}
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
// (gridrcp deliberately differs slightly from the exact reciprocals,
// which are kept in the commented-out line for reference)
Vec3 const grid( 31.0f, 63.0f, 31.0f );
//Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
Vec3 const gridrcp(0.03227752766457f, 0.01583151765563f, 0.03227752766457f); // IC: use approximate grid fitting.
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
// compute the error
Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum /*+ m_xxsum*/
+ 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
// apply the metric to the error term
float error = Dot( e1, m_metric );
//if (debug) printf(" - %f\n", error);
// save the start and end
start = a;
end = b;
return error;
}
#endif
} // namespace squish

View File

@ -0,0 +1,79 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_CLUSTERFIT_H
#define SQUISH_CLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace squish {
// Colour fitter that orders the points along their principal axis and then
// tries every split of that ordering into contiguous clusters, placing the
// endpoints for each split by least squares.
class ClusterFit : public ColourFit
{
public:
ClusterFit( ColourSet const* colours, int flags );
// Sets the per-channel (r, g, b) weights applied to the error term.
void setMetric(float r, float g, float b);
// Returns the lowest error recorded so far.
float bestError() const;
private:
// 3-colour and 4-colour searches.
virtual void Compress3( void* block );
virtual void Compress4( void* block );
// NOTE(review): Reorder has no definition in clusterfit.cpp and
// m_principle is never assigned or read there - confirm whether both can go.
void Reorder( Vec3::Arg principle );
Vec3 m_principle;
#if SQUISH_USE_SIMD
// Solves for the endpoints of the current cluster assignment; returns the error.
Vec4 SolveLeastSquares( Vec4& start, Vec4& end ) const;
Vec4 m_weighted[16]; // weight * point, in sorted order
Vec4 m_unweighted[16]; // points, in sorted order
Vec4 m_weights[16]; // weights, in sorted order
Vec4 m_metric; // per-channel error weights
Vec4 m_alpha[16]; // per-point contribution towards the start endpoint
Vec4 m_beta[16]; // per-point contribution towards the end endpoint
Vec4 m_xxsum; // sum of squared weighted points
Vec4 m_besterror; // lowest error recorded so far
#else
// Solves for the endpoints of the current cluster assignment; returns the error.
float SolveLeastSquares( Vec3& start, Vec3& end ) const;
Vec3 m_weighted[16]; // weight * point, in sorted order
Vec3 m_unweighted[16]; // points, in sorted order
float m_weights[16]; // weights, in sorted order
Vec3 m_metric; // per-channel error weights
float m_alpha[16]; // per-point contribution towards the start endpoint
float m_beta[16]; // per-point contribution towards the end endpoint
Vec3 m_xxsum; // sum of squared weighted points
float m_besterror; // lowest error recorded so far
#endif
// m_order[i] is the original position of the point sorted into slot i
int m_order[16];
};
} // namespace squish
#endif // ndef SQUISH_CLUSTERFIT_H

View File

@ -0,0 +1,278 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourblock.h"
namespace squish {
// Converts a float to the nearest integer and clamps it to [0, limit].
//
// a     - value to convert.
// limit - largest value that may be returned.
static int FloatToInt( float a, int limit )
{
	// adding a half before the truncating cast gives round-to-nearest for
	// the non-negative values that survive the clamp below
	int const rounded = ( int )( a + 0.5f );

	// anything below the range collapses to zero
	if( rounded < 0 )
		return 0;

	// anything above the range collapses to the limit
	return ( rounded > limit ) ? limit : rounded;
}
// Quantises a colour to a packed 16-bit 5:6:5 value.
//
// The X, Y and Z components are scaled by 31, 63 and 31 respectively and
// rounded/clamped by FloatToInt before packing.
static int FloatTo565( Vec3::Arg colour )
{
	// quantise each channel to its own bit depth
	int const red = FloatToInt( 31.0f*colour.X(), 31 );
	int const green = FloatToInt( 63.0f*colour.Y(), 63 );
	int const blue = FloatToInt( 31.0f*colour.Z(), 31 );

	// build the word from the top down: 5 bits, then 6 bits, then 5 bits
	int packed = red;
	packed = ( packed << 6 ) | green;
	packed = ( packed << 5 ) | blue;
	return packed;
}
// Writes an 8-byte colour block: two 16-bit endpoints (low byte first)
// followed by sixteen 2-bit indices packed four to a byte.
//
// a, b    - packed endpoint values.
// indices - 16 index values; index 4*k sits in the low bits of byte 4 + k.
// block   - destination, at least 8 bytes.
static void WriteColourBlock( int a, int b, u8* indices, void* block )
{
	u8* bytes = ( u8* )block;

	// endpoints, little-endian
	int const endpoints[2] = { a, b };
	for( int e = 0; e < 2; ++e )
	{
		bytes[2*e] = ( u8 )( endpoints[e] & 0xff );
		bytes[2*e + 1] = ( u8 )( endpoints[e] >> 8 );
	}

	// indices: fold each group of four from the last entry down so that the
	// first entry of the group ends up in the lowest two bits
	for( int row = 0; row < 4; ++row )
	{
		int packed = 0;
		for( int col = 3; col >= 0; --col )
			packed = ( packed << 2 ) | indices[4*row + col];
		bytes[4 + row] = ( u8 )packed;
	}
}
// Writes a colour block whose endpoints are stored with a <= b.
//
// start, end - endpoint colours; quantised to 5:6:5 here.
// indices    - 16 indices expressed relative to (start, end).
// block      - destination, at least 8 bytes.
//
// If the quantised start is larger than the quantised end the endpoints are
// exchanged and indices 0 and 1 are exchanged to match; other index values
// are passed through unchanged.
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
{
	int a = FloatTo565( start );
	int b = FloatTo565( end );

	// the stored order must be ascending
	bool const exchanged = ( a > b );
	if( exchanged )
		std::swap( a, b );

	u8 remapped[16];
	for( int i = 0; i < 16; ++i )
	{
		u8 index = indices[i];
		// only the two endpoint indices trade places
		if( exchanged && index < 2 )
			index = ( u8 )( index ^ 1 );
		remapped[i] = index;
	}

	WriteColourBlock( a, b, remapped, block );
}
// Writes a colour block whose endpoints are stored with a >= b.
//
// start, end - endpoint colours; quantised to 5:6:5 here.
// indices    - 16 indices expressed relative to (start, end).
// block      - destination, at least 8 bytes.
//
// If the quantised endpoints are equal every index is written as 0. If the
// quantised start is smaller than the end the endpoints are exchanged and the
// low bit of every index is flipped (0<->1, 2<->3).
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
{
	int a = FloatTo565( start );
	int b = FloatTo565( end );

	u8 remapped[16];
	if( a == b )
	{
		// degenerate block: a single colour, so every pixel uses index 0
		for( int i = 0; i < 16; ++i )
			remapped[i] = 0;
	}
	else
	{
		// the stored order must be descending
		bool const exchanged = ( a < b );
		if( exchanged )
			std::swap( a, b );

		for( int i = 0; i < 16; ++i )
		{
			if( exchanged )
				remapped[i] = ( indices[i] ^ 0x1 ) & 0x3;
			else
				remapped[i] = indices[i];
		}
	}

	WriteColourBlock( a, b, remapped, block );
}
/*
static void WriteColourBlock( int a, int b, uint indices, void* block )
{
// get the block as bytes
u8* bytes = ( u8* )block;
// write the endpoints
bytes[0] = ( u8 )( a & 0xff );
bytes[1] = ( u8 )( a >> 8 );
bytes[2] = ( u8 )( b & 0xff );
bytes[3] = ( u8 )( b >> 8 );
// write the indices @@ Not sure that's correct...
bytes[4] = ( u8 )((indices >> 24) & 0xff);
bytes[5] = ( u8 )((indices >> 16) & 0xff);
bytes[6] = ( u8 )((indices >> 8) & 0xff);
bytes[7] = ( u8 )((indices >> 0) & 0xff);
}
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, uint indices, void* block )
{
// get the packed values
int a = FloatTo565( start );
int b = FloatTo565( end );
// remap the indices
if( a > b )
{
// swap a and b
std::swap( a, b );
indices ^= (~indices >> 1) & 0x55555555;
}
else if ( a == b )
{
indices = 0;
}
// write the block
WriteColourBlock( a, b, indices, block );
}
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, uint indices, void* block )
{
// get the packed values
int a = FloatTo565( start );
int b = FloatTo565( end );
// remap the indices
if( a < b )
{
// swap a and b
std::swap( a, b );
indices ^= 0x55555555;
}
else if( a == b )
{
indices = 0;
}
// write the block
WriteColourBlock( a, b, indices, block );
}
*/
// Expands a little-endian 5:6:5 value into four 8-bit components.
//
// packed - two bytes, low byte first.
// colour - receives four bytes: the three expanded channels then 255.
//
// Returns the 16-bit packed value.
static int Unpack565( u8 const* packed, u8* colour )
{
	// reassemble the 16-bit word
	int const value = ( int )packed[0] | ( ( int )packed[1] << 8 );

	// split into the stored 5/6/5-bit fields
	int const red = ( value >> 11 ) & 0x1f;
	int const green = ( value >> 5 ) & 0x3f;
	int const blue = value & 0x1f;

	// widen to 8 bits by replicating the top bits into the low bits
	colour[0] = ( u8 )( ( red << 3 ) | ( red >> 2 ) );
	colour[1] = ( u8 )( ( green << 2 ) | ( green >> 4 ) );
	colour[2] = ( u8 )( ( blue << 3 ) | ( blue >> 2 ) );
	colour[3] = 255;

	return value;
}
// Decodes an 8-byte colour block into 16 four-byte pixels.
//
// rgba   - receives 64 bytes, four per pixel.
// block  - the 8-byte colour block.
// isDxt1 - when true and the first endpoint is not greater than the second,
//          the block uses one averaged colour plus a zero entry with zero
//          alpha; otherwise two interpolated colours at thirds are used.
void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
{
	u8 const* bytes = reinterpret_cast< u8 const* >( block );

	// palette of four 4-byte entries: both endpoints, then two derived colours
	u8 codes[16];
	int const a = Unpack565( bytes, codes );
	int const b = Unpack565( bytes + 2, codes + 4 );

	// the mode is the same for every channel, so decide it once
	bool const threeColour = isDxt1 && a <= b;

	for( int channel = 0; channel < 3; ++channel )
	{
		int const c = codes[channel];
		int const d = codes[4 + channel];
		if( threeColour )
		{
			codes[8 + channel] = ( u8 )( ( c + d )/2 );
			codes[12 + channel] = 0;
		}
		else
		{
			codes[8 + channel] = ( u8 )( ( 2*c + d )/3 );
			codes[12 + channel] = ( u8 )( ( c + 2*d )/3 );
		}
	}

	// alpha of the derived entries
	codes[8 + 3] = 255;
	codes[12 + 3] = threeColour ? 0 : 255;

	// each index byte holds four 2-bit indices, lowest bits first; look up
	// the palette entry for every pixel and copy it out
	for( int pixel = 0; pixel < 16; ++pixel )
	{
		int const shift = 2*( pixel & 3 );
		int const index = ( bytes[4 + pixel/4] >> shift ) & 0x3;
		u8 const* entry = codes + 4*index;
		u8* out = rgba + 4*pixel;
		for( int j = 0; j < 4; ++j )
			out[j] = entry[j];
	}
}
} // namespace squish

View File

@ -0,0 +1,43 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURBLOCK_H
#define SQUISH_COLOURBLOCK_H
#include "squish.h"
#include "maths.h"
namespace squish {
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
//void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, uint indices, void* block );
//void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, uint indices, void* block );
void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
} // namespace squish
#endif // ndef SQUISH_COLOURBLOCK_H

View File

@ -0,0 +1,54 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourfit.h"
#include "colourset.h"
namespace squish {
// Records the colour set and the compression flags for later use by
// Compress(). Only the pointer is stored; the colour set itself is not copied.
ColourFit::ColourFit( ColourSet const* colours, int flags )
: m_colours( colours ),
m_flags( flags )
{
}
// Compresses the colour set into the given block by dispatching to the
// Compress3/Compress4 implementations of the derived class.
//
// block - destination colour block.
//
// Mode selection:
//  - DXT1 with transparent pixels: Compress3 only. ColourSet::RemapIndices
//    gives transparent pixels index 3, and that index only decodes as
//    transparent when the block is stored in 3-colour order, so the 4-colour
//    path must not be used for these blocks.
//  - DXT1 without transparent pixels: Compress4 followed by Compress3, in
//    that order.
//  - anything else: Compress4 only.
void ColourFit::Compress( void* block )
{
	bool const isDxt1 = ( ( m_flags & kDxt1 ) != 0 );

	if( isDxt1 && m_colours->IsTransparent() )
	{
		// 1-bit alpha is only representable in 3-colour mode
		Compress3( block );
		return;
	}

	Compress4( block );
	if( isDxt1 )
	{
		// opaque DXT1 blocks may use either mode, so try the other one too
		Compress3( block );
	}
}
} // namespace squish

View File

@ -0,0 +1,53 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURFIT_H
#define SQUISH_COLOURFIT_H
#include "squish.h"
#include "maths.h"
namespace squish {
class ColourSet;
// Abstract base for colour block fitters. Holds the colour set and flags and
// exposes a single public entry point, Compress(), which dispatches to the
// mode-specific implementations supplied by derived classes.
class ColourFit
{
public:
// colours - colour set to fit (the pointer is stored, not copied).
// flags   - compression flags.
ColourFit( ColourSet const* colours, int flags );
// Compresses the colour set into block by calling Compress3 and/or
// Compress4, chosen from the flags and the transparency of the colour set.
void Compress( void* block );
protected:
// Mode-specific compression; must be implemented by derived classes.
virtual void Compress3( void* block ) = 0;
virtual void Compress4( void* block ) = 0;
ColourSet const* m_colours;
int m_flags;
};
} // namespace squish
#endif // ndef SQUISH_COLOURFIT_H

View File

@ -0,0 +1,134 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourset.h"
namespace squish {
// Builds the point set for one block of 16 pixels.
//
// rgba  - 16 pixels of 4 bytes each (64 bytes are read).
// flags - kDxt1: pixels whose fourth byte is zero are treated as transparent,
//         left out of the set and given a remap of -1.
//         kWeightColourByAlpha: each point is weighted by (alpha + 1)/256
//         instead of 1.
//
// With the "#if 1" branch active every non-transparent pixel becomes its own
// point; the "#else" branch (compiled out) is an alternative that merges
// pixels with identical colour bytes and accumulates their weights.
ColourSet::ColourSet( u8 const* rgba, int flags )
: m_count( 0 ),
m_transparent( false )
{
// check the compression mode for dxt1
bool isDxt1 = ( ( flags & kDxt1 ) != 0 );
bool weightByAlpha = ( ( flags & kWeightColourByAlpha ) != 0 );
// create the minimal set
for( int i = 0; i < 16; ++i )
{
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] == 0 )
{
m_remap[i] = -1;
m_transparent = true;
continue;
}
#if 1
// normalise coordinates to [0,1]
// note the reversed byte order: X comes from byte 2 and Z from byte 0
// NOTE(review): presumably because the input is stored as BGRA - confirm
// against the callers.
float x = ( float )rgba[4*i + 2] / 255.0f;
float y = ( float )rgba[4*i + 1] / 255.0f;
float z = ( float )rgba[4*i + 0] / 255.0f;
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// add the point
m_points[m_count] = Vec3( x, y, z );
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
m_remap[i] = m_count;
// advance
++m_count;
#else
// (disabled) duplicate-merging variant
// loop over previous points for a match
for( int j = 0;; ++j )
{
// allocate a new point
if( j == i )
{
// normalise coordinates to [0,1]
float x = ( float )rgba[4*i + 2] / 255.0f;
float y = ( float )rgba[4*i + 1] / 255.0f;
float z = ( float )rgba[4*i + 0] / 255.0f;
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// add the point
m_points[m_count] = Vec3( x, y, z );
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
m_remap[i] = m_count;
// advance
++m_count;
break;
}
// check for a match
bool match = ( rgba[4*i] == rgba[4*j] )
&& ( rgba[4*i + 1] == rgba[4*j + 1] )
&& ( rgba[4*i + 2] == rgba[4*j + 2] )
&& ( rgba[4*j + 3] != 0 || !isDxt1 );
if( match )
{
// get the index of the match
int index = m_remap[j];
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// map to this point and increase the weight
m_weights[index] += ( weightByAlpha ? w : 1.0f );
m_remap[i] = index;
break;
}
}
#endif
}
#if SQUISH_USE_SIMD
// generate vector values
// mirror the scalar points/weights into Vec4 form: points get a fourth
// component of 1, weights are built with VEC4_CONST from the scalar weight
for( int i = 0; i < m_count; ++i )
{
m_points_simd[i] = Vec4(m_points[i].X(), m_points[i].Y(), m_points[i].Z(), 1);
m_weights_simd[i] = VEC4_CONST(m_weights[i]);
}
#endif
}
// Expands per-point indices into per-pixel indices.
//
// source - one index per point of the set.
// target - receives 16 indices, one per pixel of the block.
//
// Pixels that were left out of the set (remap of -1) receive index 3.
void ColourSet::RemapIndices( u8 const* source, u8* target ) const
{
	for( int pixel = 0; pixel < 16; ++pixel )
	{
		int const point = m_remap[pixel];
		target[pixel] = ( point == -1 ) ? ( u8 )3 : source[point];
	}
}
} // namespace squish

View File

@ -0,0 +1,69 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURSET_H
#define SQUISH_COLOURSET_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
namespace squish {
/*! @brief Represents a set of block colours

	Built from 16 four-byte pixels. Stores up to 16 points with one weight
	each, a per-pixel remap table from pixel to point (-1 for pixels that were
	left out), and a flag recording whether any pixel was left out as
	transparent. When SQUISH_USE_SIMD is enabled, Vec4 copies of the points
	and weights are kept as well.
*/
class ColourSet
{
public:
ColourSet( u8 const* rgba, int flags );
// number of valid entries in the points/weights arrays
int GetCount() const { return m_count; }
Vec3 const* GetPoints() const { return m_points; }
float const* GetWeights() const { return m_weights; }
// true when at least one pixel was treated as transparent
bool IsTransparent() const { return m_transparent; }
// converts one index per point (source) into one index per pixel (target)
void RemapIndices( u8 const* source, u8* target ) const;
private:
int m_count;
Vec3 m_points[16];
float m_weights[16];
// pixel -> point index, or -1 for a pixel that has no point
int m_remap[16];
bool m_transparent;
#if SQUISH_USE_SIMD
public:
Vec4 const* GetPointsSimd() const { return m_points_simd; }
Vec4 const* GetWeightsSimd() const { return m_weights_simd; }
private:
// Vec4 mirrors of m_points / m_weights, filled by the constructor
Vec4 m_points_simd[16];
Vec4 m_weights_simd[16];
#endif
};
} // namespace squish
#endif // ndef SQUISH_COLOURSET_H

View File

@ -0,0 +1,22 @@
# config file used for the Makefile only
# Every setting below uses ?= so a value that is already defined (for example
# on the make command line or in the environment) takes precedence.

# define to 1 to use altivec instructions
USE_ALTIVEC ?= 0
# define to 1 to use sse instructions
USE_SSE ?= 0
# default flags
CXXFLAGS ?= -O2
# altivec: define SQUISH_USE_ALTIVEC for the preprocessor and enable the
# instruction set in the compiler
ifeq ($(USE_ALTIVEC),1)
CPPFLAGS += -DSQUISH_USE_ALTIVEC=1
CXXFLAGS += -maltivec
endif
# sse: define SQUISH_USE_SSE for the preprocessor and enable the instruction
# set in the compiler
ifeq ($(USE_SSE),1)
CPPFLAGS += -DSQUISH_USE_SSE=1
CXXFLAGS += -msse
endif
# where should we install to
INSTALL_DIR ?= /usr/local

View File

@ -0,0 +1,55 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_CONFIG_H
#define SQUISH_CONFIG_H

// Set to 1 when building squish to use altivec instructions.
// When not set by the build, it defaults to 1 if the compiler defines __VEC__
// and to 0 otherwise. The value is always a plain integer literal: expanding
// a macro to "defined(...)" inside #if is undefined behaviour, and the macro
// would not be usable in ordinary expressions either.
#ifndef SQUISH_USE_ALTIVEC
#	if defined(__VEC__)
#		define SQUISH_USE_ALTIVEC 1
#	else
#		define SQUISH_USE_ALTIVEC 0
#	endif
#endif

// Set to 1 when building squish to use sse instructions.
// When not set by the build, it defaults to 2 if __SSE2__ is defined, 1 if
// only __SSE__ is defined, and 0 otherwise.
#ifndef SQUISH_USE_SSE
#	if defined(__SSE2__)
#		define SQUISH_USE_SSE 2
#	elif defined(__SSE__)
#		define SQUISH_USE_SSE 1
#	else
#		define SQUISH_USE_SSE 0
#	endif
#endif

// Internally set SQUISH_USE_SIMD when either altivec or sse is available.
#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
#	error "Cannot enable both altivec and sse!"
#endif
#if SQUISH_USE_ALTIVEC || SQUISH_USE_SSE
#	define SQUISH_USE_SIMD 1
#else
#	define SQUISH_USE_SIMD 0
#endif

#endif // ndef SQUISH_CONFIG_H

View File

@ -0,0 +1,158 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include <iostream>
// One way of producing a target value: a pair of endpoint values and how far
// the value they produce is from the target.
struct SourceBlock
{
int start; // first endpoint, in the reduced bit depth
int end; // second endpoint, in the reduced bit depth
int error; // distance from the target; 0 means the target is hit exactly
};
// The best known sources for a single 8-bit target value, one per codebook
// index (up to four).
struct TargetValue
{
SourceBlock sources[4];
};
// Generates one lookup table and prints it to std::cout as C++ source.
//
// name    - identifier used for the emitted array.
// bits    - bit depth of the endpoint values (main() passes 5 or 6).
// colours - number of codebook entries that are searched (main() passes 3 or
//           4); entries at or beyond this index are printed as { 0, 0, 0 }.
//
// For every 8-bit target value and every codebook index the table records a
// pair of endpoints and the error between the value that pair produces at
// that index and the target.
static void GenerateData( std::string const& name, int bits, int colours )
{
TargetValue values[256];
// initialise the data
// (only the error field of the searched indices is set; start/end are
// written later, before they are printed)
for( int target = 0; target < 256; ++target )
for( int index = 0; index < colours; ++index )
values[target].sources[index].error = 255;
// loop over all possible source points
int count = ( 1 << bits );
for( int value1 = 0; value1 < count; ++value1 )
{
for( int value2 = 0; value2 < count; ++value2 )
{
// compute the 8-bit endpoints
// (shift up to 8 bits and copy the top bits into the vacated low bits)
int a = ( value1 << ( 8 - bits ) ) | ( value1 >> ( 2*bits - 8 ) );
int b = ( value2 << ( 8 - bits ) ) | ( value2 >> ( 2*bits - 8 ) );
// fill in the codebook with the these and intermediates
int codes[4];
codes[0] = a;
codes[1] = b;
if( colours == 3 )
{
codes[2] = ( a + b )/2;
codes[3] = 0;
}
else
{
codes[2] = ( 2*a + b )/3;
codes[3] = ( a + 2*b )/3;
}
// mark each target point with the endpoints and index needed for it
// (the first endpoint pair found for a target/index is kept)
for( int index = 0; index < colours; ++index )
{
int target = codes[index];
SourceBlock& block = values[target].sources[index];
if( block.error != 0 )
{
block.start = value1;
block.end = value2;
block.error = 0;
}
}
}
}
// iteratively fill in the missing values
// Each pass lets a target adopt the endpoints of an adjacent target when that
// gives a smaller error (neighbour's error plus one); passes repeat until
// nothing changes.
for( ;; )
{
bool stable = true;
for( int index = 0; index < colours; ++index )
{
for( int target = 0; target < 256; ++target )
{
// try the next-higher target
if( target != 255 )
{
SourceBlock& current = values[target].sources[index];
SourceBlock& next = values[target + 1].sources[index];
if( current.error > next.error + 1 )
{
current.start = next.start;
current.end = next.end;
current.error = next.error + 1;
stable = false;
}
}
// try the next-lower target
if( target != 0 )
{
SourceBlock& current = values[target].sources[index];
SourceBlock& previous = values[target - 1].sources[index];
if( current.error > previous.error + 1 )
{
current.start = previous.start;
current.end = previous.end;
current.error = previous.error + 1;
stable = false;
}
}
}
}
if( stable )
break;
}
// debug
// emit the table as a C++ array definition; the loops break before writing a
// separator so that no trailing comma is produced
std::cout << "\nstatic SingleColourLookup const " << name << "[] = \n{\n";
for( int i = 0;; )
{
std::cout << "\t{ { ";
for( int j = 0;; )
{
SourceBlock const& block = values[i].sources[j];
if( j < colours )
std::cout << "{ " << block.start << ", " << block.end << ", " << block.error << " }";
else
std::cout << "{ 0, 0, 0 }";
if( ++j == 4 )
break;
std::cout << ", ";
}
std::cout << " } }";
if( ++i == 256 )
break;
std::cout << ",\n";
}
std::cout << "\n};\n";
}
// Emits the four lookup tables, one per combination of endpoint bit depth
// (5 or 6) and codebook size (3 or 4), in a fixed order.
int main()
{
	struct TableSpec
	{
		char const* name;
		int bits;
		int colours;
	};
	static TableSpec const specs[] =
	{
		{ "lookup_5_3", 5, 3 },
		{ "lookup_6_3", 6, 3 },
		{ "lookup_5_4", 5, 4 },
		{ "lookup_6_4", 6, 4 }
	};

	int const specCount = ( int )( sizeof( specs )/sizeof( specs[0] ) );
	for( int i = 0; i < specCount; ++i )
		GenerateData( specs[i].name, specs[i].bits, specs[i].colours );

	return 0;
}

View File

@ -0,0 +1,603 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
/*! @file
@brief Example program that converts between the PNG and DXT formats.
This program requires libpng for PNG input and output, and is designed
to show how to prepare data for the squish library when it is not simply
a contiguous block of memory.
*/
#include <iostream>
#include <string>
#include <sstream>
#include <ctime>
#include <cmath>
#include <squish.h>
#include <png.h>
#ifdef _MSC_VER
#pragma warning( disable: 4511 4512 )
#endif // def _MSC_VER
using namespace squish;
//! Exception type that carries a caller-supplied message.
class Error : public std::exception
{
public:
	//! Keeps a copy of the message.
	Error( std::string const& excuse )
	  : m_excuse( excuse )
	{
	}

	~Error() throw()
	{
	}

	//! Returns the stored message; valid for the lifetime of this object.
	virtual char const* what() const throw()
	{
		return m_excuse.c_str();
	}

private:
	std::string m_excuse;
};
//! Base class to make derived classes non-copyable
//! The copy constructor and copy assignment operator are declared private and
//! given no definition in this class, so copying a derived object does not
//! compile outside the class.
class NonCopyable
{
public:
NonCopyable() {}
private:
NonCopyable( NonCopyable const& );
NonCopyable& operator=( NonCopyable const& );
};
//! Memory object.
//! Owns a u8 array of the requested size: allocated with new[] in the
//! constructor and released with delete[] in the destructor.
class Mem : NonCopyable
{
public:
explicit Mem( int size ) : m_p( new u8[size] ) {}
~Mem() { delete[] m_p; }
//! Returns the start of the owned array.
u8* Get() const { return m_p; }
private:
u8* m_p;
};
//! File object.
//! Takes ownership of a FILE* (which may be null, e.g. a failed fopen) and
//! closes it in the destructor if it is non-null.
class File : NonCopyable
{
public:
explicit File( FILE* fp ) : m_fp( fp ) {}
~File() { if( m_fp ) fclose( m_fp ); }
//! True when the wrapped pointer is non-null.
bool IsValid() const { return m_fp != 0; }
FILE* Get() const { return m_fp; }
private:
FILE* m_fp;
};
//! PNG read object.
//! Owns a libpng read struct together with two info structs and destroys all
//! of them in the destructor. Construction throws Error if any of them cannot
//! be created.
class PngReadStruct : NonCopyable
{
public:
PngReadStruct()
: m_png( 0 ),
m_info( 0 ),
m_end( 0 )
{
m_png = png_create_read_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 );
if( !m_png )
throw Error( "failed to create png read struct" );
m_info = png_create_info_struct( m_png );
m_end = png_create_info_struct( m_png );
if( !m_info || !m_end )
{
// the destructor does not run when the constructor throws, so release
// whatever was created here; only pass the info structs that exist
png_infopp info = m_info ? &m_info : 0;
png_infopp end = m_end ? &m_end : 0;
png_destroy_read_struct( &m_png, info, end );
throw Error( "failed to create png info structs" );
}
}
~PngReadStruct()
{
png_destroy_read_struct( &m_png, &m_info, &m_end );
}
png_structp GetPng() const { return m_png; }
png_infop GetInfo() const { return m_info; }
private:
png_structp m_png;
png_infop m_info, m_end;
};
//! PNG write object.
class PngWriteStruct : NonCopyable
{
public:
PngWriteStruct()
: m_png( 0 ),
m_info( 0 )
{
m_png = png_create_write_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 );
if( !m_png )
throw Error( "failed to create png read struct" );
m_info = png_create_info_struct( m_png );
if( !m_info )
{
png_infopp info = m_info ? &m_info : 0;
png_destroy_write_struct( &m_png, info );
throw Error( "failed to create png info structs" );
}
}
~PngWriteStruct()
{
png_destroy_write_struct( &m_png, &m_info );
}
png_structp GetPng() const { return m_png; }
png_infop GetInfo() const { return m_info; }
private:
png_structp m_png;
png_infop m_info;
};
//! PNG rows object.
class PngRows : NonCopyable
{
public:
PngRows( int width, int height, int stride ) : m_width( width ), m_height( height )
{
m_rows = ( png_bytep* )malloc( m_height*sizeof( png_bytep ) );
for( int i = 0; i < m_height; ++i )
m_rows[i] = ( png_bytep )malloc( m_width*stride );
}
~PngRows()
{
for( int i = 0; i < m_height; ++i )
free( m_rows[i] );
free( m_rows );
}
png_bytep* Get() const { return m_rows; }
private:
png_bytep* m_rows;
int m_width, m_height;
};
//! An 8-bit PNG image loaded fully into memory.
//! The constructor reads the file and throws Error on failure; afterwards the
//! accessors expose the dimensions, the pixel layout and the row data.
class PngImage
{
public:
explicit PngImage( std::string const& fileName );
int GetWidth() const { return m_width; }
int GetHeight() const { return m_height; }
//! Bytes per pixel: 3 for colour or 1 for grey, plus 1 if alpha is present.
int GetStride() const { return m_stride; }
bool IsColour() const { return m_colour; }
bool IsAlpha() const { return m_alpha; }
//! Returns the start of the given row; the row index is not range-checked.
u8 const* GetRow( int row ) const { return ( u8* )m_rows[row]; }
private:
PngReadStruct m_png;
int m_width;
int m_height;
int m_stride;
bool m_colour;
bool m_alpha;
// row pointers obtained from png_get_rows on m_png by the constructor
png_bytep* m_rows;
};
// Loads the named PNG file into memory.
//
// fileName - path of the file to read.
//
// Throws Error if the file cannot be opened, is too short to hold a PNG
// signature or has the wrong signature, is not 8 bits per channel, or if
// libpng returns no row data.
//
// NOTE(review): no libpng error handler or setjmp is installed here, so
// errors raised inside libpng while decoding are not converted to Error.
PngImage::PngImage( std::string const& fileName )
{
	// open the source file
	File file( fopen( fileName.c_str(), "rb" ) );
	if( !file.IsValid() )
	{
		std::ostringstream oss;
		oss << "failed to open \"" << fileName << "\" for reading";
		throw Error( oss.str() );
	}

	// check the signature bytes; a short read leaves part of the header
	// uninitialised, so it must be rejected before the comparison
	png_byte header[8];
	bool const gotHeader = ( fread( header, 1, 8, file.Get() ) == 8 );
	if( !gotHeader || png_sig_cmp( header, 0, 8 ) )
	{
		std::ostringstream oss;
		oss << "\"" << fileName << "\" does not look like a png file";
		throw Error( oss.str() );
	}

	// read the image into memory (the 8 signature bytes are already consumed)
	png_init_io( m_png.GetPng(), file.Get() );
	png_set_sig_bytes( m_png.GetPng(), 8 );
	png_read_png( m_png.GetPng(), m_png.GetInfo(), PNG_TRANSFORM_EXPAND, 0 );

	// get the image info
	png_uint_32 width;
	png_uint_32 height;
	int bitDepth;
	int colourType;
	png_get_IHDR( m_png.GetPng(), m_png.GetInfo(), &width, &height, &bitDepth, &colourType, 0, 0, 0 );

	// check the image is 8 bit
	if( bitDepth != 8 )
	{
		std::ostringstream oss;
		oss << "cannot process " << bitDepth << "-bit image (bit depth must be 8)";
		throw Error( oss.str() );
	}

	// save the info
	m_width = width;
	m_height = height;
	m_colour = ( ( colourType & PNG_COLOR_MASK_COLOR ) != 0 );
	m_alpha = ( ( colourType & PNG_COLOR_MASK_ALPHA ) != 0 );
	m_stride = ( m_colour ? 3 : 1 ) + ( m_alpha ? 1 : 0 );

	// get the image rows; they stay owned by the png struct held in m_png
	m_rows = png_get_rows( m_png.GetPng(), m_png.GetInfo() );
	if( !m_rows )
		throw Error( "failed to get image rows" );
}
// Compresses a PNG file into a raw block-compressed file.
//
// sourceFileName - PNG to read; both dimensions must be multiples of 4.
// targetFileName - file to write: width and height as native ints, followed
//                  by the compressed blocks in row-major block order.
// flags          - squish compression flags; kDxt1 selects 8-byte blocks,
//                  anything else 16-byte blocks.
//
// Throws Error if the image cannot be loaded, has unsuitable dimensions, or
// the target file cannot be opened. The time spent compressing is printed to
// std::cout.
static void Compress( std::string const& sourceFileName, std::string const& targetFileName, int flags )
{
	// load the source image
	PngImage sourceImage( sourceFileName );

	// get the image info
	int width = sourceImage.GetWidth();
	int height = sourceImage.GetHeight();
	int stride = sourceImage.GetStride();
	bool colour = sourceImage.IsColour();
	bool alpha = sourceImage.IsAlpha();

	// check the image dimensions
	if( ( width % 4 ) != 0 || ( height % 4 ) != 0 )
	{
		std::ostringstream oss;
		oss << "cannot compress " << width << "x" << height
			<< " image (dimensions must be multiples of 4)";
		throw Error( oss.str() );
	}

	// create the target data
	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
	int targetDataSize = bytesPerBlock*width*height/16;
	Mem targetData( targetDataSize );

	// loop over blocks and compress them
	clock_t start = std::clock();
	u8* targetBlock = targetData.Get();
	for( int y = 0; y < height; y += 4 )
	{
		// process a row of blocks
		for( int x = 0; x < width; x += 4 )
		{
			// gather the 4x4 block into a contiguous 4-bytes-per-pixel buffer
			u8 sourceRgba[16*4];
			for( int py = 0, i = 0; py < 4; ++py )
			{
				u8 const* row = sourceImage.GetRow( y + py ) + x*stride;
				for( int px = 0; px < 4; ++px, ++i )
				{
					// get the pixel colour
					if( colour )
					{
						for( int j = 0; j < 3; ++j )
							sourceRgba[4*i + j] = *row++;
					}
					else
					{
						// grey source: replicate the single channel
						for( int j = 0; j < 3; ++j )
							sourceRgba[4*i + j] = *row;
						++row;
					}

					// copy the alpha channel, or use fully opaque when the
					// image has none
					if( alpha )
						sourceRgba[4*i + 3] = *row++;
					else
						sourceRgba[4*i + 3] = 255;
				}
			}

			// compress this block
			Compress( sourceRgba, targetBlock, flags );

			// advance
			targetBlock += bytesPerBlock;
		}
	}
	clock_t end = std::clock();
	double duration = ( double )( end - start ) / CLOCKS_PER_SEC;
	std::cout << "time taken: " << duration << " seconds" << std::endl;

	// open the target file
	File targetFile( fopen( targetFileName.c_str(), "wb" ) );
	if( !targetFile.IsValid() )
	{
		std::ostringstream oss;
		oss << "failed to open \"" << targetFileName << "\" for writing";
		throw Error( oss.str() );
	}

	// write the header
	fwrite( &width, sizeof( int ), 1, targetFile.Get() );
	fwrite( &height, sizeof( int ), 1, targetFile.Get() );

	// write the data
	fwrite( targetData.Get(), 1, targetDataSize, targetFile.Get() );
}
/*! @brief Decompresses a raw DXT file into an RGBA PNG image.

	The source file is expected to hold the width and the height as two native
	ints followed by the compressed blocks in row-major block order, which is
	the layout written by the file-based Compress in this program.

	@param sourceFileName	Path of the raw DXT file to read.
	@param targetFileName	Path of the PNG image to write.
	@param flags			Compression flags. If kDxt1 is set each block takes
							8 bytes, otherwise 16. The flags are also passed
							unchanged to the block decompressor.

	@throws Error if the source file cannot be opened, if its header or block
		data is truncated, if the stored dimensions are not positive multiples
		of 4, or if the target file cannot be opened.
*/
static void Decompress( std::string const& sourceFileName, std::string const& targetFileName, int flags )
{
	// open the source file
	File sourceFile( fopen( sourceFileName.c_str(), "rb" ) );
	if( !sourceFile.IsValid() )
	{
		std::ostringstream oss;
		oss << "failed to open \"" << sourceFileName << "\" for reading";
		throw Error( oss.str() );
	}

	// get the width and height
	int width = 0, height = 0;
	if( fread( &width, sizeof( int ), 1, sourceFile.Get() ) != 1
		|| fread( &height, sizeof( int ), 1, sourceFile.Get() ) != 1 )
	{
		std::ostringstream oss;
		oss << "failed to read the image dimensions from \"" << sourceFileName << "\"";
		throw Error( oss.str() );
	}

	// the block loop below writes whole 4x4 blocks into the target rows, so
	// any other dimensions would index rows and pixels that do not exist
	if( width <= 0 || height <= 0 || ( width % 4 ) != 0 || ( height % 4 ) != 0 )
	{
		std::ostringstream oss;
		oss << "cannot decompress " << width << "x" << height
			<< " image (dimensions must be positive multiples of 4)";
		throw Error( oss.str() );
	}

	// work out the data size: one block per 4x4 pixels
	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
	int sourceDataSize = bytesPerBlock*( width/4 )*( height/4 );
	Mem sourceData( sourceDataSize );

	// read the source data
	size_t bytesRead = fread( sourceData.Get(), 1, sourceDataSize, sourceFile.Get() );
	if( bytesRead != ( size_t )sourceDataSize )
	{
		std::ostringstream oss;
		oss << "failed to read the block data from \"" << sourceFileName << "\"";
		throw Error( oss.str() );
	}

	// create the target rows (4 bytes per pixel)
	PngRows targetRows( width, height, 4 );

	// loop over blocks and decompress them
	u8 const* sourceBlock = sourceData.Get();
	for( int y = 0; y < height; y += 4 )
	{
		// process a row of blocks
		for( int x = 0; x < width; x += 4 )
		{
			// decompress back
			u8 targetRgba[16*4];
			Decompress( targetRgba, sourceBlock, flags );

			// write the data into the target rows
			for( int py = 0, i = 0; py < 4; ++py )
			{
				u8* row = ( u8* )targetRows.Get()[y + py] + x*4;
				for( int px = 0; px < 4; ++px, ++i )
				{
					for( int j = 0; j < 4; ++j )
						*row++ = targetRgba[4*i + j];
				}
			}

			// advance
			sourceBlock += bytesPerBlock;
		}
	}

	// create the target PNG
	PngWriteStruct targetPng;

	// set up the image
	png_set_IHDR(
		targetPng.GetPng(), targetPng.GetInfo(), width, height,
		8, PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE,
		PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT
	);

	// open the target file
	File targetFile( fopen( targetFileName.c_str(), "wb" ) );
	if( !targetFile.IsValid() )
	{
		std::ostringstream oss;
		oss << "failed to open \"" << targetFileName << "\" for writing";
		throw Error( oss.str() );
	}

	// write the image
	png_set_rows( targetPng.GetPng(), targetPng.GetInfo(), targetRows.Get() );
	png_init_io( targetPng.GetPng(), targetFile.Get() );
	png_write_png( targetPng.GetPng(), targetPng.GetInfo(), PNG_TRANSFORM_IDENTITY, 0 );
}
/*! @brief Prints the rms error between two PNG images of equal dimensions.

	For every pixel the squared differences of the channels present in both
	images are summed; the total is divided by the number of pixels and the
	square root of the result is printed to standard output.

	@param sourceFileName	Path of the first PNG image.
	@param targetFileName	Path of the second PNG image.

	@throws Error if the two images differ in width or height.
*/
static void Diff( std::string const& sourceFileName, std::string const& targetFileName )
{
	// load both images
	PngImage sourceImage( sourceFileName );
	PngImage targetImage( targetFileName );

	// gather the layout of each image
	int const width = sourceImage.GetWidth();
	int const height = sourceImage.GetHeight();
	int const sourceStride = sourceImage.GetStride();
	int const targetStride = targetImage.GetStride();

	// only the channels that exist in both images are compared
	int const channels = ( targetStride < sourceStride ) ? targetStride : sourceStride;

	// the images must cover the same area
	if( !( width == targetImage.GetWidth() && height == targetImage.GetHeight() ) )
		throw Error( "source and target dimensions do not match" );

	// accumulate the squared differences
	double sumSquares = 0.0;
	for( int y = 0; y < height; ++y )
	{
		u8 const* sourcePixel = sourceImage.GetRow( y );
		u8 const* targetPixel = targetImage.GetRow( y );
		for( int x = 0; x < width; ++x )
		{
			for( int c = 0; c < channels; ++c )
			{
				int const delta = int( sourcePixel[c] ) - int( targetPixel[c] );
				sumSquares += double( delta*delta );
			}

			// step each image by its own pixel size
			sourcePixel += sourceStride;
			targetPixel += targetStride;
		}
	}

	// report the root of the per-pixel mean
	double const rms = std::sqrt( sumSquares / ( width*height ) );
	std::cout << "rms error: " << rms << std::endl;
}
//! The operation that main performs on the source and target files.
enum Mode
{
//! Selected by -c (the default): calls Compress on the source and target.
kCompress,
//! Selected by -d: calls Decompress on the source and target.
kDecompress,
//! Selected by -e: calls Diff on the source and target.
kDiff
};
/*! @brief Entry point of squishpng: parses the command line and runs one mode.

	Words starting with '-' are treated as groups of single-letter options
	until a '-' option letter is seen; every other word is taken as the source
	file name, then the target file name. Any further word is reported on
	standard error and otherwise ignored.

	@param argc	Number of command-line words, including the program name.
	@param argv	The command-line words.

	@return 0 on success or after printing the help text; -1 for an unknown
		option, a missing file name, or an exception caught while working.
*/
int main( int argc, char* argv[] )
{
	try
	{
		// defaults, overridden by the options below
		std::string sourceFileName, targetFileName;
		Mode mode = kCompress;
		int method = kDxt1;
		int metric = kColourMetricPerceptual;
		int fit = kColourClusterFit;
		int extra = 0;
		bool showHelp = false;
		bool parseOptions = true;

		// parse the command-line
		for( int argIndex = 1; argIndex < argc; ++argIndex )
		{
			char const* const word = argv[argIndex];

			// everything that is not an option word is a file name
			if( !parseOptions || word[0] != '-' )
			{
				if( sourceFileName.empty() )
					sourceFileName.assign( word );
				else if( targetFileName.empty() )
					targetFileName.assign( word );
				else
					std::cerr << "unexpected argument \"" << word << "\"" << std::endl;
				continue;
			}

			// walk the option letters that follow the leading dash
			for( char const* letter = word + 1; *letter != '\0'; ++letter )
			{
				char const option = *letter;
				if( option == 'h' )
					showHelp = true;
				else if( option == 'c' )
					mode = kCompress;
				else if( option == 'd' )
					mode = kDecompress;
				else if( option == 'e' )
					mode = kDiff;
				else if( option == '1' )
					method = kDxt1;
				else if( option == '3' )
					method = kDxt3;
				else if( option == '5' )
					method = kDxt5;
				else if( option == 'u' )
					metric = kColourMetricUniform;
				else if( option == 'r' )
					fit = kColourRangeFit;
				else if( option == 'w' )
					extra = kWeightColourByAlpha;
				else if( option == '-' )
					parseOptions = false;	// takes effect from the next word
				else
				{
					std::cerr << "unknown option '" << option << "'" << std::endl;
					return -1;
				}
			}
		}

		// help takes priority over everything else
		if( showHelp )
		{
			static char const* const helpLines[] =
			{
				"SYNTAX",
				"\tsquishpng [-cde135] <source> <target>",
				"OPTIONS",
				"\t-c\tCompress source png to target raw dxt (default)",
				"\t-135\tSpecifies whether to use DXT1 (default), DXT3 or DXT5 compression",
				"\t-u\tUse a uniform colour metric during colour compression",
				"\t-r\tUse the fast but inferior range-based colour compressor",
				"\t-w\tWeight colour values by alpha in the cluster colour compressor",
				"\t-d\tDecompress source raw dxt to target png",
				"\t-e\tDiff source and target png"
			};
			int const lineCount = ( int )( sizeof( helpLines )/sizeof( helpLines[0] ) );
			for( int line = 0; line < lineCount; ++line )
				std::cout << helpLines[line] << std::endl;
			return 0;
		}

		// both file names are mandatory
		if( sourceFileName.empty() )
		{
			std::cerr << "no source file given" << std::endl;
			return -1;
		}
		if( targetFileName.empty() )
		{
			std::cerr << "no target file given" << std::endl;
			return -1;
		}

		// do the work
		if( mode == kCompress )
		{
			Compress( sourceFileName, targetFileName, method | metric | fit | extra );
		}
		else if( mode == kDecompress )
		{
			Decompress( sourceFileName, targetFileName, method );
		}
		else if( mode == kDiff )
		{
			Diff( sourceFileName, targetFileName );
		}
		else
		{
			std::cerr << "unknown mode" << std::endl;
			throw std::exception();
		}
	}
	catch( std::exception& excuse )
	{
		// complain
		std::cerr << "squishpng error: " << excuse.what() << std::endl;
		return -1;
	}

	// done
	return 0;
}

View File

@ -0,0 +1,205 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
/*! @file
@brief This program tests the error for 1 and 2-colour DXT compression.
This tests the effectiveness of the DXT compression algorithm for all
possible 1 and 2-colour blocks of pixels.
*/
#include <squish.h>
#include <iostream>
#include <cmath>
#include <cfloat>
using namespace squish;
/*! @brief Computes the mean squared colour error between two RGBA blocks.

	Both blocks are read as 16 pixels of 4 bytes each. Only the first three
	bytes of every pixel take part; the fourth is ignored.

	@param a	First block of 16 RGBA pixels.
	@param b	Second block of 16 RGBA pixels.

	@return The sum of the squared channel differences divided by 16.
*/
double GetColourError( u8 const* a, u8 const* b )
{
	double sumSquares = 0.0;
	for( int pixel = 0; pixel < 16; ++pixel )
	{
		// locate this pixel in each block
		u8 const* lhs = a + 4*pixel;
		u8 const* rhs = b + 4*pixel;

		// add up the squared differences of the colour channels
		for( int channel = 0; channel < 3; ++channel )
		{
			int const delta = int( lhs[channel] ) - int( rhs[channel] );
			sumSquares += double( delta*delta );
		}
	}
	return sumSquares / 16.0;
}
/*! @brief Measures the round-trip error of every single-channel solid colour.

	For each of the three colour channels in turn, and for each channel value
	from 0 to 255 inclusive, a block whose 16 pixels all carry that value in
	that channel (the other colour channels 0, alpha 255) is compressed and
	decompressed again. The minimum, maximum and overall rms error are printed
	to standard output.

	@param flags	Flags passed to both Compress and Decompress.
*/
void TestOneColour( int flags )
{
	u8 input[4*16];
	u8 output[4*16];
	u8 block[16];

	double avg = 0.0, min = DBL_MAX, max = -DBL_MAX;
	int counter = 0;

	// start from opaque black
	for( int i = 0; i < 16*4; ++i )
		input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0;

	// test all single-channel colours
	for( int channel = 0; channel < 3; ++channel )
	{
		// the bound is 256 so that the brightest value, 255, is tested too
		for( int value = 0; value < 256; ++value )
		{
			// set the channel value
			for( int i = 0; i < 16; ++i )
				input[4*i + channel] = ( u8 )value;

			// compress and decompress
			Compress( input, block, flags );
			Decompress( output, block, flags );

			// test the results
			double rm = GetColourError( input, output );
			double rms = std::sqrt( rm );

			// accumulate stats (avg sums the mean squares, rooted at the end)
			min = std::min( min, rms );
			max = std::max( max, rms );
			avg += rm;
			++counter;
		}

		// reset the channel value
		for( int i = 0; i < 16; ++i )
			input[4*i + channel] = 0;
	}

	// finish stats
	avg = std::sqrt( avg/counter );

	// show stats
	std::cout << "one colour error (min, max, avg): "
		<< min << ", " << max << ", " << avg << std::endl;
}
void TestOneColourRandom( int flags )
{
u8 input[4*16];
u8 output[4*16];
u8 block[16];
double avg = 0.0, min = DBL_MAX, max = -DBL_MAX;
int counter = 0;
// test all single-channel colours
for( int test = 0; test < 1000; ++test )
{
// set a constant random colour
for( int channel = 0; channel < 3; ++channel )
{
u8 value = ( u8 )( rand() & 0xff );
for( int i = 0; i < 16; ++i )
input[4*i + channel] = value;
}
for( int i = 0; i < 16; ++i )
input[4*i + 3] = 255;
// compress and decompress
Compress( input, block, flags );
Decompress( output, block, flags );
// test the results
double rm = GetColourError( input, output );
double rms = std::sqrt( rm );
// accumulate stats
min = std::min( min, rms );
max = std::max( max, rms );
avg += rm;
++counter;
}
// finish stats
avg = std::sqrt( avg/counter );
// show stats
std::cout << "random one colour error (min, max, avg): "
<< min << ", " << max << ", " << avg << std::endl;
}
/*! @brief Measures the round-trip error of every single-channel two-colour block.

	For each of the three colour channels in turn, and for each pair of
	distinct channel values value1 < value2 taken from 0 to 255 inclusive, a
	block whose first 8 pixels carry value1 and whose last 8 pixels carry
	value2 in that channel (the other colour channels 0, alpha 255) is
	compressed and decompressed again. The minimum, maximum and overall rms
	error are printed to standard output.

	@param flags	Flags passed to both Compress and Decompress.
*/
void TestTwoColour( int flags )
{
	u8 input[4*16];
	u8 output[4*16];
	u8 block[16];

	double avg = 0.0, min = DBL_MAX, max = -DBL_MAX;
	int counter = 0;

	// start from opaque black
	for( int i = 0; i < 16*4; ++i )
		input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0;

	// test all single-channel colour pairs
	for( int channel = 0; channel < 3; ++channel )
	{
		// the bounds are 256 so that pairs ending in 255 are tested too
		for( int value1 = 0; value1 < 256; ++value1 )
		{
			for( int value2 = value1 + 1; value2 < 256; ++value2 )
			{
				// set the channel value: first half value1, second half value2
				for( int i = 0; i < 16; ++i )
					input[4*i + channel] = ( u8 )( ( i < 8 ) ? value1 : value2 );

				// compress and decompress
				Compress( input, block, flags );
				Decompress( output, block, flags );

				// test the results
				double rm = GetColourError( input, output );
				double rms = std::sqrt( rm );

				// accumulate stats (avg sums the mean squares, rooted at the end)
				min = std::min( min, rms );
				max = std::max( max, rms );
				avg += rm;
				++counter;
			}
		}

		// reset the channel value
		for( int i = 0; i < 16; ++i )
			input[4*i + channel] = 0;
	}

	// finish stats
	avg = std::sqrt( avg/counter );

	// show stats
	std::cout << "two colour error (min, max, avg): "
		<< min << ", " << max << ", " << avg << std::endl;
}
/*! @brief Runs the three error tests in turn and prints their statistics.

	@return Always 0.
*/
int main()
{
	// random solid colours, DXT1 with the range-fit flag
	TestOneColourRandom( kDxt1 | kColourRangeFit );

	// single-channel solid colours, DXT1 only
	TestOneColour( kDxt1 );

	// single-channel colour pairs, DXT1 only
	TestTwoColour( kDxt1 );

	return 0;
}

Some files were not shown because too many files have changed in this diff Show More