diff --git a/CMakeLists.txt b/CMakeLists.txt
index f187fe2..059a08c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,7 +39,11 @@ IF(MSVC)
 	ADD_DEFINITIONS(-D__SSE2__ -D__SSE__ -D__MMX__)
 ENDIF(MSVC)
 
-#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=pentium4")
+# -march=pentium4 is a GCC option that is only valid for 32-bit x86 targets,
+# so it is applied only there instead of on every compiler and architecture.
+IF(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86)$")
+	SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=pentium4")
+ENDIF(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86)$")
 #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=powerpc")
 
 # ibook G4:
@@ -48,3 +52,6 @@ ENDIF(MSVC)
 
 ADD_SUBDIRECTORY(src)
 
+IF(WIN32)
+	ADD_SUBDIRECTORY(gnuwin32)
+ENDIF(WIN32)
diff --git a/cmake/FindCUDA.cmake b/cmake/FindCUDA.cmake
index 6dfc6af..603310e 100644
--- a/cmake/FindCUDA.cmake
+++ b/cmake/FindCUDA.cmake
@@ -19,6 +19,7 @@ IF (WIN32)
 ELSE(WIN32)
 	FIND_PROGRAM (CUDA_COMPILER nvcc
 		$ENV{CUDA_BIN_PATH}
+		/usr/local/cuda/bin
 		DOC "The CUDA Compiler")
 ENDIF(WIN32)
 
@@ -37,6 +38,14 @@ FIND_PATH (CUDA_INCLUDE_PATH cuda_runtime.h
 	DOC "The directory where CUDA headers reside")
 
 FIND_LIBRARY (CUDA_LIBRARY
+	NAMES cuda
+	PATHS
+	$ENV{CUDA_LIB_PATH}
+	${CUDA_COMPILER_SUPER_DIR}/lib
+	${CUDA_COMPILER_DIR}
+	DOC "The CUDA library")
+
+FIND_LIBRARY (CUDA_RUNTIME_LIBRARY
 	NAMES cudart
 	PATHS
 	$ENV{CUDA_LIB_PATH}
@@ -45,21 +54,23 @@ FIND_LIBRARY (CUDA_LIBRARY
 	DOC "The CUDA runtime library")
 
-IF (CUDA_INCLUDE_PATH)
+IF (CUDA_INCLUDE_PATH AND CUDA_LIBRARY AND CUDA_RUNTIME_LIBRARY)
 	SET (CUDA_FOUND 1 CACHE STRING "Set to 1 if CUDA is found, 0 otherwise")
-ELSE (CUDA_INCLUDE_PATH)
+ELSE (CUDA_INCLUDE_PATH AND CUDA_LIBRARY AND CUDA_RUNTIME_LIBRARY)
 	SET (CUDA_FOUND 0 CACHE STRING "Set to 1 if CUDA is found, 0 otherwise")
-ENDIF (CUDA_INCLUDE_PATH)
+ENDIF (CUDA_INCLUDE_PATH AND CUDA_LIBRARY AND CUDA_RUNTIME_LIBRARY)
+
+SET (CUDA_LIBRARIES ${CUDA_LIBRARY} ${CUDA_RUNTIME_LIBRARY})
 
-MARK_AS_ADVANCED (CUDA_FOUND CUDA_COMPILER)
+MARK_AS_ADVANCED (CUDA_FOUND CUDA_COMPILER CUDA_LIBRARY CUDA_RUNTIME_LIBRARY)
 
 #SET(CUDA_OPTIONS "-ncfe")
 SET(CUDA_OPTIONS "")
 
-IF (CUDA_EMU)
+IF (CUDA_EMULATION)
 	SET (CUDA_OPTIONS "${CUDA_OPTIONS} -deviceemu")
-ENDIF (CUDA_EMU)
+ENDIF (CUDA_EMULATION)
 
 # Get include directories.
@@ -89,15 +100,27 @@ MACRO (GET_CUFILE_DEPENDENCIES dependencies file)
 	# parse file for dependencies
 	FILE(READ "${file}" CONTENTS)
-	STRING(REGEX MATCHALL "#[ \t]*include[ \t]+[<\"][^>\"]*" DEPS "${CONTENTS}")
+	# Only quoted includes are tracked. DEPS has to be populated here because
+	# the FOREACH below iterates over it.
+	STRING(REGEX MATCHALL "#[ \t]*include[ \t]+\"[^\"]*" DEPS "${CONTENTS}")
 	SET(${dependencies})
 	FOREACH(DEP ${DEPS})
-		STRING(REGEX REPLACE "#[ \t]*include[ \t]+[<\"]" "" DEP "${DEP}")
-		SET(${dependencies} ${${dependencies}} ${filepath}/${DEP})
+		STRING(REGEX REPLACE "#[ \t]*include[ \t]+\"" "" DEP "${DEP}")
+
+		FIND_PATH(PATH_OF_${DEP} ${DEP}
+			${filepath})
+
+		# IF(<variable>) is false for empty and *-NOTFOUND values.
+		IF(PATH_OF_${DEP})
+			SET(${dependencies} ${${dependencies}} ${PATH_OF_${DEP}}/${DEP})
+		ENDIF(PATH_OF_${DEP})
+
+		# reset path.
+		#SET(PATH_OF_${DEP})
 	ENDFOREACH(DEP)
-	
+
 ENDMACRO (GET_CUFILE_DEPENDENCIES)
 
diff --git a/gnuwin32/CMakeLists.txt b/gnuwin32/CMakeLists.txt
new file mode 100644
index 0000000..ab3c0cb
--- /dev/null
+++ b/gnuwin32/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+INSTALL(PROGRAMS
+	bin/jpeg62.dll
+	bin/libpng13.dll
+	bin/libtiff3.dll
+	bin/zlib1.dll
+	DESTINATION bin)
diff --git a/src/nvcore/CMakeLists.txt b/src/nvcore/CMakeLists.txt
index 56d7c57..6c7e849 100644
--- a/src/nvcore/CMakeLists.txt
+++ b/src/nvcore/CMakeLists.txt
@@ -34,3 +34,7 @@ ENDIF(NVCORE_SHARED)
 
 TARGET_LINK_LIBRARIES(nvcore ${LIBS})
 
+INSTALL(TARGETS nvcore
+	RUNTIME DESTINATION bin
+	LIBRARY DESTINATION lib
+	ARCHIVE DESTINATION lib/static)
diff --git a/src/nvcore/Debug.h b/src/nvcore/Debug.h
index c3a147c..c0c8df3 100644
--- a/src/nvcore/Debug.h
+++ b/src/nvcore/Debug.h
@@ -13,6 +13,8 @@
 #define NV_ABORT_IGNORE		2
 #define NV_ABORT_EXIT		3
 
+#undef assert	// avoid conflicts with assert method.
+
 #define nvNoAssert(exp) \
 	do { \
 		(void)sizeof(exp); \
diff --git a/src/nvcore/StdStream.h b/src/nvcore/StdStream.h
index 6304387..be56b75 100644
--- a/src/nvcore/StdStream.h
+++ b/src/nvcore/StdStream.h
@@ -13,6 +13,7 @@ namespace nv
 // Portable version of fopen.
 inline FILE * fileOpen(const char * fileName, const char * mode)
 {
+	nvCheck(fileName != NULL);
 #if NV_CC_MSVC && _MSC_VER >= 1400
 	FILE * fp;
 	if (fopen_s(&fp, fileName, mode) == 0) {
@@ -48,6 +49,7 @@ public:
 	virtual void seek( int pos )
 	{
 		nvDebugCheck(m_fp != NULL);
+		nvDebugCheck(pos >= 0 && pos <= size());	// pos == size() (end of file) is a valid seek target.
 		fseek(m_fp, pos, SEEK_SET);
 	}
 	
@@ -59,6 +61,7 @@ public:
 
 	virtual int size() const
 	{
+		nvDebugCheck(m_fp != NULL);
 		int pos = ftell(m_fp);
 		fseek(m_fp, 0, SEEK_END);
 		int end = ftell(m_fp);
diff --git a/src/nvcore/StrLib.cpp b/src/nvcore/StrLib.cpp
index 585931f..5a238f4 100644
--- a/src/nvcore/StrLib.cpp
+++ b/src/nvcore/StrLib.cpp
@@ -196,6 +196,12 @@ StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
 	copy(s);
 }
 
+/** Copy string. Members start out null, as in the other constructors, before copy() runs. */
+StringBuilder::StringBuilder( const char * s ) : m_size(0), m_str(NULL)
+{
+	copy(s);
+}
+
 /** Allocate and copy string. */
 StringBuilder::StringBuilder( int size_hint, const StringBuilder & s) : m_size(size_hint), m_str(NULL)
 {
diff --git a/src/nvcore/StrLib.h b/src/nvcore/StrLib.h
index f6b159d..274e095 100644
--- a/src/nvcore/StrLib.h
+++ b/src/nvcore/StrLib.h
@@ -44,6 +44,7 @@ namespace nv
 
 		NVCORE_API StringBuilder();
 		NVCORE_API explicit StringBuilder( int size_hint );
+		NVCORE_API StringBuilder( const char * str );
 		NVCORE_API StringBuilder( const StringBuilder & );
 		NVCORE_API StringBuilder( int size_hint, const StringBuilder & );
 		NVCORE_API StringBuilder( const char * format, ... ) __attribute__((format (printf, 2, 3)));
@@ -69,7 +70,7 @@ namespace nv
 		NVCORE_API StringBuilder & toUpper();
 		
 		NVCORE_API void reset();
-		NVCORE_API bool empty() const { return m_size == 0; }
+		NVCORE_API bool isNull() const { return m_size == 0; }
 	
 		// const char * accessors
 		operator const char * () const { return m_str; }
@@ -81,22 +82,27 @@ namespace nv
 		StringBuilder & operator=( const StringBuilder & s ) {
 			return copy(s);
 		}
-		
+
+		/// Implement value semantics.
+		StringBuilder & operator=( const char * s ) {
+			return copy(s);
+		}
+
 		/// Equal operator.
 		bool operator==( const StringBuilder & s ) const {
-			nvCheck(m_str != NULL);
-			nvCheck(s.m_str != NULL);
-			return strcmp(s.m_str, m_str) != 0;
+			if (s.isNull()) return isNull();
+			else if (isNull()) return false;
+			else return strcmp(s.m_str, m_str) == 0;	// strcmp returns 0 when the strings match.
 		}
 		
 		/// Return the exact length.
-		uint length() const { nvCheck(m_str != NULL); return uint(strlen(m_str)); }
+		uint length() const { return isNull() ? 0 : uint(strlen(m_str)); }
 		
 		/// Return the size of the string container.
-		uint capacity() const { nvCheck(m_str != NULL); return m_size; }
+		uint capacity() const { return m_size; }
 		
 		/// Return the hash of the string.
-		uint hash() const { nvCheck(m_str != NULL); return strHash(m_str); }
+		uint hash() const { return isNull() ? 0 : strHash(m_str); }
 		
 		/// Swap strings.
 		friend void swap(StringBuilder & a, StringBuilder & b) {
@@ -104,8 +110,6 @@ namespace nv
 			nv::swap(a.m_str, b.m_str);
 		}
 		
-		static char separator();
-		
 	protected:
 		
 		/// Size of the string container.
diff --git a/src/nvimage/CMakeLists.txt b/src/nvimage/CMakeLists.txt
index 4a8a927..10544ee 100644
--- a/src/nvimage/CMakeLists.txt
+++ b/src/nvimage/CMakeLists.txt
@@ -55,4 +55,8 @@ ENDIF(NVIMAGE_SHARED)
 
 TARGET_LINK_LIBRARIES(nvimage ${LIBS} nvcore nvmath posh)
 
+INSTALL(TARGETS nvimage
+	RUNTIME DESTINATION bin
+	LIBRARY DESTINATION lib
+	ARCHIVE DESTINATION lib/static)
diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp
index 3c4ed0f..098b17b 100644
--- a/src/nvimage/DirectDrawSurface.cpp
+++ b/src/nvimage/DirectDrawSurface.cpp
@@ -528,6 +528,22 @@ void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap)
 	}
 }
 
+/// Expand a 'bits'-wide channel value to 8 bits by replicating its top bits,
+/// so that 0 maps to 0 and the maximum value maps to 255.
+static uint8 bitExpand(uint8 c, uint bits)
+{
+	if (bits == 0 || bits >= 8) return c;
+
+	// Left-align the value, then OR in right-shifted copies until all 8
+	// bits are filled (e.g. 5 bits: abcde -> abcdeabc).
+	const uint aligned = (uint(c) << (8 - bits)) & 0xFF;
+	uint output = aligned;
+	for (uint filled = bits; filled < 8; filled += bits)
+	{
+		output |= aligned >> filled;
+	}
+	return uint8(output);
+}
 
 void DirectDrawSurface::readLinearImage(Image * img)
 {
diff --git a/src/nvimage/nvtt/cuda/CompressKernel.cu b/src/nvimage/nvtt/cuda/CompressKernel.cu
index 84079ca..c75751a 100644
--- a/src/nvimage/nvtt/cuda/CompressKernel.cu
+++ b/src/nvimage/nvtt/cuda/CompressKernel.cu
@@ -220,7 +220,7 @@ __device__ float evalPermutation4(const float3 * colors, uint permutation, ushor
 		alphax_sum += alpha * colors[i];
 		betax_sum += beta * colors[i];
 	}
-	
+
 	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
 
 	float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
@@ -689,33 +689,33 @@ __global__ void compressWeighted(const uint * permutations, const uint * image,
 
 __device__ float computeError(const float weights[16], uchar a0, uchar a1)
 {
-    float palette[6];
-    palette[0] = (6.0f/7.0f * a0 + 1.0f/7.0f * a1);
-    palette[1] = (5.0f/7.0f * a0 + 2.0f/7.0f * a1);
-    palette[2] = (4.0f/7.0f * a0 + 3.0f/7.0f * a1);
-    palette[3] = (3.0f/7.0f * a0 + 4.0f/7.0f * a1);
-    palette[4] = (2.0f/7.0f * a0 + 5.0f/7.0f * a1);
-    palette[5] = (1.0f/7.0f * a0 + 6.0f/7.0f * a1);
-
-    float total = 0.0f;
-
-    for (uint i = 0; i < 16; i++)
-    {
-        float alpha = weights[i];
-
-        float error = a0 - alpha;
-        error = min(error, palette[0] - alpha);
-        error = min(error, palette[1] - alpha);
-        error = min(error, palette[2] - alpha);
-        error = min(error, palette[3] - alpha);
-        error = min(error, palette[4] - alpha);
-        error = min(error, palette[5] - alpha);
-        error = min(error, a1 - alpha);
-
-        total += error;
-    }
-
-    return total;
+	float palette[6];
+	palette[0] = (6.0f/7.0f * a0 + 1.0f/7.0f * a1);
+	palette[1] = (5.0f/7.0f * a0 + 2.0f/7.0f * a1);
+	palette[2] = (4.0f/7.0f * a0 + 3.0f/7.0f * a1);
+	palette[3] = (3.0f/7.0f * a0 + 4.0f/7.0f * a1);
+	palette[4] = (2.0f/7.0f * a0 + 5.0f/7.0f * a1);
+	palette[5] = (1.0f/7.0f * a0 + 6.0f/7.0f * a1);
+
+	float total = 0.0f;
+
+	for (uint i = 0; i < 16; i++)
+	{
+		float alpha = weights[i];
+
+		float error = a0 - alpha;
+		error = min(error, palette[0] - alpha);
+		error = min(error, palette[1] - alpha);
+		error = min(error, palette[2] - alpha);
+		error = min(error, palette[3] - alpha);
+		error = min(error, palette[4] - alpha);
+		error = min(error, palette[5] - alpha);
+		error = min(error, a1 - alpha);
+
+		total += error;
+	}
+
+	return total;
 }
 
 inline __device__ uchar roundAndExpand(float a)
@@ -726,35 +726,35 @@ inline __device__ uchar roundAndExpand(float a)
 /*
 __device__ void optimizeAlpha8(const float alphas[16], uchar & a0, uchar & a1)
 {
-    float alpha2_sum = 0;
-    float beta2_sum = 0;
-    float alphabeta_sum = 0;
-    float alphax_sum = 0;
-    float betax_sum = 0;
-
-    for (int i = 0; i < 16; i++)
-    {
-        uint idx = index[i];
-        float alpha;
-        if (idx < 2) alpha = 1.0f - idx;
-        else alpha = (8.0f - idx) / 7.0f;
-
-        float beta = 1 - alpha;
-
-        alpha2_sum += alpha * alpha;
-        beta2_sum += beta * beta;
-        alphabeta_sum += alpha * beta;
-        alphax_sum += alpha * alphas[i];
-        betax_sum += beta * alphas[i];
-    }
-
-    const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
-
-    float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
-    float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
-
-    a0 = roundAndExpand(a);
-    a1 = roundAndExpand(b);
+	float alpha2_sum = 0;
+	float beta2_sum = 0;
+	float alphabeta_sum = 0;
+	float alphax_sum = 0;
+	float betax_sum = 0;
+
+	for (int i = 0; i < 16; i++)
+	{
+		uint idx = index[i];
+		float alpha;
+		if (idx < 2) alpha = 1.0f - idx;
+		else alpha = (8.0f - idx) / 7.0f;
+
+		float beta = 1 - alpha;
+
+		alpha2_sum += alpha * alpha;
+		beta2_sum += beta * beta;
+		alphabeta_sum += alpha * beta;
+		alphax_sum += alpha * alphas[i];
+		betax_sum += beta * alphas[i];
+	}
+
+	const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
+
+	float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
+	float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
+
+	a0 = roundAndExpand(a);
+	a1 = roundAndExpand(b);
 }
 */
 
diff --git a/src/nvimage/nvtt/tools/compress.cpp b/src/nvimage/nvtt/tools/compress.cpp
index f0be6e6..34fff47 100644
--- a/src/nvimage/nvtt/tools/compress.cpp
+++ b/src/nvimage/nvtt/tools/compress.cpp
@@ -238,7 +238,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	if (input.empty())
+	if (input.isNull())
 	{
 		printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
 		
diff --git a/src/nvmath/CMakeLists.txt b/src/nvmath/CMakeLists.txt
index ad52042..d483958 100644
--- a/src/nvmath/CMakeLists.txt
+++ b/src/nvmath/CMakeLists.txt
@@ -10,7 +10,8 @@ SET(MATH_SRCS
 	Fitting.h Fitting.cpp
 	Montecarlo.h Montecarlo.cpp
 	Random.h Random.cpp
-	SphericalHarmonic.h SphericalHarmonic.cpp)
+	SphericalHarmonic.h SphericalHarmonic.cpp
+	Basis.h Basis.cpp)
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 
@@ -25,3 +26,7 @@ ENDIF(NVMATH_SHARED)
 
 TARGET_LINK_LIBRARIES(nvmath ${LIBS} nvcore)
 
+INSTALL(TARGETS nvmath
+	RUNTIME DESTINATION bin
+	LIBRARY DESTINATION lib
+	ARCHIVE DESTINATION lib/static)
diff --git a/src/nvmath/Matrix.h b/src/nvmath/Matrix.h
index fd52139..83a2cbb 100644
--- a/src/nvmath/Matrix.h
+++ b/src/nvmath/Matrix.h
@@ -33,12 +33,12 @@ public:
 	Vector4 row(uint i) const;
 	Vector4 column(uint i) const;
 
-	void scale(double s);
+	void scale(scalar s);
 	void scale(Vector3::Arg s);
 	void translate(Vector3::Arg t);
 	void rotate(scalar theta, scalar v0, scalar v1, scalar v2);
 	Matrix inverse();
-	double determinant();
+	scalar determinant();
 
 	void apply(Matrix::Arg m);
 
@@ -110,7 +110,7 @@ inline Vector4 Matrix::column(uint i) const
 }
 
 /// Apply scale.
-inline void Matrix::scale(double s)
+inline void Matrix::scale(scalar s)
 {
 	m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s;
 	m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s;
@@ -150,7 +150,7 @@ inline void Matrix::apply(Matrix::Arg m)
 	nvDebugCheck(this != &m);
 
 	for(int i = 0; i < 4; i++) {
-		const float ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3);
+		const scalar ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3);
 		m_data[0 + i] = ai0 * m(0,0) + ai1 * m(1,0) + ai2 * m(2,0) + ai3 * m(3,0);
 		m_data[4 + i] = ai0 * m(0,1) + ai1 * m(1,1) + ai2 * m(2,1) + ai3 * m(3,1);
 		m_data[8 + i] = ai0 * m(0,2) + ai1 * m(1,2) + ai2 * m(2,2) + ai3 * m(3,2);
@@ -305,7 +305,7 @@ inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear)
 }
 
 /// Get matrix determinant.
-inline double Matrix::determinant()
+inline scalar Matrix::determinant()
 {
 	return m_data[3] * m_data[6] * m_data[9] * m_data[12]-m_data[2] * m_data[7] * m_data[9] * m_data[12]-m_data[3] * m_data[5] * m_data[10] * m_data[12]+m_data[1] * m_data[7] * m_data[10] * m_data[12]+
 		m_data[2] * m_data[5] * m_data[11] * m_data[12]-m_data[1] * m_data[6] * m_data[11] * m_data[12]-m_data[3] * m_data[6] * m_data[8] * m_data[13]+m_data[2] * m_data[7] * m_data[8] * m_data[13]+
@@ -315,9 +315,20 @@ inline double Matrix::determinant()
 		m_data[2] * m_data[4] * m_data[9] * m_data[15]-m_data[0] * m_data[6] * m_data[9] * m_data[15]-m_data[1] * m_data[4] * m_data[10] * m_data[15]+m_data[0] * m_data[5] * m_data[10] * m_data[15];
 }
 
-//inline Matrix transpose(Matrix::Arg m)
-//{
-//}
+/// Get the transpose of the given matrix: r(i, j) = m(j, i).
+inline Matrix transpose(Matrix::Arg m)
+{
+	Matrix r;
+	for (int i = 0; i < 4; i++)
+	{
+		for (int j = 0; j < 4; j++)
+		{
+			r(i, j) = m(j, i);
+		}
+	}
+	return r;
+}
+
 inline Matrix Matrix::inverse()
 {
 	Matrix r;
diff --git a/src/nvmath/Vector.h b/src/nvmath/Vector.h
index f2aed6f..3875846 100644
--- a/src/nvmath/Vector.h
+++ b/src/nvmath/Vector.h
@@ -30,6 +30,8 @@ public:
 	scalar x() const;
 	scalar y() const;
 
+	const scalar * ptr() const;
+
 	void set(scalar x, scalar y);
 
 	Vector2 operator-() const;
@@ -143,6 +145,11 @@ inline const Vector2 & Vector2::operator=(Vector2::Arg v)
 inline scalar Vector2::x() const { return m_x; }
 inline scalar Vector2::y() const { return m_y; }
 
+inline const scalar * Vector2::ptr() const
+{
+	return &m_x;
+}
+
 inline void Vector2::set(scalar x, scalar y)
 {
 	m_x = x;
@@ -397,6 +404,11 @@ inline Vector2 operator*(Vector2::Arg v, scalar s)
 	return scale(v, s);
 }
 
+inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2)
+{
+	return Vector2(v1.x()*v2.x(), v1.y()*v2.y());
+}
+
 inline Vector2 operator*(scalar s, Vector2::Arg v)
 {
 	return scale(v, s);
@@ -417,6 +429,11 @@ inline scalar length(Vector2::Arg v)
 	return sqrtf(length_squared(v));
 }
 
+inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
+{
+	return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
+}
+
 inline Vector2 min(Vector2::Arg a, Vector2::Arg b)
 {
 	return Vector2(min(a.x(), b.x()), min(a.y(), b.y()));
@@ -532,6 +549,11 @@ inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilo
 	return scale(v, 1.0f / l);
 }
 
+inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON)
+{
+	return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon) && equal(v1.z(), v2.z(), epsilon);
+}
+
 inline Vector3 min(Vector3::Arg a, Vector3::Arg b)
 {
 	return Vector3(min(a.x(), b.x()), min(a.y(), b.y()), min(a.z(), b.z()));
@@ -643,6 +665,11 @@ inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilo
 	return scale(v, 1.0f / l);
 }
 
+inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON)
+{
+	return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon) && equal(v1.z(), v2.z(), epsilon) && equal(v1.w(), v2.w(), epsilon);
+}
+
 inline Vector4 min(Vector4::Arg a, Vector4::Arg b)
 {
 	return Vector4(min(a.x(), b.x()), min(a.y(), b.y()), min(a.z(), b.z()), min(a.w(), b.w()));