From a08333747362b5ac1518581063b48a6a7b90188e Mon Sep 17 00:00:00 2001 From: Ignacio Date: Tue, 24 Mar 2015 12:14:49 -0700 Subject: [PATCH] Merge changes from The Witness. --- project/vc9/bc6h/bc6h.vcproj | 18 +- project/vc9/bc7/bc7.vcproj | 34 +- project/vc9/nvmath/nvmath.vcproj | 8 + project/vc9/nvtt.sln | 2 + project/vc9/nvtt/nvtt.vcproj | 16 + project/vc9/testsuite/testsuite.vcproj | 24 +- src/CMakeLists.txt | 31 +- src/nvcore/nvcore.h | 6 +- src/nvimage/BlockDXT.cpp | 15 +- src/nvimage/BlockDXT.h | 3 +- src/nvimage/ColorBlock.cpp | 11 +- src/nvimage/ColorBlock.h | 9 +- src/nvimage/DirectDrawSurface.cpp | 15 +- src/nvimage/ErrorMetric.cpp | 32 +- src/nvimage/ErrorMetric.h | 14 +- src/nvmath/PackedFloat.cpp | 61 ++++ src/nvmath/PackedFloat.h | 79 +++++ src/nvmath/SimdVector.h | 23 -- src/nvmath/nvmath.h | 43 +++ src/nvthread/Atomic.h | 24 +- src/nvthread/Event.cpp | 4 +- src/nvthread/Mutex.cpp | 46 ++- src/nvthread/Mutex.h | 2 +- src/nvthread/ParallelFor.cpp | 6 +- src/nvthread/ParallelFor.h | 2 +- src/nvthread/Thread.cpp | 60 +++- src/nvthread/Thread.h | 1 + src/nvthread/ThreadPool.cpp | 2 +- src/nvthread/ThreadPool.h | 2 +- src/nvthread/nvthread.cpp | 7 + src/nvthread/nvthread.h | 2 + src/nvtt/BlockCompressor.cpp | 128 ++++++-- src/nvtt/BlockCompressor.h | 6 +- src/nvtt/CMakeLists.txt | 1 + src/nvtt/ClusterFit.cpp | 3 +- src/nvtt/ClusterFit.h | 2 +- src/nvtt/CompressionOptions.cpp | 2 +- src/nvtt/CompressorDX10.cpp | 2 + src/nvtt/CompressorDX10.h | 4 +- src/nvtt/CompressorDX11.cpp | 38 +-- src/nvtt/CompressorDX11.h | 8 +- src/nvtt/CompressorDX9.cpp | 399 +---------------------- src/nvtt/CompressorDX9.h | 8 +- src/nvtt/CompressorDXT1.cpp | 64 ++-- src/nvtt/CompressorDXT1.h | 9 +- src/nvtt/CompressorDXT5_RGBM.cpp | 423 +++++++++++++++++++++++++ src/nvtt/CompressorDXT5_RGBM.h | 9 + src/nvtt/CompressorRGB.cpp | 173 ++++++++++ src/nvtt/Context.cpp | 53 ++-- src/nvtt/OptimalCompressDXT.cpp | 3 +- src/nvtt/QuickCompressDXT.cpp | 11 +- src/nvtt/Surface.cpp | 287 
+++++++++++++++-- src/nvtt/nvtt.cpp | 9 +- src/nvtt/nvtt.h | 14 +- src/nvtt/tools/compress.cpp | 384 ++++++++++++++-------- src/nvtt/tools/decompress.cpp | 205 +++++++----- src/nvtt/tools/imgdiff.cpp | 120 ++++++- 57 files changed, 2056 insertions(+), 911 deletions(-) create mode 100755 src/nvmath/PackedFloat.cpp create mode 100755 src/nvmath/PackedFloat.h create mode 100755 src/nvtt/CompressorDXT5_RGBM.cpp create mode 100755 src/nvtt/CompressorDXT5_RGBM.h diff --git a/project/vc9/bc6h/bc6h.vcproj b/project/vc9/bc6h/bc6h.vcproj index c056d14..ba159ed 100755 --- a/project/vc9/bc6h/bc6h.vcproj +++ b/project/vc9/bc6h/bc6h.vcproj @@ -267,39 +267,39 @@ diff --git a/project/vc9/bc7/bc7.vcproj b/project/vc9/bc7/bc7.vcproj index 38e0d1f..4dfdfa7 100644 --- a/project/vc9/bc7/bc7.vcproj +++ b/project/vc9/bc7/bc7.vcproj @@ -267,71 +267,71 @@ diff --git a/project/vc9/nvmath/nvmath.vcproj b/project/vc9/nvmath/nvmath.vcproj index 16c8b7d..7ab7db8 100644 --- a/project/vc9/nvmath/nvmath.vcproj +++ b/project/vc9/nvmath/nvmath.vcproj @@ -334,6 +334,14 @@ RelativePath="..\..\..\src\nvmath\nvmath.h" > + + + + diff --git a/project/vc9/nvtt.sln b/project/vc9/nvtt.sln index 8d41bc3..76457fb 100644 --- a/project/vc9/nvtt.sln +++ b/project/vc9/nvtt.sln @@ -48,6 +48,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompres ProjectSection(ProjectDependencies) = postProject {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} + {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} @@ -57,6 +58,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvim 
ProjectSection(ProjectDependencies) = postProject {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} + {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} diff --git a/project/vc9/nvtt/nvtt.vcproj b/project/vc9/nvtt/nvtt.vcproj index 3343500..9f27172 100644 --- a/project/vc9/nvtt/nvtt.vcproj +++ b/project/vc9/nvtt/nvtt.vcproj @@ -938,6 +938,22 @@ RelativePath="..\..\..\src\nvtt\CompressorDX9.h" > + + + + + + + + diff --git a/project/vc9/testsuite/testsuite.vcproj b/project/vc9/testsuite/testsuite.vcproj index 01fd7a8..47110c8 100644 --- a/project/vc9/testsuite/testsuite.vcproj +++ b/project/vc9/testsuite/testsuite.vcproj @@ -99,8 +99,8 @@ allocate(4, 4); + // Convert ZOH's tile struct to Vector3, and convert half to float. 
for (uint y = 0; y < 4; ++y) { for (uint x = 0; x < 4; ++x) @@ -646,13 +645,9 @@ void BlockBC6::decodeBlock(ColorSet * set) const uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x); uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y); uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z); - set->colors[y * 4 + x].x = to_float(rHalf); - set->colors[y * 4 + x].y = to_float(gHalf); - set->colors[y * 4 + x].z = to_float(bHalf); - set->colors[y * 4 + x].w = 1.0f; - - // Set indices in case someone uses them - set->indices[y * 4 + x] = y * 4 + x; + colors[y * 4 + x].x = to_float(rHalf); + colors[y * 4 + x].y = to_float(gHalf); + colors[y * 4 + x].z = to_float(bHalf); } } } diff --git a/src/nvimage/BlockDXT.h b/src/nvimage/BlockDXT.h index e03cff7..40f615f 100644 --- a/src/nvimage/BlockDXT.h +++ b/src/nvimage/BlockDXT.h @@ -35,6 +35,7 @@ namespace nv struct ColorSet; struct AlphaBlock4x4; class Stream; + class Vector3; /// DXT1 block. @@ -219,7 +220,7 @@ namespace nv struct BlockBC6 { uint8 data[16]; // Not even going to try to write a union for this thing. - void decodeBlock(ColorSet * set) const; + void decodeBlock(Vector3 colors[16]) const; }; /// BC7 block. 
diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp index 026bb36..ad8f2b7 100644 --- a/src/nvimage/ColorBlock.cpp +++ b/src/nvimage/ColorBlock.cpp @@ -462,7 +462,7 @@ float ColorBlock::volume() const return bounds.volume(); }*/ - +#if 0 void ColorSet::allocate(uint w, uint h) { nvDebugCheck(w <= 4 && h <= 4); @@ -680,6 +680,7 @@ bool ColorSet::hasAlpha() const } return false; } +#endif // 0 void AlphaBlock4x4::init(uint8 a) @@ -707,7 +708,7 @@ void AlphaBlock4x4::init(const ColorBlock & src, uint channel) -void AlphaBlock4x4::init(const ColorSet & src, uint channel) +/*void AlphaBlock4x4::init(const ColorSet & src, uint channel) { nvCheck(channel >= 0 && channel < 4); @@ -727,12 +728,12 @@ void AlphaBlock4x4::initMaxRGB(const ColorSet & src, float threshold) alpha[i] = unitFloatToFixed8(max(max(x, y), max(z, threshold))); weights[i] = 1.0f; } -} +}*/ -void AlphaBlock4x4::initWeights(const ColorSet & src) +/*void AlphaBlock4x4::initWeights(const ColorSet & src) { for (int i = 0; i < 16; i++) { weights[i] = src.weight(i); } -} +}*/ diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h index fe78a47..f87cb6d 100644 --- a/src/nvimage/ColorBlock.h +++ b/src/nvimage/ColorBlock.h @@ -81,7 +81,7 @@ namespace nv return m_color[y * 4 + x]; } - + /* struct ColorSet { ColorSet() : colorCount(0), indexCount(0), w(0), h(0) {} @@ -124,6 +124,7 @@ namespace nv float weights[16]; // @@ Add mask to indicate what color components are weighted? int indices[16]; }; + */ /// Uncompressed 4x4 alpha block. 
@@ -131,10 +132,10 @@ namespace nv { void init(uint8 value); void init(const ColorBlock & src, uint channel); - void init(const ColorSet & src, uint channel); + //void init(const ColorSet & src, uint channel); - void initMaxRGB(const ColorSet & src, float threshold); - void initWeights(const ColorSet & src); + //void initMaxRGB(const ColorSet & src, float threshold); + //void initWeights(const ColorSet & src); uint8 alpha[4*4]; float weights[16]; diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp index dff9255..a6bbdad 100644 --- a/src/nvimage/DirectDrawSurface.cpp +++ b/src/nvimage/DirectDrawSurface.cpp @@ -31,6 +31,7 @@ #include "nvcore/Utils.h" // max #include "nvcore/StdStream.h" #include "nvmath/Vector.inl" +#include "nvmath/ftoi.h" #include // memset @@ -1395,20 +1396,20 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba) { BlockBC6 block; *stream << block; - ColorSet set; - block.decodeBlock(&set); + Vector3 colors[16]; + block.decodeBlock(colors); // Clamp to [0, 1] and round to 8-bit for (int y = 0; y < 4; ++y) { for (int x = 0; x < 4; ++x) { - Vector4 px = set.colors[y*4 + x]; + Vector3 px = colors[y*4 + x]; rgba->color(x, y).setRGBA( - uint8(clamp(px.x, 0.0f, 1.0f) * 255.0f + 0.5f), - uint8(clamp(px.y, 0.0f, 1.0f) * 255.0f + 0.5f), - uint8(clamp(px.z, 0.0f, 1.0f) * 255.0f + 0.5f), - uint8(clamp(px.w, 0.0f, 1.0f) * 255.0f + 0.5f)); + ftoi_round(clamp(px.x, 0.0f, 1.0f) * 255.0f), + ftoi_round(clamp(px.y, 0.0f, 1.0f) * 255.0f), + ftoi_round(clamp(px.z, 0.0f, 1.0f) * 255.0f), + 0xFF); } } } diff --git a/src/nvimage/ErrorMetric.cpp b/src/nvimage/ErrorMetric.cpp index 7a4970b..3f10a72 100644 --- a/src/nvimage/ErrorMetric.cpp +++ b/src/nvimage/ErrorMetric.cpp @@ -10,7 +10,7 @@ using namespace nv; -float nv::rmsColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight) +float nv::rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight) { if (!sameLayout(img, ref)) { return FLT_MAX; @@ 
-23,31 +23,31 @@ float nv::rmsColorError(const FloatImage * img, const FloatImage * ref, bool alp const uint count = img->pixelCount(); for (uint i = 0; i < count; i++) { - float r0 = img->pixel(i + count * 0); - float g0 = img->pixel(i + count * 1); - float b0 = img->pixel(i + count * 2); - //float a0 = img->pixel(i + count * 3); - float r1 = ref->pixel(i + count * 0); - float g1 = ref->pixel(i + count * 1); - float b1 = ref->pixel(i + count * 2); - float a1 = ref->pixel(i + count * 3); + float r0 = ref->pixel(i + count * 0); + float g0 = ref->pixel(i + count * 1); + float b0 = ref->pixel(i + count * 2); + float a0 = ref->pixel(i + count * 3); + float r1 = img->pixel(i + count * 0); + float g1 = img->pixel(i + count * 1); + float b1 = img->pixel(i + count * 2); + //float a1 = img->pixel(i + count * 3); float r = r0 - r1; float g = g0 - g1; float b = b0 - b1; float a = 1; - if (alphaWeight) a = a1; + if (alphaWeight) a = a0 * a0; // @@ a0*a1 or a0*a0 ? - mse += r * r * a; - mse += g * g * a; - mse += b * b * a; + mse += (r * r) * a; + mse += (g * g) * a; + mse += (b * b) * a; } return float(sqrt(mse / count)); } -float nv::rmsAlphaError(const FloatImage * img, const FloatImage * ref) +float nv::rmsAlphaError(const FloatImage * ref, const FloatImage * img) { if (!sameLayout(img, ref)) { return FLT_MAX; @@ -71,7 +71,7 @@ float nv::rmsAlphaError(const FloatImage * img, const FloatImage * ref) } -float nv::averageColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight) +float nv::averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight) { if (!sameLayout(img, ref)) { return FLT_MAX; @@ -108,7 +108,7 @@ float nv::averageColorError(const FloatImage * img, const FloatImage * ref, bool return float(mae / count); } -float nv::averageAlphaError(const FloatImage * img, const FloatImage * ref) +float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img) { if (img == NULL || ref == NULL || img->width() != 
ref->width() || img->height() != ref->height()) { return FLT_MAX; diff --git a/src/nvimage/ErrorMetric.h b/src/nvimage/ErrorMetric.h index 158dacf..b875802 100644 --- a/src/nvimage/ErrorMetric.h +++ b/src/nvimage/ErrorMetric.h @@ -6,15 +6,15 @@ namespace nv { class FloatImage; - float rmsColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight); - float rmsAlphaError(const FloatImage * img, const FloatImage * ref); + float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); + float rmsAlphaError(const FloatImage * ref, const FloatImage * img); - float cieLabError(const FloatImage * img, const FloatImage * ref); - float cieLab94Error(const FloatImage * img, const FloatImage * ref); - float spatialCieLabError(const FloatImage * img, const FloatImage * ref); + float cieLabError(const FloatImage * ref, const FloatImage * img); + float cieLab94Error(const FloatImage * ref, const FloatImage * img); + float spatialCieLabError(const FloatImage * ref, const FloatImage * img); - float averageColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight); - float averageAlphaError(const FloatImage * img, const FloatImage * ref); + float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); + float averageAlphaError(const FloatImage * ref, const FloatImage * img); float averageAngularError(const FloatImage * img0, const FloatImage * img1); float rmsAngularError(const FloatImage * img0, const FloatImage * img1); diff --git a/src/nvmath/PackedFloat.cpp b/src/nvmath/PackedFloat.cpp new file mode 100755 index 0000000..3327d20 --- /dev/null +++ b/src/nvmath/PackedFloat.cpp @@ -0,0 +1,61 @@ +// This code is in the public domain -- Ignacio Castaño + +#include "PackedFloat.h" +#include "Vector.inl" +#include "ftoi.h" + +using namespace nv; + +Vector3 nv::rgb9e5_to_vector3(FloatRGB9E5 v) { +} + +FloatRGB9E5 nv::vector3_to_rgb9e5(const Vector3 & v) { +} + + +float nv::float11_to_float32(uint v) 
{ +} + +float nv::float10_to_float32(uint v) { +} + +Vector3 nv::r11g11b10_to_vector3(FloatR11G11B10 v) { +} + +FloatR11G11B10 nv::vector3_to_r11g11b10(const Vector3 & v) { +} + +// These are based on: +// http://www.graphics.cornell.edu/~bjw/rgbe/rgbe.c +// While this may not be the best way to encode/decode RGBE8, I'm not making any changes to maintain compatibility. +FloatRGBE8 nv::vector3_to_rgbe8(const Vector3 & v) { + + float m = max3(v.x, v.y, v.z); + + FloatRGBE8 rgbe; + + if (m < 1e-32) { + rgbe.v = 0; + } + else { + int e; + float scale = frexpf(m, &e) * 256.0f / m; + rgbe.r = U8(ftoi_round(v.x * scale)); + rgbe.g = U8(ftoi_round(v.y * scale)); + rgbe.b = U8(ftoi_round(v.z * scale)); + rgbe.e = U8(e + 128); + } + + return rgbe; +} + + +Vector3 nv::rgbe8_to_vector3(FloatRGBE8 v) { + if (v.e != 0) { + float scale = ldexpf(1.0f, v.e-(int)(128+8)); // +8 to divide by 256. @@ Shouldn't we divide by 255 instead? + return scale * Vector3(float(v.r), float(v.g), float(v.b)); + } + + return Vector3(0); +} + diff --git a/src/nvmath/PackedFloat.h b/src/nvmath/PackedFloat.h new file mode 100755 index 0000000..bf84b85 --- /dev/null +++ b/src/nvmath/PackedFloat.h @@ -0,0 +1,79 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_MATH_PACKEDFLOAT_H +#define NV_MATH_PACKEDFLOAT_H + +#include "nvmath.h" +#include "Vector.h" + +namespace nv +{ + + union FloatRGB9E5 { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 e : 5; + uint32 zm : 9; + uint32 ym : 9; + uint32 xm : 9; + #else + uint32 xm : 9; + uint32 ym : 9; + uint32 zm : 9; + uint32 e : 5; + #endif + }; + }; + + union FloatR11G11B10 { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 ze : 5; + uint32 zm : 5; + uint32 ye : 5; + uint32 ym : 6; + uint32 xe : 5; + uint32 xm : 6; + #else + uint32 xm : 6; + uint32 xe : 5; + uint32 ym : 6; + uint32 ye : 5; + uint32 zm : 5; + uint32 ze : 5; + #endif + }; + }; + + union FloatRGBE8 { + uint32 v; + struct { + #if NV_LITTLE_ENDIAN + 
uint8 r, g, b, e; + #else + uint8 e: 8; + uint8 b: 8; + uint8 g: 8; + uint8 r: 8; + #endif + }; + }; + + NVMATH_API Vector3 rgb9e5_to_vector3(FloatRGB9E5 v); + NVMATH_API FloatRGB9E5 vector3_to_rgb9e5(const Vector3 & v); + + NVMATH_API float float11_to_float32(uint v); + NVMATH_API float float10_to_float32(uint v); + + NVMATH_API Vector3 r11g11b10_to_vector3(FloatR11G11B10 v); + NVMATH_API FloatR11G11B10 vector3_to_r11g11b10(const Vector3 & v); + + NVMATH_API Vector3 rgbe8_to_vector3(FloatRGBE8 v); + NVMATH_API FloatRGBE8 vector3_to_rgbe8(const Vector3 & v); + +} // nv + +#endif // NV_MATH_PACKEDFLOAT_H diff --git a/src/nvmath/SimdVector.h b/src/nvmath/SimdVector.h index b6b7298..94e5186 100644 --- a/src/nvmath/SimdVector.h +++ b/src/nvmath/SimdVector.h @@ -2,29 +2,6 @@ #include "Vector.h" // Vector3, Vector4 -// Set some reasonable defaults. -#ifndef NV_USE_ALTIVEC -# define NV_USE_ALTIVEC NV_CPU_PPC -//# define NV_USE_ALTIVEC defined(__VEC__) -#endif - -#ifndef NV_USE_SSE -# if NV_CPU_X86 || NV_CPU_X86_64 -# define NV_USE_SSE 2 -# endif -# if defined(__SSE2__) -# define NV_USE_SSE 2 -# elif defined(__SSE__) -# define NV_USE_SSE 1 -# else -# define NV_USE_SSE 0 -# endif -#endif - -// Internally set NV_USE_SIMD when either altivec or sse is available. -#if NV_USE_ALTIVEC && NV_USE_SSE -# error "Cannot enable both altivec and sse!" 
-#endif #if NV_USE_ALTIVEC # include "SimdVector_VE.h" diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index 9626431..baeb02a 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -283,6 +283,49 @@ namespace nv f.value = x; return (f.field.biasedexponent - 127); } + + + // FloatRGB9E5 + union Float3SE { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 e : 5; + uint32 zm : 9; + uint32 ym : 9; + uint32 xm : 9; + #else + uint32 xm : 9; + uint32 ym : 9; + uint32 zm : 9; + uint32 e : 5; + #endif + }; + }; + + // FloatR11G11B10 + union Float3PK { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 ze : 5; + uint32 zm : 5; + uint32 ye : 5; + uint32 ym : 6; + uint32 xe : 5; + uint32 xm : 6; + #else + uint32 xm : 6; + uint32 xe : 5; + uint32 ym : 6; + uint32 ye : 5; + uint32 zm : 5; + uint32 ze : 5; + #endif + }; + }; + + } // nv #endif // NV_MATH_H diff --git a/src/nvthread/Atomic.h b/src/nvthread/Atomic.h index ece44b5..6c2e0fa 100644 --- a/src/nvthread/Atomic.h +++ b/src/nvthread/Atomic.h @@ -14,6 +14,7 @@ #pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement) #pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange) +//#pragma intrinsic(_InterlockedExchangeAdd64) /* extern "C" @@ -147,6 +148,11 @@ namespace nv { return (uint32)_InterlockedExchange((long *)value, (long)desired); } + inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return (uint32)_InterlockedExchangeAdd((long*)value, (long)value_to_add); + } + #elif NV_CC_CLANG && (NV_OS_IOS || NV_OS_DARWIN) NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long)); @@ -177,14 +183,14 @@ namespace nv { inline uint32 atomicIncrement(uint32 * value) { nvDebugCheck((intptr_t(value) & 3) == 0); - + return __sync_add_and_fetch(value, 1); } - + inline uint32 atomicDecrement(uint32 * value) { nvDebugCheck((intptr_t(value) & 3) == 0); - + return __sync_sub_and_fetch(value, 1); } @@ -204,6 +210,12 @@ namespace nv { return 
__sync_lock_test_and_set(value, desired); } + inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_add_and_fetch(value, value_to_add); + } + + #elif NV_CC_CLANG && POSH_CPU_STRONGARM NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long)); @@ -288,6 +300,12 @@ namespace nv { // this is confusingly named, it doesn't actually do a test but always sets return __sync_lock_test_and_set(value, desired); } + + inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_add_and_fetch(value, value_to_add); + } + #else #error "Atomics not implemented." diff --git a/src/nvthread/Event.cpp b/src/nvthread/Event.cpp index 98a4bcc..92903a8 100644 --- a/src/nvthread/Event.cpp +++ b/src/nvthread/Event.cpp @@ -60,7 +60,7 @@ void Event::post() { //ACS: move this after the unlock? if(m->wait_count>0) { - pthread_cond_signal(&m->pt_cond); + pthread_cond_signal(&m->pt_cond); } pthread_mutex_unlock(&m->pt_mutex); @@ -71,7 +71,7 @@ void Event::wait() { while(m->count==0) { m->wait_count++; - pthread_cond_wait(&m->pt_cond, &m->pt_mutex); + pthread_cond_wait(&m->pt_cond, &m->pt_mutex); m->wait_count--; } m->count--; diff --git a/src/nvthread/Mutex.cpp b/src/nvthread/Mutex.cpp index b657c2e..9d4aa66 100644 --- a/src/nvthread/Mutex.cpp +++ b/src/nvthread/Mutex.cpp @@ -13,6 +13,11 @@ #endif // NV_OS +#if NV_USE_TELEMETRY +#include +extern HTELEMETRY tmContext; +#endif + using namespace nv; @@ -20,12 +25,17 @@ using namespace nv; struct Mutex::Private { CRITICAL_SECTION mutex; + const char * name; }; -Mutex::Mutex () : m(new Private) +Mutex::Mutex (const char * name) : m(new Private) { InitializeCriticalSection(&m->mutex); + m->name = name; +#if NV_USE_TELEMETRY + tmLockName(tmContext, this, name); +#endif } Mutex::~Mutex () @@ -35,16 +45,44 @@ Mutex::~Mutex () void Mutex::lock() { +#if NV_USE_TELEMETRY + TmU64 matcher; + tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, 
__FILE__, __LINE__, this, "blocked"); +#endif + EnterCriticalSection(&m->mutex); + +#if NV_USE_TELEMETRY + tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS); + tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired"); +#endif } bool Mutex::tryLock() { +#if NV_USE_TELEMETRY + TmU64 matcher; + tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked"); + if (TryEnterCriticalSection(&m->mutex) != 0) { + tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS); + tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired"); + return true; + } + else { + tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_FAILED); + return false; + } +#else return TryEnterCriticalSection(&m->mutex) != 0; +#endif } void Mutex::unlock() { +#if NV_USE_TELEMETRY + tmSetLockState(tmContext, this, TMLS_RELEASED, "released"); +#endif + LeaveCriticalSection(&m->mutex); } @@ -52,12 +90,14 @@ void Mutex::unlock() struct Mutex::Private { pthread_mutex_t mutex; + const char * name; }; -Mutex::Mutex () : m(new Private) +Mutex::Mutex (const char * name) : m(new Private) { - int result = pthread_mutex_init(&m->mutex , NULL); + int result = pthread_mutex_init(&m->mutex, NULL); + m->name = name; nvDebugCheck(result == 0); } diff --git a/src/nvthread/Mutex.h b/src/nvthread/Mutex.h index 53aeb60..3259b9a 100644 --- a/src/nvthread/Mutex.h +++ b/src/nvthread/Mutex.h @@ -15,7 +15,7 @@ namespace nv { NV_FORBID_COPY(Mutex); public: - Mutex (); + Mutex (const char * name); ~Mutex (); void lock(); diff --git a/src/nvthread/ParallelFor.cpp b/src/nvthread/ParallelFor.cpp index 216c6d2..c8e901e 100644 --- a/src/nvthread/ParallelFor.cpp +++ b/src/nvthread/ParallelFor.cpp @@ -38,7 +38,7 @@ ParallelFor::~ParallelFor() { #endif } -void ParallelFor::run(uint count) { +void ParallelFor::run(uint count, bool calling_thread_process_work /*=false*/) { #if ENABLE_PARALLEL_FOR storeRelease(&this->count, count); @@ -48,6 +48,10 @@ void 
ParallelFor::run(uint count) { // Start threads. pool->start(worker, this); + if (calling_thread_process_work) { + worker(this); + } + // Wait for all threads to complete. pool->wait(); diff --git a/src/nvthread/ParallelFor.h b/src/nvthread/ParallelFor.h index e3e0fb8..b442dc6 100644 --- a/src/nvthread/ParallelFor.h +++ b/src/nvthread/ParallelFor.h @@ -18,7 +18,7 @@ namespace nv ParallelFor(ForTask * task, void * context); ~ParallelFor(); - void run(uint count); + void run(uint count, bool calling_thread_process_work = false); // Invariant: ForTask * task; diff --git a/src/nvthread/Thread.cpp b/src/nvthread/Thread.cpp index 6c16ad8..b9e3bc3 100644 --- a/src/nvthread/Thread.cpp +++ b/src/nvthread/Thread.cpp @@ -9,6 +9,12 @@ #include // usleep #endif +#if NV_USE_TELEMETRY +#include +extern HTELEMETRY tmContext; +#endif + + using namespace nv; struct Thread::Private @@ -21,6 +27,7 @@ struct Thread::Private ThreadFunc * func; void * arg; + const char * name; }; @@ -32,6 +39,39 @@ unsigned long __stdcall threadFunc(void * arg) { return 0; } +// SetThreadName implementation from msdn: +// http://msdn.microsoft.com/en-us/library/xcb2z8hs.aspx + +const DWORD MS_VC_EXCEPTION=0x406D1388; + +#pragma pack(push,8) +typedef struct tagTHREADNAME_INFO +{ + DWORD dwType; // Must be 0x1000. + LPCSTR szName; // Pointer to name (in user addr space). + DWORD dwThreadID; // Thread ID (-1=caller thread). + DWORD dwFlags; // Reserved for future use, must be zero. 
+} THREADNAME_INFO; +#pragma pack(pop) + +static void setThreadName(DWORD dwThreadID, const char* threadName) +{ + THREADNAME_INFO info; + info.dwType = 0x1000; + info.szName = threadName; + info.dwThreadID = dwThreadID; + info.dwFlags = 0; + + __try + { + RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info ); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + } +} + + #elif NV_OS_USE_PTHREAD extern "C" void * threadFunc(void * arg) { @@ -46,6 +86,13 @@ extern "C" void * threadFunc(void * arg) { Thread::Thread() : p(new Private) { p->thread = 0; + p->name = NULL; +} + +Thread::Thread(const char * const name) : p(new Private) +{ + p->thread = 0; + p->name = name; } Thread::~Thread() @@ -59,9 +106,20 @@ void Thread::start(ThreadFunc * func, void * arg) p->arg = arg; #if NV_OS_WIN32 - p->thread = CreateThread(NULL, 0, threadFunc, p.ptr(), 0, NULL); + DWORD threadId; + p->thread = CreateThread(NULL, 0, threadFunc, p.ptr(), 0, &threadId); //p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, p.ptr(), 0, NULL); // @@ So that we can call CRT functions... nvDebugCheck(p->thread != NULL); + setThreadName(threadId, p->name); +#if NV_USE_TELEMETRY + tmThreadName(tmContext, threadId, p->name); +#endif +#elif NV_OS_ORBIS + int ret = scePthreadCreate(&p->thread, NULL, threadFunc, p.ptr(), p->name ? 
p->name : "nv::Thread"); + nvDebugCheck(ret == 0); + // use any non-system core + scePthreadSetaffinity(p->thread, 0x3F); + scePthreadSetprio(p->thread, (SCE_KERNEL_PRIO_FIFO_DEFAULT + SCE_KERNEL_PRIO_FIFO_HIGHEST) / 2); #elif NV_OS_USE_PTHREAD int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr()); nvDebugCheck(result == 0); diff --git a/src/nvthread/Thread.h b/src/nvthread/Thread.h index 48fe800..5e0f0e2 100644 --- a/src/nvthread/Thread.h +++ b/src/nvthread/Thread.h @@ -17,6 +17,7 @@ namespace nv NV_FORBID_COPY(Thread); public: Thread(); + Thread(const char * const name); ~Thread(); void start(ThreadFunc * func, void * arg); diff --git a/src/nvthread/ThreadPool.cpp b/src/nvthread/ThreadPool.cpp index 8364c62..53667ae 100644 --- a/src/nvthread/ThreadPool.cpp +++ b/src/nvthread/ThreadPool.cpp @@ -14,7 +14,7 @@ using namespace nv; #if PROTECT_THREAD_POOL -Mutex s_pool_mutex; +Mutex s_pool_mutex("thread pool"); #endif AutoPtr s_pool; diff --git a/src/nvthread/ThreadPool.h b/src/nvthread/ThreadPool.h index f1bd620..fb75b6d 100644 --- a/src/nvthread/ThreadPool.h +++ b/src/nvthread/ThreadPool.h @@ -12,7 +12,7 @@ // The thread pool creates one worker thread for each physical core. // The threads are idle waiting for their start events so that they do not consume any resources while inactive. // The thread pool runs the same function in all worker threads, the idea is to use this as the foundation of a custom task scheduler. -// When the thread pool starts, the main thread continues running, but the common use case is to inmmediately wait of the termination events of the worker threads. +// When the thread pool starts, the main thread continues running, but the common use case is to immediately wait for the termination events of the worker threads. // @@ The start and wait methods could probably be merged. 
namespace nv { diff --git a/src/nvthread/nvthread.cpp b/src/nvthread/nvthread.cpp index 9de9a81..fe20592 100644 --- a/src/nvthread/nvthread.cpp +++ b/src/nvthread/nvthread.cpp @@ -72,3 +72,10 @@ uint nv::hardwareThreadCount() { #endif } +uint nv::threadId() { +#if NV_OS_WIN32 + return GetCurrentThreadId(); +#else + return 0; // @@ +#endif +} \ No newline at end of file diff --git a/src/nvthread/nvthread.h b/src/nvthread/nvthread.h index aa236d3..c246b57 100644 --- a/src/nvthread/nvthread.h +++ b/src/nvthread/nvthread.h @@ -90,6 +90,8 @@ namespace nv void shutWorkers(); void setWorkerFunction(void * func); + uint threadId(); + } // nv namespace diff --git a/src/nvtt/BlockCompressor.cpp b/src/nvtt/BlockCompressor.cpp index 6998e1b..6b39636 100644 --- a/src/nvtt/BlockCompressor.cpp +++ b/src/nvtt/BlockCompressor.cpp @@ -113,22 +113,23 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c */ -struct ColorBlockCompressorContext +struct CompressorContext { nvtt::AlphaMode alphaMode; - uint w, h; + uint w, h, d; const float * data; const nvtt::CompressionOptions::Private * compressionOptions; uint bw, bh, bs; uint8 * mem; - ColorBlockCompressor * compressor; + CompressorInterface * compressor; }; + // Each task compresses one block. 
void ColorBlockCompressorTask(void * data, int i) { - ColorBlockCompressorContext * d = (ColorBlockCompressorContext *) data; + CompressorContext * d = (CompressorContext *) data; uint x = i % d->bw; uint y = i / d->bw; @@ -139,7 +140,7 @@ void ColorBlockCompressorTask(void * data, int i) rgba.init(d->w, d->h, d->data, 4*x, 4*y); uint8 * ptr = d->mem + (y * d->bw + x) * d->bs; - d->compressor->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr); + ((ColorBlockCompressor *) d->compressor)->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr); } } @@ -147,10 +148,11 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u { nvDebugCheck(d == 1); - ColorBlockCompressorContext context; + CompressorContext context; context.alphaMode = alphaMode; context.w = w; context.h = h; + context.d = d; context.data = data; context.compressionOptions = &compressionOptions; @@ -181,23 +183,11 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u } -struct ColorSetCompressorContext -{ - nvtt::AlphaMode alphaMode; - uint w, h; - const float * data; - const nvtt::CompressionOptions::Private * compressionOptions; - - uint bw, bh, bs; - uint8 * mem; - ColorSetCompressor * compressor; -}; - - +#if 0 // Each task compresses one block. 
void ColorSetCompressorTask(void * data, int i) { - ColorSetCompressorContext * d = (ColorSetCompressorContext *) data; + CompressorContext * d = (CompressorContext *) data; uint x = i % d->bw; uint y = i / d->bw; @@ -208,7 +198,7 @@ void ColorSetCompressorTask(void * data, int i) set.setColors(d->data, d->w, d->h, x * 4, y * 4); uint8 * ptr = d->mem + (y * d->bw + x) * d->bs; - d->compressor->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr); + ((ColorSetCompressor *)d->compressor)->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr); } } @@ -217,7 +207,7 @@ void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, c { nvDebugCheck(d == 1); - ColorSetCompressorContext context; + CompressorContext context; context.alphaMode = alphaMode; context.w = w; context.h = h; @@ -249,3 +239,97 @@ void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, c delete [] context.mem; } +#endif // 0 + + +// Each task compresses one block. +void FloatColorCompressorTask(void * data, int i) +{ + CompressorContext * d = (CompressorContext *) data; + + // Copy image to block. 
+ const uint block_x = (i % d->bw); + const uint block_y = (i / d->bw); + + const uint src_x_offset = block_x * 4; + const uint src_y_offset = block_y * 4; + + const float * r = (const float *)d->data + d->w * d->h * d->d * 0; + const float * g = (const float *)d->data + d->w * d->h * d->d * 1; + const float * b = (const float *)d->data + d->w * d->h * d->d * 2; + const float * a = (const float *)d->data + d->w * d->h * d->d * 3; + + Vector4 colors[16]; + float weights[16]; + + const uint block_w = min(d->w, 4U); + const uint block_h = min(d->h, 4U); + + uint x, y; + for (y = 0; y < block_h; y++) { + for (x = 0; x < block_w; x++) { + uint dst_idx = 4 * y + x; + uint src_idx = (y + src_y_offset) * d->w + (x + src_x_offset); + colors[dst_idx].x = r[src_idx]; + colors[dst_idx].y = g[src_idx]; + colors[dst_idx].z = b[src_idx]; + colors[dst_idx].w = a[src_idx]; + weights[dst_idx] = (d->alphaMode == nvtt::AlphaMode_Transparency) ? a[src_idx] : 1.0f; + } + for (; x < 4; x++) { + uint dst_idx = 4 * y + x; + colors[dst_idx] = Vector4(0); + weights[dst_idx] = 0.0f; + } + } + for (; y < 4; y++) { + for (x = 0; x < 4; x++) { + uint dst_idx = 4 * y + x; + colors[dst_idx] = Vector4(0); + weights[dst_idx] = 0.0f; + } + } + + // Compress block. + uint8 * output = d->mem + (block_y * d->bw + block_x) * d->bs; + ((FloatColorCompressor *)d->compressor)->compressBlock(colors, weights, *d->compressionOptions, output); +} + + +void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); // @@ Add support for compressed 3D textures. 
+ + CompressorContext context; + context.alphaMode = alphaMode; + context.w = w; + context.h = h; + context.d = d; + context.data = data; + context.compressionOptions = &compressionOptions; + + context.bs = blockSize(); + context.bw = (w + 3) / 4; + context.bh = (h + 3) / 4; + + context.compressor = this; + + SequentialTaskDispatcher sequential; + + // Use a single thread to compress small textures. + if (context.bh < 4) dispatcher = &sequential; + +#if _DEBUG + dispatcher = &sequential; +#endif + + const uint count = context.bw * context.bh; + const uint size = context.bs * count; + context.mem = new uint8[size]; + + dispatcher->dispatch(FloatColorCompressorTask, &context, count); + + outputOptions.writeData(context.mem, size); + + delete [] context.mem; +} diff --git a/src/nvtt/BlockCompressor.h b/src/nvtt/BlockCompressor.h index cc829ce..7514bde 100644 --- a/src/nvtt/BlockCompressor.h +++ b/src/nvtt/BlockCompressor.h @@ -30,8 +30,8 @@ namespace nv { - struct ColorSet; struct ColorBlock; + class Vector4; struct ColorBlockCompressor : public CompressorInterface { @@ -41,11 +41,11 @@ namespace nv virtual uint blockSize() const = 0; }; - struct ColorSetCompressor : public CompressorInterface + struct FloatColorCompressor : public CompressorInterface { virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; virtual uint blockSize() const = 0; }; diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt index 932eaf2..2cc78bc 100644 --- a/src/nvtt/CMakeLists.txt +++ 
b/src/nvtt/CMakeLists.txt @@ -12,6 +12,7 @@ SET(NVTT_SRCS CompressorDX10.h CompressorDX10.cpp CompressorDX11.h CompressorDX11.cpp CompressorDXT1.h CompressorDXT1.cpp + CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp CompressorRGB.h CompressorRGB.cpp Context.h Context.cpp QuickCompressDXT.h QuickCompressDXT.cpp diff --git a/src/nvtt/ClusterFit.cpp b/src/nvtt/ClusterFit.cpp index 7b91e2b..b3b2f1a 100644 --- a/src/nvtt/ClusterFit.cpp +++ b/src/nvtt/ClusterFit.cpp @@ -38,7 +38,7 @@ ClusterFit::ClusterFit() { } -// @@ Deprecate. Do not use color set directly. +#if 0 // @@ Deprecate. Do not use color set directly. void ClusterFit::setColorSet(const ColorSet * set) { // initialise the best error @@ -108,6 +108,7 @@ void ClusterFit::setColorSet(const ColorSet * set) #endif } } +#endif // 0 void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count) diff --git a/src/nvtt/ClusterFit.h b/src/nvtt/ClusterFit.h index 72c9ef9..4f29680 100644 --- a/src/nvtt/ClusterFit.h +++ b/src/nvtt/ClusterFit.h @@ -43,7 +43,7 @@ namespace nv { public: ClusterFit(); - void setColorSet(const ColorSet * set); + //void setColorSet(const ColorSet * set); void setColorSet(const Vector3 * colors, const float * weights, int count); void setColorWeights(const Vector4 & w); diff --git a/src/nvtt/CompressionOptions.cpp b/src/nvtt/CompressionOptions.cpp index 3951c32..a899a67 100644 --- a/src/nvtt/CompressionOptions.cpp +++ b/src/nvtt/CompressionOptions.cpp @@ -248,7 +248,7 @@ unsigned int CompressionOptions::d3d9Format() const 0, // Format_CTX1 MAKEFOURCC('B', 'C', '6', 'H'), // Format_BC6 MAKEFOURCC('B', 'C', '7', 'L'), // Format_BC7 - FOURCC_ATI2, // Format_BC5_Luma + //FOURCC_ATI2, // Format_BC5_Luma FOURCC_DXT5, // Format_BC3_RGBM }; diff --git a/src/nvtt/CompressorDX10.cpp b/src/nvtt/CompressorDX10.cpp index d823db8..7a7842d 100644 --- a/src/nvtt/CompressorDX10.cpp +++ b/src/nvtt/CompressorDX10.cpp @@ -85,6 +85,7 @@ void ProductionCompressorBC5::compressBlock(ColorBlock & 
src, nvtt::AlphaMode al } +#if 0 void ProductionCompressorBC5_Luma::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) { BlockATI2 * block = new(output) BlockATI2; @@ -118,3 +119,4 @@ void ProductionCompressorBC5_Luma::compressBlock(ColorSet & set, nvtt::AlphaMode OptimalCompress::compressDXT5A(tmp, &block->y); } +#endif // 0 \ No newline at end of file diff --git a/src/nvtt/CompressorDX10.h b/src/nvtt/CompressorDX10.h index 0ea16c3..67addd3 100644 --- a/src/nvtt/CompressorDX10.h +++ b/src/nvtt/CompressorDX10.h @@ -58,11 +58,11 @@ namespace nv virtual uint blockSize() const { return 16; } }; - struct ProductionCompressorBC5_Luma : public ColorSetCompressor + /*struct ProductionCompressorBC5_Luma : public ColorSetCompressor { virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } - }; + };*/ } // nv namespace diff --git a/src/nvtt/CompressorDX11.cpp b/src/nvtt/CompressorDX11.cpp index cf83a69..a349ffa 100644 --- a/src/nvtt/CompressorDX11.cpp +++ b/src/nvtt/CompressorDX11.cpp @@ -39,7 +39,7 @@ using namespace nv; using namespace nvtt; -void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) +void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) { // !!!UNDONE: support channel weights // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) @@ -56,57 +56,45 @@ void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const Co } // Convert NVTT's tile struct to ZOH's, and convert float to half. 
- ZOH::Tile zohTile(tile.w, tile.h); + ZOH::Tile zohTile(4, 4); memset(zohTile.data, 0, sizeof(zohTile.data)); memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map)); - for (uint y = 0; y < tile.h; ++y) + for (uint y = 0; y < 4; ++y) { - for (uint x = 0; x < tile.w; ++x) + for (uint x = 0; x < 4; ++x) { - Vector4 color = tile.color(x, y); + Vector4 color = colors[4*y+x]; uint16 rHalf = to_half(color.x); uint16 gHalf = to_half(color.y); uint16 bHalf = to_half(color.z); zohTile.data[y][x].x = ZOH::Tile::half2float(rHalf); zohTile.data[y][x].y = ZOH::Tile::half2float(gHalf); zohTile.data[y][x].z = ZOH::Tile::half2float(bHalf); - - if (alphaMode == AlphaMode_Transparency) { - zohTile.importance_map[y][x] = color.w; - } - else { - zohTile.importance_map[y][x] = 1.0f; - } + zohTile.importance_map[y][x] = weights[4*y+x]; } } ZOH::compress(zohTile, (char *)output); } -void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) +void CompressorBC7::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) { // !!!UNDONE: support channel weights // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) AVPCL::mode_rgb = false; - AVPCL::flag_premult = (alphaMode == AlphaMode_Premultiplied); + AVPCL::flag_premult = false; //(alphaMode == AlphaMode_Premultiplied); AVPCL::flag_nonuniform = false; AVPCL::flag_nonuniform_ati = false; // Convert NVTT's tile struct to AVPCL's. 
- AVPCL::Tile avpclTile(tile.w, tile.h); + AVPCL::Tile avpclTile(4, 4); memset(avpclTile.data, 0, sizeof(avpclTile.data)); - for (uint y = 0; y < tile.h; ++y) { - for (uint x = 0; x < tile.w; ++x) { - Vector4 color = tile.color(x, y); + for (uint y = 0; y < 4; ++y) { + for (uint x = 0; x < 4; ++x) { + Vector4 color = colors[4*y+x]; avpclTile.data[y][x] = color * 255.0f; - - /*if (alphaMode == AlphaMode_Transparency) { - avpclTile.importance_map[y][x] = color.w; - } - else*/ { - avpclTile.importance_map[y][x] = 1.0f; - } + avpclTile.importance_map[y][x] = 1.0f; //weights[4*y+x]; } } diff --git a/src/nvtt/CompressorDX11.h b/src/nvtt/CompressorDX11.h index 3dda9ea..7afaacb 100644 --- a/src/nvtt/CompressorDX11.h +++ b/src/nvtt/CompressorDX11.h @@ -28,15 +28,15 @@ namespace nv { - struct CompressorBC6 : public ColorSetCompressor + struct CompressorBC6 : public FloatColorCompressor { - virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } }; - struct CompressorBC7 : public ColorSetCompressor + struct CompressorBC7 : public FloatColorCompressor { - virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } }; diff --git a/src/nvtt/CompressorDX9.cpp b/src/nvtt/CompressorDX9.cpp index aaef88d..9cfd7da 100644 --- a/src/nvtt/CompressorDX9.cpp +++ b/src/nvtt/CompressorDX9.cpp @@ -28,6 +28,8 @@ #include "CompressionOptions.h" #include "OutputOptions.h" #include "ClusterFit.h" +#include 
"CompressorDXT1.h" +#include "CompressorDXT5_RGBM.h" // squish #include "squish/colourset.h" @@ -113,102 +115,13 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha } -namespace nv { - float compress_dxt1(const Vector3 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output); -} - -#if 1 -void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ #if 1 - // @@ This setup is the same for all compressors. - Vector3 input_colors[16]; - float input_weights[16]; - - uint x, y; - for (y = 0; y < set.h; y++) { - for (x = 0; x < set.w; x++) { - input_colors[4*y+x] = set.color(x, y).xyz(); - input_weights[4*y+x] = 1.0f; - if (alphaMode == nvtt::AlphaMode_Transparency) input_weights[4*y+x] = set.color(x, y).z; - } - for (; x < 4; x++) { - input_colors[4*y+x] = Vector3(0); - input_weights[4*y+x] = 0.0f; - } - } - for (; y < 4; y++) { - for (x = 0; x < 4; x++) { - input_colors[4*y+x] = Vector3(0); - input_weights[4*y+x] = 0.0f; - } - } - - compress_dxt1(input_colors, input_weights, compressionOptions.colorWeight.xyz(), (BlockDXT1 *)output); - -#else - set.setUniformWeights(); - set.createMinimalSet(/*ignoreTransparent*/false); - - BlockDXT1 * block = new(output) BlockDXT1; - - if (set.isSingleColor(/*ignoreAlpha*/true)) - { - Color32 c = toColor32(set.colors[0]); - OptimalCompress::compressDXT1(c, block); - } - /*else if (set.colorCount == 2) { - QuickCompress::compressDXT1(..., block); - }*/ - else - { - ClusterFit fit; - fit.setColorWeights(compressionOptions.colorWeight); - fit.setColorSet(&set); - - Vector3 start, end; - fit.compress4(&start, &end); - - if (fit.compress3(&start, &end)) { - QuickCompress::outputBlock3(set, start, end, block); - } - else { - QuickCompress::outputBlock4(set, start, end, block); - } - } -#endif -} -#elif 0 - - -extern void compress_dxt1_bounding_box_exhaustive(const 
ColorBlock & input, BlockDXT1 * output); - -void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +void CompressorDXT1::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) { - BlockDXT1 * block = new(output) BlockDXT1; - - if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), block); - //compress_dxt1_single_color_optimal(rgba.color(0), block); - } - else - { - // Do an exhaustive search inside the bounding box. - compress_dxt1_bounding_box_exhaustive(rgba, block); - } - - /*else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0); - fit.SetColourSet(&colours, nvsquish::kDxt1); - fit.Compress(output); - }*/ + compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, (BlockDXT1 *)output); } + #else void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) { @@ -371,309 +284,13 @@ void CompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode } - - - -void CompressorBC3_RGBM::compressBlock(ColorSet & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +void CompressorBC3_RGBM::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) { - BlockDXT5 * block = new(output)BlockDXT5; - - if (alphaMode == AlphaMode_Transparency) { - src.setAlphaWeights(); - } - else { - src.setUniformWeights(); - } - - // Decompress the color block and find the M values that reproduce the input most closely. 
This should compensate for some of the DXT errors. - - // Compress the resulting M values optimally. - - // Repeat this several times until compression error does not improve? - - //Vector3 rgb_block[16]; - //float m_block[16]; - - - // Init RGB/M block. - const float threshold = 0.15f; // @@ Use compression options. -#if 0 - nvsquish::WeightedClusterFit fit; - - ColorBlock rgba; - for (int i = 0; i < 16; i++) { - const Vector4 & c = src.color(i); - float R = saturate(c.x); - float G = saturate(c.y); - float B = saturate(c.z); - - float M = max(max(R, G), max(B, threshold)); - float r = R / M; - float g = G / M; - float b = B / M; - float a = c.w; - - rgba.color(i) = toColor32(Vector4(r, g, b, a)); - } - - if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block->color); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - } -#endif -#if 1 - ColorSet rgb; - rgb.allocate(src.w, src.h); // @@ Handle smaller blocks. - - if (src.colorCount != 16) { - nvDebugBreak(); - } - - for (uint i = 0; i < src.colorCount; i++) { - const Vector4 & c = src.color(i); - - float R = saturate(c.x); - float G = saturate(c.y); - float B = saturate(c.z); - - float M = max(max(R, G), max(B, threshold)); - float r = R / M; - float g = G / M; - float b = B / M; - float a = c.w; - - rgb.colors[i] = Vector4(r, g, b, a); - rgb.indices[i] = i; - rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set. 
- } - - rgb.createMinimalSet(/*ignoreTransparent=*/true); - - if (rgb.isSingleColor(/*ignoreAlpha=*/true)) { - OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color); - } - else { - ClusterFit fit; - fit.setColorWeights(compressionOptions.colorWeight); - fit.setColorSet(&rgb); - - Vector3 start, end; - fit.compress4(&start, &end); - - QuickCompress::outputBlock4(rgb, start, end, &block->color); - } -#endif - - // Decompress RGB/M block. - nv::ColorBlock RGB; - block->color.decodeBlock(&RGB); - -#if 1 - AlphaBlock4x4 M; - for (int i = 0; i < 16; i++) { - const Vector4 & c = src.color(i); - float R = saturate(c.x); - float G = saturate(c.y); - float B = saturate(c.z); - - float r = RGB.color(i).r / 255.0f; - float g = RGB.color(i).g / 255.0f; - float b = RGB.color(i).b / 255.0f; - - float m = (R / r + G / g + B / b) / 3.0f; - //float m = max((R / r + G / g + B / b) / 3.0f, threshold); - //float m = max(max(R / r, G / g), max(B / b, threshold)); - //float m = max(max(R, G), max(B, threshold)); - - m = (m - threshold) / (1 - threshold); - - M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); - M.weights[i] = src.weights[i]; - } - - // Compress M. - if (compressionOptions.quality == Quality_Fastest) { - QuickCompress::compressDXT5A(M, &block->alpha); - } - else { - OptimalCompress::compressDXT5A(M, &block->alpha); - } -#else - OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha); -#endif - -#if 0 - // Decompress M. - block->alpha.decodeBlock(&M); - - rgb.allocate(src.w, src.h); // @@ Handle smaller blocks. 
- - for (uint i = 0; i < src.colorCount; i++) { - const Vector4 & c = src.color(i); - - float R = saturate(c.x); - float G = saturate(c.y); - float B = saturate(c.z); - - //float m = max(max(R, G), max(B, threshold)); - float m = float(M.alpha[i]) / 255.0f * (1 - threshold) + threshold; - float r = R / m; - float g = G / m; - float b = B / m; - float a = c.w; - - rgb.colors[i] = Vector4(r, g, b, a); - rgb.indices[i] = i; - rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set. - } - - rgb.createMinimalSet(/*ignoreTransparent=*/true); - - if (rgb.isSingleColor(/*ignoreAlpha=*/true)) { - OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color); - } - else { - ClusterFit fit; - fit.setMetric(compressionOptions.colorWeight); - fit.setColourSet(&rgb); - - Vector3 start, end; - fit.compress4(&start, &end); - - QuickCompress::outputBlock4(rgb, start, end, &block->color); - } -#endif - -#if 0 - block->color.decodeBlock(&RGB); - - //AlphaBlock4x4 M; - //M.initWeights(src); - - for (int i = 0; i < 16; i++) { - const Vector4 & c = src.color(i); - float R = saturate(c.x); - float G = saturate(c.y); - float B = saturate(c.z); - - float r = RGB.color(i).r / 255.0f; - float g = RGB.color(i).g / 255.0f; - float b = RGB.color(i).b / 255.0f; - - float m = (R / r + G / g + B / b) / 3.0f; - //float m = max((R / r + G / g + B / b) / 3.0f, threshold); - //float m = max(max(R / r, G / g), max(B / b, threshold)); - //float m = max(max(R, G), max(B, threshold)); - - m = (m - threshold) / (1 - threshold); - - M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); - M.weights[i] = src.weights[i]; - } - - // Compress M. 
- if (compressionOptions.quality == Quality_Fastest) { - QuickCompress::compressDXT5A(M, &block->alpha); - } - else { - OptimalCompress::compressDXT5A(M, &block->alpha); - } -#endif - - - -#if 0 - src.fromRGBM(M, threshold); - - src.createMinimalSet(/*ignoreTransparent=*/true); - - if (src.isSingleColor(/*ignoreAlpha=*/true)) { - OptimalCompress::compressDXT1(src.color(0), &block->color); - } - else { - // @@ Use our improved compressor. - ClusterFit fit; - fit.setMetric(compressionOptions.colorWeight); - fit.setColourSet(&src); - - Vector3 start, end; - fit.compress4(&start, &end); - - if (fit.compress3(&start, &end)) { - QuickCompress::outputBlock3(src, start, end, block->color); - } - else { - QuickCompress::outputBlock4(src, start, end, block->color); - } - } -#endif // 0 - - // @@ Decompress color and compute M that best approximates src with these colors? Then compress M again? - - - - // RGBM encoding. - // Maximize precision. - // - Number of possible grey levels: - // - Naive: 2^3 = 8 - // - Better: 2^3 + 2^2 = 12 - // - How to choose threshold? - // - Ideal = Adaptive per block, don't know where to store. - // - Adaptive per lightmap. How to compute optimal? - // - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range. - - // - Optimal compressor: Interpolation artifacts. - - // - Color transform. - // - Measure error in post-tone-mapping color space. - // - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game. - // - Guess based on average lighmap color? Use fixed exposure, in scaled lightmap space. - - // - Enhanced DXT compressor. - // - Typical RGBM encoding as follows: - // rgb -> M = max(rgb), RGB=rgb/M -> RGBM - // - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1. - // - We could ensure that M' >= M during compression. - // - We could clamp RGB anyway. 
- // - We could add a fixed scale value to take into account compression errors and avoid clamping. - - - - - - // Compress color. - /*if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block->color); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - }*/ + float min_m = 0.25f; // @@ Get from compression options. + compress_dxt5_rgbm(colors, weights, min_m, (BlockDXT5 *)output); } - #if defined(HAVE_ATITC) void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) diff --git a/src/nvtt/CompressorDX9.h b/src/nvtt/CompressorDX9.h index 33c1112..8a298c6 100644 --- a/src/nvtt/CompressorDX9.h +++ b/src/nvtt/CompressorDX9.h @@ -65,9 +65,9 @@ namespace nv // Normal CPU compressors. 
#if 1 - struct CompressorDXT1 : public ColorSetCompressor + struct CompressorDXT1 : public FloatColorCompressor { - virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 8; } }; #else @@ -108,9 +108,9 @@ namespace nv virtual uint blockSize() const { return 16; } }; - struct CompressorBC3_RGBM : public ColorSetCompressor + struct CompressorBC3_RGBM : public FloatColorCompressor { - virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } }; diff --git a/src/nvtt/CompressorDXT1.cpp b/src/nvtt/CompressorDXT1.cpp index b5c4b54..08134f8 100644 --- a/src/nvtt/CompressorDXT1.cpp +++ b/src/nvtt/CompressorDXT1.cpp @@ -2,7 +2,6 @@ #include "CompressorDXT1.h" #include "SingleColorLookup.h" #include "ClusterFit.h" -#include "QuickCompressDXT.h" // Deprecate. #include "nvimage/ColorBlock.h" #include "nvimage/BlockDXT.h" @@ -162,12 +161,12 @@ static bool is_single_color_rgb(const Vector3 * colors, const float * weights, i } // Find similar colors and combine them together. 
-static int reduce_colors(const Vector3 * input_colors, const float * input_weights, Vector3 * colors, float * weights) +static int reduce_colors(const Vector4 * input_colors, const float * input_weights, Vector3 * colors, float * weights) { int n = 0; for (int i = 0; i < 16; i++) { - Vector3 ci = input_colors[i]; + Vector3 ci = input_colors[i].xyz(); float wi = input_weights[i]; if (wi > 0) { @@ -276,7 +275,7 @@ static float evaluate_mse(const BlockDXT1 * output, const Vector3 colors[16]) { } #endif -static float evaluate_mse(const Vector3 colors[16], const float weights[16], const Vector3 & color_weights, const BlockDXT1 * output) { +static float evaluate_mse(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, const BlockDXT1 * output) { Color32 palette[4]; output->evaluatePalette(palette, /*d3d9=*/false); @@ -290,7 +289,7 @@ static float evaluate_mse(const Vector3 colors[16], const float weights[16], con float error = 0.0f; for (int i = 0; i < 16; i++) { int index = (output->indices >> (2 * i)) & 3; - error += weights[i] * evaluate_mse(vector_palette[index], colors[i], color_weights); + error += input_weights[i] * evaluate_mse(vector_palette[index], input_colors[i].xyz(), color_weights); } return error; } @@ -353,14 +352,14 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) { -static uint compute_indices4(const Vector3 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { +static uint compute_indices4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { uint indices = 0; for (int i = 0; i < 16; i++) { - float d0 = evaluate_mse(palette[0], input_colors[i], color_weights); - float d1 = evaluate_mse(palette[1], input_colors[i], color_weights); - float d2 = evaluate_mse(palette[2], input_colors[i], color_weights); - float d3 = evaluate_mse(palette[3], input_colors[i], color_weights); + float d0 = evaluate_mse(palette[0], 
input_colors[i].xyz(), color_weights); + float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); + float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); + float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); uint b0 = d0 > d3; uint b1 = d1 > d2; @@ -379,14 +378,14 @@ static uint compute_indices4(const Vector3 input_colors[16], const Vector3 & col } -static uint compute_indices(const Vector3 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { +static uint compute_indices(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { uint indices = 0; for (int i = 0; i < 16; i++) { - float d0 = evaluate_mse(palette[0], input_colors[i], color_weights); - float d1 = evaluate_mse(palette[1], input_colors[i], color_weights); - float d2 = evaluate_mse(palette[2], input_colors[i], color_weights); - float d3 = evaluate_mse(palette[3], input_colors[i], color_weights); + float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); + float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); + float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); + float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); uint index; if (d0 < d1 && d0 < d2 && d0 < d3) index = 0; @@ -401,7 +400,7 @@ static uint compute_indices(const Vector3 input_colors[16], const Vector3 & colo } -static void output_block3(const Vector3 input_colors[16], const Vector3 & color_weights, const Vector3 & v0, const Vector3 & v1, BlockDXT1 * block) +static void output_block3(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 & v0, const Vector3 & v1, BlockDXT1 * block) { Color16 color0 = vector3_to_color16(v0); Color16 color1 = vector3_to_color16(v1); @@ -418,7 +417,7 @@ static void output_block3(const Vector3 input_colors[16], const Vector3 & color_ block->indices = compute_indices(input_colors, color_weights, 
palette); } -static void output_block4(const Vector3 input_colors[16], const Vector3 & color_weights, const Vector3 & v0, const Vector3 & v1, BlockDXT1 * block) +static void output_block4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 & v0, const Vector3 & v1, BlockDXT1 * block) { Color16 color0 = vector3_to_color16(v0); Color16 color1 = vector3_to_color16(v1); @@ -515,7 +514,7 @@ float nv::compress_dxt1_least_squares_fit(const Vector3 * input_colors, const Ve }*/ -float nv::compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, int max_volume, BlockDXT1 * output) +float nv::compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int max_volume, BlockDXT1 * output) { // Compute bounding box. Vector3 min_color(1.0f); @@ -586,13 +585,14 @@ float nv::compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], evaluate_palette4(palette); } else { - #if 1 - // Evaluate error in 3 color mode. - evaluate_palette3(palette); - #else - // Skip 3 color mode. - continue; - #endif + if (three_color_mode) { + // Evaluate error in 3 color mode. + evaluate_palette3(palette); + } + else { + // Skip 3 color mode. 
+ continue; + } } float error = evaluate_palette_error(palette, colors32, weights, count); @@ -608,10 +608,6 @@ float nv::compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], output->col0 = best0; output->col1 = best1; - if (output->col0.u < output->col1.u) { - int k = 1; - } - Vector3 vector_palette[4]; evaluate_palette(output->col0, output->col1, vector_palette); @@ -621,7 +617,7 @@ float nv::compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], } -void nv::compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output) +void nv::compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output) { ClusterFit fit; fit.setColorWeights(Vector4(color_weights, 1)); @@ -631,7 +627,7 @@ void nv::compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 Vector3 start, end; fit.compress4(&start, &end); - if (fit.compress3(&start, &end)) { + if (three_color_mode && fit.compress3(&start, &end)) { output_block3(input_colors, color_weights, start, end, output); } else { @@ -642,7 +638,7 @@ void nv::compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 -float nv::compress_dxt1(const Vector3 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output) +float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output) { Vector3 colors[16]; float weights[16]; @@ -674,7 +670,7 @@ float nv::compress_dxt1(const Vector3 input_colors[16], const float input_weight // If high quality: if (0) { BlockDXT1 exhaustive_output; - float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, 400, &exhaustive_output); + 
float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, three_color_mode, 1400, &exhaustive_output); if (exhaustive_error != FLT_MAX) { float exhaustive_error2 = evaluate_mse(input_colors, input_weights, color_weights, &exhaustive_output); @@ -700,7 +696,7 @@ float nv::compress_dxt1(const Vector3 input_colors[16], const float input_weight if (count > 1) { BlockDXT1 cluster_fit_output; - compress_dxt1_cluster_fit(input_colors, colors, weights, count, color_weights, &cluster_fit_output); + compress_dxt1_cluster_fit(input_colors, colors, weights, count, color_weights, three_color_mode, &cluster_fit_output); float cluster_fit_error = evaluate_mse(input_colors, input_weights, color_weights, &cluster_fit_output); diff --git a/src/nvtt/CompressorDXT1.h b/src/nvtt/CompressorDXT1.h index daf99ac..c7e51d7 100644 --- a/src/nvtt/CompressorDXT1.h +++ b/src/nvtt/CompressorDXT1.h @@ -5,6 +5,7 @@ namespace nv { struct ColorBlock; struct BlockDXT1; class Vector3; + class Vector4; // All these functions return MSE. 
@@ -12,11 +13,11 @@ namespace nv { float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output); float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); - float compress_dxt1_least_squares_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); - float compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, int search_limit, BlockDXT1 * output); - void compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); + float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); + float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output); + void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); - float compress_dxt1(const Vector3 colors[16], const float weights[16], const Vector3 & color_weights, BlockDXT1 * output); + float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); } diff --git a/src/nvtt/CompressorDXT5_RGBM.cpp b/src/nvtt/CompressorDXT5_RGBM.cpp new file mode 100755 index 0000000..21d4b06 --- /dev/null +++ b/src/nvtt/CompressorDXT5_RGBM.cpp @@ -0,0 +1,423 @@ +#include "CompressorDXT5_RGBM.h" +#include "CompressorDXT1.h" + +#include 
"OptimalCompressDXT.h" +#include "QuickCompressDXT.h" + +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" + +#include "nvmath/Color.inl" +#include "nvmath/Vector.inl" +#include "nvmath/Fitting.h" +#include "nvmath/ftoi.h" + +#include "nvthread/Atomic.h" +#include + +using namespace nv; + +static uint atomic_counter = 0;
+
+
+// Compresses a 4x4 block of colors into a DXT5 block using RGBM encoding.
+//
+// The color part of the block stores rgb / M, the alpha part stores the
+// multiplier M remapped from [min_m, 1] to [0, 1].
+//
+//  input_colors   16 input texels; x, y, z are clamped to [0, 1], w is ignored.
+//  input_weights  16 per-texel weights. If they sum to zero, uniform weights are
+//                 used for the color fit.
+//  min_m          Smallest multiplier that can be encoded.
+//  output         Receives the compressed block.
+//
+// Returns 0; the error of the result is not computed yet (@@).
+float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
+
+    // Width of the encodable multiplier range. It is zero (or negative) when
+    // min_m >= 1; in that case the multiplier is constant and the stored alpha is
+    // irrelevant, so we store 0 instead of dividing by zero.
+    const float m_range = 1 - min_m;
+
+    // Convert to RGBM.
+    Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
+    float rgb_weights[16];
+
+    float weight_sum = 0;
+
+    for (uint i = 0; i < 16; i++) {
+        const Vector4 & c = input_colors[i];
+
+        float R = saturate(c.x);
+        float G = saturate(c.y);
+        float B = saturate(c.z);
+
+        float M = max(max(R, G), max(B, min_m));
+        float r = R / M;
+        float g = G / M;
+        float b = B / M;
+        float a = (m_range > 0.0f) ? (M - min_m) / m_range : 0.0f;
+
+        input_colors_rgbm[i] = Vector4(r, g, b, a);
+        rgb_weights[i] = input_weights[i] * M;
+        weight_sum += input_weights[i];
+    }
+
+    if (weight_sum == 0) {
+        for (uint i = 0; i < 16; i++) rgb_weights[i] = 1;
+    }
+
+    // Compress RGB.
+    compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color);
+
+    // Decompress RGB/M block.
+    nv::ColorBlock RGB;
+    output->color.decodeBlock(&RGB);
+
+    // Compute M values to compensate for RGB's error.
+    AlphaBlock4x4 M;
+    for (int i = 0; i < 16; i++) {
+        const Vector4 & c = input_colors[i];
+
+        float R = saturate(c.x);
+        float G = saturate(c.y);
+        float B = saturate(c.z);
+
+        float rm = RGB.color(i).r / 255.0f;
+        float gm = RGB.color(i).g / 255.0f;
+        float bm = RGB.color(i).b / 255.0f;
+
+        // compute m such that m * (r/M, g/M, b/M) == RGB
+
+        // Three equations, one unknown:
+        //  m * r/M == R
+        //  m * g/M == G
+        //  m * b/M == B
+
+        // Solve in the least squares sense!
+
+        // m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T
+
+        // m == dot(rgb, RGB) / dot(rgb, rgb)
+
+        const Vector3 rgb(rm, gm, bm);
+        const float rgb_length_squared = dot(rgb, rgb);
+
+        float m;
+        if (rgb_length_squared > 0.0f) {
+            m = dot(rgb, Vector3(R, G, B)) / rgb_length_squared;
+        }
+        else {
+            // The decoded color is black, so every multiplier reproduces the same
+            // result and the least squares solution is 0/0. Fall back to the
+            // multiplier of the direct encoding instead of propagating a NaN.
+            m = max(max(R, G), max(B, min_m));
+        }
+
+        m = (m_range > 0.0f) ? (m - min_m) / m_range : 0.0f;
+
+        // NOTE(review): debug instrumentation; it writes to stdout from library code.
+        if (m > 1.0f) {
+            uint counter = atomicIncrement(&atomic_counter);
+            printf("It happens %u times!", counter);
+        }
+
+        M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
+        M.weights[i] = input_weights[i];
+    }
+
+    // Compress M.
+    //if (compressionOptions.quality == Quality_Fastest) {
+    //    QuickCompress::compressDXT5A(M, &output->alpha);
+    /*}
+    else {*/
+        OptimalCompress::compressDXT5A(M, &output->alpha);
+    //}
+
+
+#if 0 // Multiple iterations do not seem to help.
+    // Decompress M.
+    output->alpha.decodeBlock(&M);
+
+    // Feed it back to the input RGB block.
+    for (uint i = 0; i < 16; i++) {
+        const Vector4 & c = input_colors[i];
+
+        float R = saturate(c.x);
+        float G = saturate(c.y);
+        float B = saturate(c.z);
+
+        float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;
+
+        float r = R / m;
+        float g = G / m;
+        float b = B / m;
+        float a = float(M.alpha[i]) / 255.0f;
+
+        input_colors_rgbm[i] = Vector4(r, g, b, a);
+        rgb_weights[i] = input_weights[i] * m;
+    }
+#endif
+
+    return 0; // @@
+}
+ + + + +#if 0 + + BlockDXT5 * block = new(output)BlockDXT5; + + // Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors. + + // Compress the resulting M values optimally. + + // Repeat this several times until compression error does not improve? + + //Vector3 rgb_block[16]; + //float m_block[16]; + + + // Init RGB/M block.
+#if 0 + nvsquish::WeightedClusterFit fit; + + ColorBlock rgba; + for (int i = 0; i < 16; i++) { + const Vector4 & c = src.color(i); + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float M = max(max(R, G), max(B, min_m)); + float r = R / M; + float g = G / M; + float b = B / M; + float a = c.w; + + rgba.color(i) = toColor32(Vector4(r, g, b, a)); + } + + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block->color); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + } +#endif +#if 1 + ColorSet rgb; + rgb.allocate(4, 4); + + for (uint i = 0; i < 16; i++) { + const Vector4 & c = colors[i]; + + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float M = max(max(R, G), max(B, min_m)); + float r = R / M; + float g = G / M; + float b = B / M; + float a = c.w; + + rgb.colors[i] = Vector4(r, g, b, a); + rgb.indices[i] = i; + rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set. + } + + rgb.createMinimalSet(/*ignoreTransparent=*/true); + + if (rgb.isSingleColor(/*ignoreAlpha=*/true)) { + OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color); + } + else { + ClusterFit fit; + fit.setColorWeights(compressionOptions.colorWeight); + fit.setColorSet(&rgb); + + Vector3 start, end; + fit.compress4(&start, &end); + + QuickCompress::outputBlock4(rgb, start, end, &block->color); + } +#endif + + // Decompress RGB/M block. 
+ nv::ColorBlock RGB; + block->color.decodeBlock(&RGB); + +#if 1 + AlphaBlock4x4 M; + for (int i = 0; i < 16; i++) { + const Vector4 & c = colors[i]; + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float r = RGB.color(i).r / 255.0f; + float g = RGB.color(i).g / 255.0f; + float b = RGB.color(i).b / 255.0f; + + float m = (R / r + G / g + B / b) / 3.0f; + //float m = max((R / r + G / g + B / b) / 3.0f, min_m); + //float m = max(max(R / r, G / g), max(B / b, min_m)); + //float m = max(max(R, G), max(B, min_m)); + + m = (m - min_m) / (1 - min_m); + + M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); + M.weights[i] = weights[i]; + } + + // Compress M. + if (compressionOptions.quality == Quality_Fastest) { + QuickCompress::compressDXT5A(M, &block->alpha); + } + else { + OptimalCompress::compressDXT5A(M, &block->alpha); + } +#else + OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha); +#endif + +#if 0 + // Decompress M. + block->alpha.decodeBlock(&M); + + rgb.allocate(src.w, src.h); // @@ Handle smaller blocks. + + for (uint i = 0; i < src.colorCount; i++) { + const Vector4 & c = src.color(i); + + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + //float m = max(max(R, G), max(B, min_m)); + float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m; + float r = R / m; + float g = G / m; + float b = B / m; + float a = c.w; + + rgb.colors[i] = Vector4(r, g, b, a); + rgb.indices[i] = i; + rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set. 
+ } + + rgb.createMinimalSet(/*ignoreTransparent=*/true); + + if (rgb.isSingleColor(/*ignoreAlpha=*/true)) { + OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color); + } + else { + ClusterFit fit; + fit.setMetric(compressionOptions.colorWeight); + fit.setColourSet(&rgb); + + Vector3 start, end; + fit.compress4(&start, &end); + + QuickCompress::outputBlock4(rgb, start, end, &block->color); + } +#endif + +#if 0 + block->color.decodeBlock(&RGB); + + //AlphaBlock4x4 M; + //M.initWeights(src); + + for (int i = 0; i < 16; i++) { + const Vector4 & c = src.color(i); + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float r = RGB.color(i).r / 255.0f; + float g = RGB.color(i).g / 255.0f; + float b = RGB.color(i).b / 255.0f; + + float m = (R / r + G / g + B / b) / 3.0f; + //float m = max((R / r + G / g + B / b) / 3.0f, min_m); + //float m = max(max(R / r, G / g), max(B / b, min_m)); + //float m = max(max(R, G), max(B, min_m)); + + m = (m - min_m) / (1 - min_m); + + M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); + M.weights[i] = src.weights[i]; + } + + // Compress M. + if (compressionOptions.quality == Quality_Fastest) { + QuickCompress::compressDXT5A(M, &block->alpha); + } + else { + OptimalCompress::compressDXT5A(M, &block->alpha); + } +#endif + + + +#if 0 + src.fromRGBM(M, min_m); + + src.createMinimalSet(/*ignoreTransparent=*/true); + + if (src.isSingleColor(/*ignoreAlpha=*/true)) { + OptimalCompress::compressDXT1(src.color(0), &block->color); + } + else { + // @@ Use our improved compressor. + ClusterFit fit; + fit.setMetric(compressionOptions.colorWeight); + fit.setColourSet(&src); + + Vector3 start, end; + fit.compress4(&start, &end); + + if (fit.compress3(&start, &end)) { + QuickCompress::outputBlock3(src, start, end, block->color); + } + else { + QuickCompress::outputBlock4(src, start, end, block->color); + } + } +#endif // 0 + + // @@ Decompress color and compute M that best approximates src with these colors? 
Then compress M again? + + + + // RGBM encoding. + // Maximize precision. + // - Number of possible grey levels: + // - Naive: 2^3 = 8 + // - Better: 2^3 + 2^2 = 12 + // - How to choose min_m? + // - Ideal = Adaptive per block, don't know where to store. + // - Adaptive per lightmap. How to compute optimal? + // - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range. + + // - Optimal compressor: Interpolation artifacts. + + // - Color transform. + // - Measure error in post-tone-mapping color space. + // - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game. + // - Guess based on average lightmap color? Use fixed exposure, in scaled lightmap space. + + // - Enhanced DXT compressor. + // - Typical RGBM encoding as follows: + // rgb -> M = max(rgb), RGB=rgb/M -> RGBM + // - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1. + // - We could ensure that M' >= M during compression. + // - We could clamp RGB anyway. + // - We could add a fixed scale value to take into account compression errors and avoid clamping. + + + + + + // Compress color.
+ /*if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block->color); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + }*/ + +#endif // 0 \ No newline at end of file diff --git a/src/nvtt/CompressorDXT5_RGBM.h b/src/nvtt/CompressorDXT5_RGBM.h new file mode 100755 index 0000000..88cf646 --- /dev/null +++ b/src/nvtt/CompressorDXT5_RGBM.h @@ -0,0 +1,9 @@ + +namespace nv { + + struct BlockDXT5; + class Vector4; + + float compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output); + +} diff --git a/src/nvtt/CompressorRGB.cpp b/src/nvtt/CompressorRGB.cpp index 442c251..1ef7327 100644 --- a/src/nvtt/CompressorRGB.cpp +++ b/src/nvtt/CompressorRGB.cpp @@ -33,6 +33,7 @@ #include "nvmath/Color.h" #include "nvmath/Half.h" #include "nvmath/ftoi.h" +#include "nvmath/Vector.inl" #include "nvcore/Debug.h" @@ -159,6 +160,164 @@ namespace }
+
+    // IC: Inf/NaN and denormal handling based on DirectXMath.
+    // Expands an unsigned 11 bit float (5 bit exponent, bias 15; 6 bit mantissa; no
+    // sign bit) stored in the low bits of 'u' to a 32 bit float.
+    static float fromFloat11(uint u) {
+        // 5 bit exponent
+        // 6 bit mantissa
+
+        uint E = (u >> 6) & 0x1F;
+        uint M = u & 0x3F;
+
+        Float754 F;
+        F.field.negative = 0;
+
+        if (E == 0x1f) { // INF or NAN.
+            F.field.biasedexponent = 0xFF;
+            F.field.mantissa = M << (23 - 6);
+        }
+        else if (E != 0) { // The value is normalized.
+            // Only the exponent bias changes (15 -> 127).
+            F.field.biasedexponent = E + 127 - 15;
+            F.field.mantissa = M << (23 - 6);
+        }
+        else if (M != 0) { // The value is denormalized.
+            // Normalize the value in the resulting float: shift the mantissa up
+            // until its implicit leading one appears in bit 6, lowering the
+            // exponent by one for every shift.
+            int exponent = 1;
+            do {
+                exponent--;
+                M <<= 1;
+            } while ((M & 0x40) == 0);
+
+            M &= 0x3F; // Drop the now implicit leading one.
+
+            F.field.biasedexponent = uint(exponent + 127 - 15);
+            F.field.mantissa = M << (23 - 6);
+        }
+        else { // The value is zero.
+            F.field.biasedexponent = 0;
+            F.field.mantissa = 0;
+        }
+
+        return F.value;
+
+#if 0
+        // X Channel (6-bit mantissa)
+        Mantissa = pSource->xm;
+
+        if ( pSource->xe == 0x1f ) // INF or NAN
+        {
+            Result[0] = 0x7f800000 | (pSource->xm << 17);
+        }
+        else
+        {
+            if ( pSource->xe != 0 ) // The value is normalized
+            {
+                Exponent = pSource->xe;
+            }
+            else if (Mantissa != 0) // The value is denormalized
+            {
+                // Normalize the value in the resulting float
+                Exponent = 1;
+
+                do
+                {
+                    Exponent--;
+                    Mantissa <<= 1;
+                } while ((Mantissa & 0x40) == 0);
+
+                Mantissa &= 0x3F;
+            }
+            else // The value is zero
+            {
+                Exponent = (uint32_t)-112;
+            }
+
+            Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17);
+        }
+    }
+#endif
+
+    }
+
+    // https://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt
+    // Packs three floats into the shared exponent format: three 9 bit mantissas
+    // and one 5 bit exponent. Components are clamped to [0, sharedexp_max].
+    Float3SE toFloat3SE(float r, float g, float b)
+    {
+        const int N = 9;                    // Mantissa bits.
+        const int E = 5;                    // Exponent bits.
+        const int Emax = (1 << E) - 1;      // 31
+        const int B = (1 << (E-1)) - 1;     // 15
+        const float sharedexp_max = float((1 << N) - 1) / (1 << N) * (1 << (Emax-B)); // 65408
+
+        // Clamp color components.
+        r = max(0.0f, min(sharedexp_max, r));
+        g = max(0.0f, min(sharedexp_max, g));
+        b = max(0.0f, min(sharedexp_max, b));
+
+        // Get max component.
+        float max_c = max3(r, g, b);
+
+        // Compute shared exponent. log2f(0) is -infinity, which cannot be converted
+        // to an integer, so an all-zero color goes straight to the lower bound.
+        int exp_floor = -B-1;
+        if (max_c > 0.0f) {
+            exp_floor = max(-B-1, ftoi_floor(log2f(max_c)));
+        }
+        int exp_shared_p = exp_floor + 1 + B;
+
+        // The scale factor is 2^(exp - B - N). That exponent is negative for every
+        // value below 2^N, so it cannot be built with an integer shift. Dividing by
+        // 2^k is the same as ldexpf(x, -k), which is exact.
+        int max_s = ftoi_round(ldexpf(max_c, B + N - exp_shared_p));
+
+        int exp_shared = exp_shared_p;
+        if (max_s == (1 << N)) exp_shared++;
+
+        Float3SE v;
+        v.e = exp_shared;
+
+        // Compute mantissas.
+        v.xm = ftoi_round(ldexpf(r, B + N - exp_shared));
+        v.ym = ftoi_round(ldexpf(g, B + N - exp_shared));
+        v.zm = ftoi_round(ldexpf(b, B + N - exp_shared));
+
+        return v;
+    }
+
+    // Expands a shared exponent color. Each component is mantissa * 2^(e - 15 - 9).
+    Vector3 fromFloat3SE(Float3SE v) {
+        Float754 f;
+        f.raw = 0x33800000 + (v.e << 23);   // 0x33800000 is 2^-24 as a float.
+        float scale = f.value;
+        return scale * Vector3(float(v.xm), float(v.ym), float(v.zm));
+    }
+
+    // These are based on: http://www.graphics.cornell.edu/~bjw/rgbe/rgbe.c
+    // Packs a color as three 8 bit mantissas (r in bits 0-7, g in 8-15, b in 16-23)
+    // and a shared 8 bit exponent biased by 128 (bits 24-31).
+    uint toRGBE(float r, float g, float b)
+    {
+        float v = max3(r, g, b);
+
+        // Start from zero: the components below are or'ed in.
+        uint rgbe = 0;
+
+        if (v < 1e-32) {
+            rgbe = 0;
+        }
+        else {
+            int e;
+            float scale = frexpf(v, &e) * 256.0f / v;
+            //Float754 f;
+            //f.value = v;
+            //float scale = f.field.biasedexponent * 256.0f / v;
+            //e = f.field.biasedexponent - 127
+
+            // The largest component scales to [128, 256), so rounding can produce
+            // 256; clamp to keep it from wrapping around to 0 in 8 bits. Negative
+            // components clamp to 0.
+            const int ir = max(0, min(255, ftoi_round(r * scale)));
+            const int ig = max(0, min(255, ftoi_round(g * scale)));
+            const int ib = max(0, min(255, ftoi_round(b * scale)));
+
+            // Shift as unsigned so that the exponent byte never lands in the sign
+            // bit of an int.
+            rgbe |= uint(ir) << 0;
+            rgbe |= uint(ig) << 8;
+            rgbe |= uint(ib) << 16;
+            rgbe |= (uint(e + 128) & 0xFF) << 24;
+        }
+
+        return rgbe;
+    }
+
+    // Expands a color packed by toRGBE. A zero exponent byte encodes black.
+    Vector3 fromRGBE(uint rgbe) {
+        uint r = (rgbe >> 0) & 0xFF;
+        uint g = (rgbe >> 8) & 0xFF;
+        uint b = (rgbe >> 16) & 0xFF;
+        uint e = (rgbe >> 24);
+
+        if (e != 0) {
+            // Signed arithmetic: the unbiased exponent is negative for e < 136.
+            float scale = ldexpf(1.0f, int(e) - (128 + 8));  // +8 to divide by 256. @@ Shouldn't we divide by 255 instead?
+            return scale * Vector3(float(r), float(g), float(b));
+        }
+
+        return Vector3(0);
+    }
+
+
 struct BitStream { BitStream(uint8 * ptr) : ptr(ptr), buffer(0), bits(0) { @@ -348,6 +507,20 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint else if (asize == 10) stream.putFloat10(a); else stream.putBits(0, asize); } + else if (compressionOptions.pixelType == nvtt::PixelType_SharedExp) + { + if (rsize == 9 && gsize == 9 && bsize == 9 && asize == 5) { + Float3SE v = toFloat3SE(r, g, b); + stream.putBits(v.v, 32); + } + else if (rsize == 8 && gsize == 8 && bsize == 8 && asize == 8) { + // @@ + } + else { + // @@ Not supported. Filling with zeros. + stream.putBits(0, bitCount); + } + } else { // We first convert to 16 bits, then to the target size.
@@ If greater than 16 bits, this will truncate and bitexpand. diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp index fea0017..b1b6e6f 100644 --- a/src/nvtt/Context.cpp +++ b/src/nvtt/Context.cpp @@ -447,29 +447,38 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int { const uint bitcount = compressionOptions.getBitCount(); - if (bitcount == 16) - { - if (compressionOptions.rsize == 16) - { - header.setDX10Format(56); // R16_UNORM + if (compressionOptions.pixelType == PixelType_Float) { + if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 16 && compressionOptions.asize == 16) { + header.setDX10Format(DXGI_FORMAT_R16G16B16A16_FLOAT); } - else - { - // B5G6R5_UNORM - // B5G5R5A1_UNORM + else if (compressionOptions.rsize == 11 && compressionOptions.gsize == 11 && compressionOptions.bsize == 10 && compressionOptions.asize == 0) { + header.setDX10Format(DXGI_FORMAT_R11G11B10_FLOAT); + } + else { supported = false; } } - else if (bitcount == 32) - { - // B8G8R8A8_UNORM - // B8G8R8X8_UNORM - // R8G8B8A8_UNORM - // R10G10B10A2_UNORM - supported = false; - } else { - supported = false; + if (bitcount == 16) { + if (compressionOptions.rsize == 16) { + header.setDX10Format(DXGI_FORMAT_R16_UNORM); + } + else { + // B5G6R5_UNORM + // B5G5R5A1_UNORM + supported = false; + } + } + else if (bitcount == 32) { + // B8G8R8A8_UNORM + // B8G8R8X8_UNORM + // R8G8B8A8_UNORM + // R10G10B10A2_UNORM + supported = false; + } + else { + supported = false; + } } } else @@ -492,7 +501,7 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int else if (compressionOptions.format == Format_BC4) { header.setDX10Format(DXGI_FORMAT_BC4_UNORM); // DXGI_FORMAT_BC4_SNORM ? 
} - else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) { + else if (compressionOptions.format == Format_BC5 /*|| compressionOptions.format == Format_BC5_Luma*/) { header.setDX10Format(DXGI_FORMAT_BC5_UNORM); // DXGI_FORMAT_BC5_SNORM ? if (isNormalMap) header.setNormalFlag(true); } @@ -605,7 +614,7 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int else if (compressionOptions.format == Format_BC4) { header.setFourCC('A', 'T', 'I', '1'); } - else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) { + else if (compressionOptions.format == Format_BC5 /*|| compressionOptions.format == Format_BC5_Luma*/) { header.setFourCC('A', 'T', 'I', '2'); if (isNormalMap) { header.setNormalFlag(true); @@ -773,10 +782,10 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression { return new CompressorBC7; } - else if (compressionOptions.format == Format_BC5_Luma) + /*else if (compressionOptions.format == Format_BC5_Luma) { return new ProductionCompressorBC5_Luma; - } + }*/ else if (compressionOptions.format == Format_BC3_RGBM) { return new CompressorBC3_RGBM; diff --git a/src/nvtt/OptimalCompressDXT.cpp b/src/nvtt/OptimalCompressDXT.cpp index 602b6af..4c3731e 100644 --- a/src/nvtt/OptimalCompressDXT.cpp +++ b/src/nvtt/OptimalCompressDXT.cpp @@ -614,7 +614,7 @@ void OptimalCompress::compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst compressDXT5A(tmp, dst); } - +#if 0 #include "nvmath/Vector.inl" #include "nvmath/ftoi.h" const float threshold = 0.15f; @@ -809,3 +809,4 @@ void OptimalCompress::compressDXT5A_RGBM(const ColorSet & src, const ColorBlock computeAlphaIndices_RGBM(src, RGB, dst); } +#endif // 0 \ No newline at end of file diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp index 8390610..4676fee 100644 --- a/src/nvtt/QuickCompressDXT.cpp +++ b/src/nvtt/QuickCompressDXT.cpp @@ -227,6 +227,7 @@ inline 
static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColo } // maxColor and minColor are expected to be in the same range as the color set. +/* inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor) { Vector3 palette[4]; @@ -290,7 +291,7 @@ inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor, } return indices; -} +}*/ inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) { @@ -341,7 +342,7 @@ inline static float evaluatePaletteError3(const Vector3 block[16], Vector3::Arg // maxColor and minColor are expected to be in the same range as the color set. -inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor) +/*inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor) { Vector3 palette[4]; palette[0] = minColor; @@ -372,7 +373,7 @@ inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, } return indices; -} +}*/ inline static uint computeIndices3(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) { @@ -827,7 +828,7 @@ void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, -void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block) +/*void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block) { Vector3 minColor = start * 255.0f; Vector3 maxColor = end * 255.0f; @@ -866,4 +867,4 @@ void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, co //optimizeEndPoints3(set, block); } - +*/ diff --git a/src/nvtt/Surface.cpp b/src/nvtt/Surface.cpp index 51d1521..aa612ee 100644 --- a/src/nvtt/Surface.cpp +++ b/src/nvtt/Surface.cpp @@ -37,6 +37,7 @@ #include "nvimage/ColorBlock.h" #include "nvimage/PixelFormat.h" #include 
"nvimage/ErrorMetric.h" +#include "nvimage/DirectDrawSurface.h" #include #include // memset, memcpy @@ -85,7 +86,7 @@ namespace else if (format == Format_BC4) { return 8; } - else if (format == Format_BC5 || format == Format_BC5_Luma) { + else if (format == Format_BC5 /*|| format == Format_BC5_Luma*/) { return 16; } else if (format == Format_CTX1) { @@ -469,11 +470,66 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_c *rangeMax = range.y; } - bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/) { AutoPtr img(ImageIO::loadFloat(fileName)); if (img == NULL) { + // Try loading as DDS. + if (nv::strEqual(nv::Path::extension(fileName), ".dds")) { + nv::DirectDrawSurface dds; + if (dds.load(fileName)) { + if (dds.header.isBlockFormat()) { + int w = dds.surfaceWidth(0); + int h = dds.surfaceHeight(0); + uint size = dds.surfaceSize(0); + + void * data = malloc(size); + dds.readSurface(0, 0, data, size); + + // @@ Handle all formats! @@ Get nvtt format from dds.surfaceFormat() ? + + if (dds.header.hasDX10Header()) { + if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16) { + this->setImage2D(nvtt::Format_BC6, nvtt::Decoder_D3D10, w, h, data); + } + else { + // @@ + nvCheck(false); + } + } + else { + uint fourcc = dds.header.pf.fourcc; + if (fourcc == FOURCC_DXT1) { + this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data); + } + else if (fourcc == FOURCC_DXT5) { + this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data); + } + else { + // @@ + nvCheck(false); + } + } + + free(data); + } + else { + Image img; + dds.mipmap(&img, /*face=*/0, /*mipmap=*/0); + + int w = img.width(); + int h = img.height(); + int d = img.depth(); + + // @@ Add support for all pixel formats. 
+ + this->setImage(nvtt::InputFormat_BGRA_8UB, w, h, d, img.pixels()); + } + + return true; + } + } + return false; } @@ -768,22 +824,22 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi { for (int x = 0; x < bw; x++) { - ColorSet colors; - const BlockBC6 * block = (const BlockBC6 *)ptr; - block->decodeBlock(&colors); + Vector3 colors[16]; + const BlockBC6 * block = (const BlockBC6 *)ptr; + block->decodeBlock(colors); for (int yy = 0; yy < 4; yy++) { for (int xx = 0; xx < 4; xx++) { - Vector4 rgba = colors.colors[yy*4 + xx]; + Vector3 rgb = colors[yy*4 + xx]; if (x * 4 + xx < w && y * 4 + yy < h) { - m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = rgba.x; - m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = rgba.y; - m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = rgba.z; - m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = rgba.w; + m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = rgb.x; + m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = rgb.y; + m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = rgb.z; + m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = 1.0f; } } } @@ -1579,25 +1635,32 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/) float bestM; float bestError = FLT_MAX; + //float range = 15; // 4 bit quantization. + //int irange = 16; + float range = 255; // 8 bit quantization. + int irange = 256; + + float M = max(max(R, G), max(B, threshold)); - int iM = ftoi_ceil((M - threshold) / (1 - threshold) * 255.0f); + int iM = ftoi_ceil((M - threshold) / (1 - threshold) * range); //for (int m = 0; m < 256; m++) { // If we use the entire search space, interpolation errors are very likely to occur. - for (int m = max(iM-16, 0); m < min(iM+16, 256); m++) { // If we constrain the search space, these errors disappear. - float fm = float(m) / 255.0f; + for (int m = max(iM-16, 0); m < min(iM+16, irange); m++) { // If we constrain the search space, these errors disappear. 
+ //for (int m = max(iM-4, 0); m < min(iM+4, irange); m++) { // If we constrain the search space, these errors disappear. + float fm = float(m) / range; // Decode M float M = fm * (1 - threshold) + threshold; // Encode. - int ir = ftoi_round(255.0f * nv::saturate(R / M)); - int ig = ftoi_round(255.0f * nv::saturate(G / M)); - int ib = ftoi_round(255.0f * nv::saturate(B / M)); + int ir = ftoi_round(range * nv::saturate(R / M)); + int ig = ftoi_round(range * nv::saturate(G / M)); + int ib = ftoi_round(range * nv::saturate(B / M)); // Decode. - float fr = (float(ir) / 255.0f) * M; - float fg = (float(ig) / 255.0f) * M; - float fb = (float(ib) / 255.0f) * M; + float fr = (float(ir) / range) * M; + float fg = (float(ig) / range) * M; + float fb = (float(ib) / range) * M; // Measure error. float error = square(R-fr) + square(G-fg) + square(B-fb); @@ -2961,3 +3024,189 @@ float nvtt::rmsToneMappedError(const Surface & reference, const Surface & img, f return nv::rmsColorError(r.m->image, i.m->image, reference.alphaMode() == nvtt::AlphaMode_Transparency); } + +Surface nvtt::histogram(const Surface & img, int width, int height) +{ + float min_color[3], max_color[3]; + img.range(0, &min_color[0], &max_color[0]); + img.range(1, &min_color[1], &max_color[1]); + img.range(2, &min_color[2], &max_color[2]); + + float minRange = nv::min3(min_color[0], min_color[1], min_color[2]); + float maxRange = nv::max3(max_color[0], max_color[1], max_color[2]); + + if (maxRange > 16) maxRange = 16; + + return histogram(img, /*minRange*/0, maxRange, width, height); +} + +#include "nvcore/Array.inl" +#include "nvmath/PackedFloat.h" +#include + +nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRange, int width, int height) +{ + nv::Array buckets; + buckets.resize(width, Vector3(0)); + + int w = img.width(); + int h = img.height(); + int d = img.depth(); + + const float * r = img.channel(0); + const float * g = img.channel(1); + const float * b = img.channel(2); + const 
float * a = img.channel(3); + +#if 0 + for (int z = 0; z < d; z++) + for (int y = 0; y < h; y++) + for (int x = 0; x < w; x++) + { + int i = x + y * w + z * w * d; + + float fr = (r[i] - minRange) / (maxRange - minRange); + float fg = (g[i] - minRange) / (maxRange - minRange); + float fb = (b[i] - minRange) / (maxRange - minRange); + + int R = ftoi_round(fr * (width - 1)); + int G = ftoi_round(fg * (width - 1)); + int B = ftoi_round(fb * (width - 1)); + + R = nv::clamp(R, 0, width-1); + G = nv::clamp(G, 0, width-1); + B = nv::clamp(B, 0, width-1); + + // Alpha weighted histogram? + float A = nv::saturate(a[i]); + + buckets[R].x += A; + buckets[G].y += A; + buckets[B].z += A; + } + +#elif 1 + + float exposure = 0.22f; + + //int E = 8, M = 23; // float + int E = 5, M = 10; // half + //int E = 5, M = 9; // rgb9e5 + //int E = 5, M = 6; // r11g11b10 + + for (int e = 0; e < (1 << E); e++) + { + /*if (e == 0x1f) { // Skip NaN and inf. + continue; + }*/ + if (e == 0) { // Skip denormals. + continue; + } + + for (int m = 0; m < (1 << M); m++) + { + Float754 F; + F.field.negative = 0; + F.field.biasedexponent = e + 128 - (1 << (E - 1)) - 1; // E=5 -> 128 - 15 + F.field.mantissa = m << (23 - M); + + // value = (1 + mantissa) * 2^(e-15) + + // @@ Handle denormals. + + float fc = F.value; + + // Tone mapping: + fc /= exposure; + //fc /= (fc + 1); // Reindhart tone mapping. + fc = 1 - exp2f(-fc); // Halo2 tone mapping. 
+ + // Gamma space conversion: + //fc = sqrtf(fc); + fc = powf(fc, 1.0f/2.2f); + //fc = toSrgb(fc); + + //fc = (fc - 0.5f) * 8; // zoom in + //if (fc < 0 || fc > 1) continue; + + //printf("%f\n", fc); + + int c = ftoi_round(fc * (width - 1) / 1); + c = clamp(c, 0, width - 1); + + buckets[c] += Vector3(1); + } + } + +#else + + float exposure = 0.22f; + + int R = 8, M = 8; + //int R = 6, M = 8; + //int R = 9, M = 5; + + float threshold = 1.0f / (1 << M); + //float threshold = 0.25f; + + for (int r = 0; r < (1 << R); r++) + { + float fr = float(r) / ((1 << R) - 1); + + for (int m = 0; m < (1 << M); m++) + { + float fm = float(m) / ((1 << M) - 1); + float M = fm * (1 - threshold) + threshold; + + float fc = fr * M; + + fc /= exposure; + + //fc /= (fc + 1); // Reindhart tone mapping. + fc = 1 - exp2f(-fc); // Halo2 tone mapping. + + // Gamma space conversion: + //fc = sqrtf(fc); + fc = powf(fc, 1.0f/2.2f); + //fc = toSrgb(fc); + + //fc = (fc - 0.5f) * 8; // zoom in + //if (fc < 0 || fc > 1) continue; + + int c = ftoi_round(fc * (width - 1)); + c = clamp(c, 0, width - 1); + + buckets[c] += Vector3(1); + } + } + + //buckets[0] = Vector3(1); // Hack, for prettier histograms. + +#endif + + + // Compute largerst height. + float maxh = 0; + for (int i = 0; i < width; i++) { + maxh = nv::max(maxh, nv::max3(buckets[i].x, buckets[i].y, buckets[i].z)); + } + + printf("maxh = %f\n", maxh); + //maxh = 80; + maxh = 256; + + // Draw histogram. 
+ nvtt::Surface hist; + hist.setImage(width, height, 1); + + for (int y = 0; y < height; y++) { + float fy = 1.0f - float(y) / (height - 1); + for (int x = 0; x < width; x++) { + hist.m->image->pixel(0, x, y, /*z=*/0) = fy < (buckets[x].x / maxh); + hist.m->image->pixel(1, x, y, /*z=*/0) = fy < (buckets[x].y / maxh); + hist.m->image->pixel(2, x, y, /*z=*/0) = fy < (buckets[x].z / maxh); + } + } + + return hist; +} diff --git a/src/nvtt/nvtt.cpp b/src/nvtt/nvtt.cpp index 83b9aac..b85d52e 100644 --- a/src/nvtt/nvtt.cpp +++ b/src/nvtt/nvtt.cpp @@ -23,12 +23,14 @@ // OTHER DEALINGS IN THE SOFTWARE. #include "nvtt.h" +#include "nvcore/nvcore.h" using namespace nvtt; -/// Return a string for the given error. +// Return a string for the given error. const char * nvtt::errorString(Error e) { + NV_COMPILER_CHECK(Error_Count == 7); switch(e) { case Error_Unknown: @@ -50,11 +52,8 @@ const char * nvtt::errorString(Error e) return "Invalid error"; } -/// Return NVTT version. +// Return NVTT version. unsigned int nvtt::version() { return NVTT_VERSION; } - - - diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index 0195362..8724d42 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -71,7 +71,7 @@ namespace nvtt struct CubeSurface; - // Supported compression formats. + // Supported block-compression formats. // @@ I wish I had distinguished between "formats" and compressors. // That is: // - 'DXT1' is a format 'DXT1a' and 'DXT1n' are DXT1 compressors. @@ -79,7 +79,7 @@ namespace nvtt // Having multiple enums for the same ids only creates confusion. Clean this up. enum Format { - // No compression. + // No block-compression (linear). Format_RGB, Format_RGBA = Format_RGB, @@ -105,7 +105,7 @@ namespace nvtt Format_BC6, // Not supported yet. Format_BC7, // Not supported yet. - Format_BC5_Luma, // Two DXT alpha blocks encoding a single float. + //Format_BC5_Luma, // Two DXT alpha blocks encoding a single float. 
Format_BC3_RGBM, // Format_Count @@ -120,6 +120,7 @@ namespace nvtt PixelType_SignedInt = 3, // Not supported yet. PixelType_Float = 4, PixelType_UnsignedFloat = 5, + PixelType_SharedExp = 6, // Shared exponent. }; // Quality modes. @@ -309,7 +310,7 @@ namespace nvtt // Output data. Compressed data is output as soon as it's generated to minimize memory allocations. virtual bool writeData(const void * data, int size) = 0; - // Indicate the end of a the compressed image. (New in NVTT 2.1) + // Indicate the end of the compressed image. (New in NVTT 2.1) virtual void endImage() = 0; }; @@ -323,6 +324,7 @@ namespace nvtt Error_FileOpen, Error_FileWrite, Error_UnsupportedOutputFormat, + Error_Count }; // Error handler. @@ -660,6 +662,10 @@ namespace nvtt NVTT_API float rmsToneMappedError(const Surface & reference, const Surface & img, float exposure); + + NVTT_API Surface histogram(const Surface & img, int width, int height); + NVTT_API Surface histogram(const Surface & img, float minRange, float maxRange, int width, int height); + } // nvtt namespace #endif // NVTT_H diff --git a/src/nvtt/tools/compress.cpp b/src/nvtt/tools/compress.cpp index c7d662d..3a3ce7c 100644 --- a/src/nvtt/tools/compress.cpp +++ b/src/nvtt/tools/compress.cpp @@ -152,6 +152,8 @@ int main(int argc, char *argv[]) bool premultiplyAlpha = false; nvtt::MipmapFilter mipmapFilter = nvtt::MipmapFilter_Box; bool loadAsFloat = false; + bool rgbm = false; + bool rangescale = false; const char * externalCompressor = NULL; @@ -209,6 +211,15 @@ int main(int argc, char *argv[]) { loadAsFloat = true; } + else if (strcmp("-rgbm", argv[i]) == 0) + { + rgbm = true; + } + else if (strcmp("-rangescale", argv[i]) == 0) + { + rangescale = true; + } + // Compression options. 
else if (strcmp("-fast", argv[i]) == 0) @@ -269,6 +280,11 @@ int main(int argc, char *argv[]) { format = nvtt::Format_BC7; } + else if (strcmp("-bc3_rgbm", argv[i]) == 0) + { + format = nvtt::Format_BC3_RGBM; + rgbm = true; + } // Undocumented option. Mainly used for testing. else if (strcmp("-ext", argv[i]) == 0) @@ -332,32 +348,35 @@ int main(int argc, char *argv[]) printf("usage: nvcompress [options] infile [outfile.dds]\n\n"); printf("Input options:\n"); - printf(" -color \tThe input image is a color map (default).\n"); - printf(" -alpha \tThe input image has an alpha channel used for transparency.\n"); - printf(" -normal \tThe input image is a normal map.\n"); - printf(" -tonormal \tConvert input to normal map.\n"); - printf(" -clamp \tClamp wrapping mode (default).\n"); - printf(" -repeat \tRepeat wrapping mode.\n"); - printf(" -nomips \tDisable mipmap generation.\n"); - printf(" -premula \tPremultiply alpha into color channel.\n"); - printf(" -mipfilter \tMipmap filter. One of the following: box, triangle, kaiser.\n"); - printf(" -float \tLoad as floating point image.\n\n"); + printf(" -color The input image is a color map (default).\n"); + printf(" -alpha The input image has an alpha channel used for transparency.\n"); + printf(" -normal The input image is a normal map.\n"); + printf(" -tonormal Convert input to normal map.\n"); + printf(" -clamp Clamp wrapping mode (default).\n"); + printf(" -repeat Repeat wrapping mode.\n"); + printf(" -nomips Disable mipmap generation.\n"); + printf(" -premula Premultiply alpha into color channel.\n"); + printf(" -mipfilter Mipmap filter. 
One of the following: box, triangle, kaiser.\n"); + printf(" -float Load as floating point image.\n\n"); + printf(" -rgbm Transform input to RGBM.\n\n"); + printf(" -rangescale Scale image to use entire color range.\n\n"); printf("Compression options:\n"); - printf(" -fast \tFast compression.\n"); - printf(" -nocuda \tDo not use cuda compressor.\n"); - printf(" -rgb \tRGBA format\n"); - printf(" -lumi \tLUMINANCE format\n"); - printf(" -bc1 \tBC1 format (DXT1)\n"); - printf(" -bc1n \tBC1 normal map format (DXT1nm)\n"); - printf(" -bc1a \tBC1 format with binary alpha (DXT1a)\n"); - printf(" -bc2 \tBC2 format (DXT3)\n"); - printf(" -bc3 \tBC3 format (DXT5)\n"); - printf(" -bc3n \tBC3 normal map format (DXT5nm)\n"); - printf(" -bc4 \tBC4 format (ATI1)\n"); - printf(" -bc5 \tBC5 format (3Dc/ATI2)\n"); - printf(" -bc6 \tBC6 format\n"); - printf(" -bc7 \tBC7 format\n\n"); + printf(" -fast Fast compression.\n"); + printf(" -nocuda Do not use cuda compressor.\n"); + printf(" -rgb RGBA format\n"); + printf(" -lumi LUMINANCE format\n"); + printf(" -bc1 BC1 format (DXT1)\n"); + printf(" -bc1n BC1 normal map format (DXT1nm)\n"); + printf(" -bc1a BC1 format with binary alpha (DXT1a)\n"); + printf(" -bc2 BC2 format (DXT3)\n"); + printf(" -bc3 BC3 format (DXT5)\n"); + printf(" -bc3n BC3 normal map format (DXT5nm)\n"); + printf(" -bc4 BC4 format (ATI1)\n"); + printf(" -bc5 BC5 format (3Dc/ATI2)\n"); + printf(" -bc6 BC6 format\n"); + printf(" -bc7 BC7 format\n\n"); + printf(" -bc3_rgbm BC3-rgbm format\n\n"); printf("Output options:\n"); printf(" -silent \tDo not output progress messages\n"); @@ -376,145 +395,211 @@ int main(int argc, char *argv[]) // Set input options. nvtt::InputOptions inputOptions; - if (nv::strCaseDiff(input.extension(), ".dds") == 0) - { - // Load surface. 
- nv::DirectDrawSurface dds(input.str()); - if (!dds.isValid()) - { - fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str()); - return EXIT_FAILURE; - } + bool useSurface = false; // @@ use Surface API in all cases! + nvtt::Surface image; - if (!dds.isSupported()) - { - fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str()); + if (true || format == nvtt::Format_BC3_RGBM || rgbm) { + useSurface = true; + + if (!image.load(input.str())) { + fprintf(stderr, "Error opening input file '%s'.\n", input.str()); return EXIT_FAILURE; } - uint faceCount; - if (dds.isTexture2D()) - { - inputOptions.setTextureLayout(nvtt::TextureType_2D, dds.width(), dds.height()); - faceCount = 1; - } - else if (dds.isTexture3D()) - { - inputOptions.setTextureLayout(nvtt::TextureType_3D, dds.width(), dds.height(), dds.depth()); - faceCount = 1; + if (rangescale) { + // get color range + float min_color[3], max_color[3]; + image.range(0, &min_color[0], &max_color[0]); + image.range(1, &min_color[1], &max_color[1]); + image.range(2, &min_color[2], &max_color[2]); - nvDebugBreak(); - } - else - { - nvDebugCheck(dds.isTextureCube()); - inputOptions.setTextureLayout(nvtt::TextureType_Cube, dds.width(), dds.height()); - faceCount = 6; - } - - uint mipmapCount = dds.mipmapCount(); + //printf("Color range = %.2f %.2f %.2f\n", max_color[0], max_color[1], max_color[2]); - nv::Image mipmap; + float color_range = nv::max3(max_color[0], max_color[1], max_color[2]); + const float max_color_range = 16.0f; - for (uint f = 0; f < faceCount; f++) - { - for (uint m = 0; m < mipmapCount; m++) - { - dds.mipmap(&mipmap, f, m); // @@ Load as float. + if (color_range > max_color_range) { + //Log::print("Clamping color range %f to %f\n", color_range, max_color_range); + color_range = max_color_range; + } + //color_range = max_color_range; // Use a fixed color range for now. 
- inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), mipmap.depth(), f, m); + for (int i = 0; i < 3; i++) { + image.scaleBias(i, 1.0f / color_range, 0.0f); } + image.toneMap(nvtt::ToneMapper_Linear, /*parameters=*/NULL); // Clamp without changing the hue. + + // Clamp alpha. + image.clamp(3); + } + + if (alpha) { + image.setAlphaMode(nvtt::AlphaMode_Transparency); + } + + // To gamma. + image.toGamma(2); + + if (format != nvtt::Format_BC3_RGBM) { + image.setAlphaMode(nvtt::AlphaMode_None); + image.toRGBM(1, 0.15f); } } - else - { - if (nv::strCaseDiff(input.extension(), ".exr") == 0 || nv::strCaseDiff(input.extension(), ".hdr") == 0) - { - loadAsFloat = true; + else if (format == nvtt::Format_BC6) { + //format = nvtt::Format_BC1; + //fprintf(stderr, "BLABLABLA.\n"); + useSurface = true; + + if (!image.load(input.str())) { + fprintf(stderr, "Error opening input file '%s'.\n", input.str()); + return EXIT_FAILURE; } - if (loadAsFloat) + image.setAlphaMode(nvtt::AlphaMode_Transparency); + } + else { + if (nv::strCaseDiff(input.extension(), ".dds") == 0) { - nv::AutoPtr image(nv::ImageIO::loadFloat(input.str())); + // Load surface. 
+ nv::DirectDrawSurface dds(input.str()); + if (!dds.isValid()) + { + fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str()); + return EXIT_FAILURE; + } - if (image == NULL) + if (!dds.isSupported()) { - fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); + fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str()); return EXIT_FAILURE; } - inputOptions.setFormat(nvtt::InputFormat_RGBA_32F); - inputOptions.setTextureLayout(nvtt::TextureType_2D, image->width(), image->height()); + uint faceCount; + if (dds.isTexture2D()) + { + inputOptions.setTextureLayout(nvtt::TextureType_2D, dds.width(), dds.height()); + faceCount = 1; + } + else if (dds.isTexture3D()) + { + inputOptions.setTextureLayout(nvtt::TextureType_3D, dds.width(), dds.height(), dds.depth()); + faceCount = 1; + + nvDebugBreak(); + } + else + { + nvDebugCheck(dds.isTextureCube()); + inputOptions.setTextureLayout(nvtt::TextureType_Cube, dds.width(), dds.height()); + faceCount = 6; + } + + uint mipmapCount = dds.mipmapCount(); + + nv::Image mipmap; - /*for (uint i = 0; i < image->componentNum(); i++) + for (uint f = 0; f < faceCount; f++) { - inputOptions.setMipmapChannelData(image->channel(i), i, image->width(), image->height()); - }*/ + for (uint m = 0; m < mipmapCount; m++) + { + dds.mipmap(&mipmap, f, m); // @@ Load as float. + + inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), mipmap.depth(), f, m); + } + } } else { - // Regular image. 
- nv::Image image; - if (!image.load(input.str())) + if (nv::strCaseDiff(input.extension(), ".exr") == 0 || nv::strCaseDiff(input.extension(), ".hdr") == 0) { - fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); - return 1; + loadAsFloat = true; } - inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height()); - inputOptions.setMipmapData(image.pixels(), image.width(), image.height()); + if (loadAsFloat) + { + nv::AutoPtr image(nv::ImageIO::loadFloat(input.str())); + + if (image == NULL) + { + fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); + return EXIT_FAILURE; + } + + inputOptions.setFormat(nvtt::InputFormat_RGBA_32F); + inputOptions.setTextureLayout(nvtt::TextureType_2D, image->width(), image->height()); + + /*for (uint i = 0; i < image->componentNum(); i++) + { + inputOptions.setMipmapChannelData(image->channel(i), i, image->width(), image->height()); + }*/ + } + else + { + // Regular image. + nv::Image image; + if (!image.load(input.str())) + { + fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); + return 1; + } + + inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height()); + inputOptions.setMipmapData(image.pixels(), image.width(), image.height()); + } } - } - if (wrapRepeat) - { - inputOptions.setWrapMode(nvtt::WrapMode_Repeat); - } - else - { - inputOptions.setWrapMode(nvtt::WrapMode_Clamp); - } + if (wrapRepeat) + { + inputOptions.setWrapMode(nvtt::WrapMode_Repeat); + } + else + { + inputOptions.setWrapMode(nvtt::WrapMode_Clamp); + } - if (alpha) - { - inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency); - } - else - { - inputOptions.setAlphaMode(nvtt::AlphaMode_None); - } + if (alpha) + { + inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency); + } + else + { + inputOptions.setAlphaMode(nvtt::AlphaMode_None); + } - // Block compressed textures with mipmaps must be powers of two. 
- if (!noMipmaps && format != nvtt::Format_RGB) - { - inputOptions.setRoundMode(nvtt::RoundMode_ToPreviousPowerOfTwo); - } + // Block compressed textures with mipmaps must be powers of two. + if (!noMipmaps && format != nvtt::Format_RGB) + { + inputOptions.setRoundMode(nvtt::RoundMode_ToPreviousPowerOfTwo); + } - if (normal) - { - setNormalMap(inputOptions); - } - else if (color2normal) - { - setColorToNormalMap(inputOptions); - } - else - { - setColorMap(inputOptions); - } + if (normal) + { + setNormalMap(inputOptions); + } + else if (color2normal) + { + setColorToNormalMap(inputOptions); + } + else + { + setColorMap(inputOptions); + } - if (noMipmaps) - { - inputOptions.setMipmapGeneration(false); + if (noMipmaps) + { + inputOptions.setMipmapGeneration(false); + } + + /*if (premultiplyAlpha) + { + inputOptions.setPremultiplyAlpha(true); + inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied); + }*/ + + inputOptions.setMipmapFilter(mipmapFilter); } - /*if (premultiplyAlpha) - { - inputOptions.setPremultiplyAlpha(true); - inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied); - }*/ - inputOptions.setMipmapFilter(mipmapFilter); nvtt::CompressionOptions compressionOptions; compressionOptions.setFormat(format); @@ -545,8 +630,25 @@ int main(int argc, char *argv[]) //compressionOptions.setQuantization(/*color dithering*/true, /*alpha dithering*/false, /*binary alpha*/false); //compressionOptions.setPixelType(nvtt::PixelType_UnsignedNorm); //compressionOptions.setPixelFormat(5, 6, 5, 0); + //compressionOptions.setPixelFormat(8, 8, 8, 8); + + // A4R4G4B4 + //compressionOptions.setPixelFormat(16, 0xF00, 0xF0, 0xF, 0xF000); + + //compressionOptions.setPixelFormat(32, 0xFF0000, 0xFF00, 0xFF, 0xFF000000); + + // R10B20G10A2 + //compressionOptions.setPixelFormat(10, 10, 10, 2); + + // DXGI_FORMAT_R11G11B10_FLOAT + compressionOptions.setPixelType(nvtt::PixelType_Float); + compressionOptions.setPixelFormat(11, 11, 10, 0); } } + else if (format == nvtt::Format_BC6) + { + 
compressionOptions.setPixelType(nvtt::PixelType_UnsignedFloat); + } if (fast) { @@ -599,7 +701,15 @@ int main(int argc, char *argv[]) } } - outputHandler.setTotal(context.estimateSize(inputOptions, compressionOptions)); + int outputSize = 0; + if (useSurface) { + outputSize = context.estimateSize(image, 1, compressionOptions); + } + else { + outputSize = context.estimateSize(inputOptions, compressionOptions); + } + + outputHandler.setTotal(outputSize); outputHandler.setDisplayProgress(!silent); nvtt::OutputOptions outputOptions; @@ -625,10 +735,22 @@ int main(int argc, char *argv[]) nv::Timer timer; timer.start(); - if (!context.process(inputOptions, compressionOptions, outputOptions)) - { - return EXIT_FAILURE; + if (useSurface) { + if (!context.outputHeader(image, 1, compressionOptions, outputOptions)) { + fprintf(stderr, "Error writing file header.\n"); + return EXIT_FAILURE; + } + if (!context.compress(image, 0, 0, compressionOptions, outputOptions)) { + fprintf(stderr, "Error compressing file.\n"); + return EXIT_FAILURE; + } } + else { + if (!context.process(inputOptions, compressionOptions, outputOptions)) { + return EXIT_FAILURE; + } + } + timer.stop(); if (!silent) { diff --git a/src/nvtt/tools/decompress.cpp b/src/nvtt/tools/decompress.cpp index 0163cca..21a70b4 100644 --- a/src/nvtt/tools/decompress.cpp +++ b/src/nvtt/tools/decompress.cpp @@ -29,6 +29,8 @@ #include +#include + #include "cmdline.h" #include // clock @@ -42,6 +44,8 @@ int main(int argc, char *argv[]) bool mipmaps = false; bool faces = false; bool savePNG = false; + bool rgbm = false; + bool histogram = true; nv::Path input; nv::Path output; @@ -57,10 +61,18 @@ int main(int argc, char *argv[]) { mipmaps = true; } + else if (strcmp("-rgbm", argv[i]) == 0) + { + rgbm = true; + } else if (strcmp("-faces", argv[i]) == 0) { faces = true; } + else if (strcmp("-histogram", argv[i]) == 0) + { + histogram = true; + } else if (strcmp("-format", argv[i]) == 0) { if (i+1 == argc) break; @@ -109,90 
+121,125 @@ int main(int argc, char *argv[]) printf("Note: the .tga or .png extension is forced on outfile\n\n"); printf("Input options:\n"); - printf(" -forcenormal \tThe input image is a normal map.\n"); - printf(" -mipmaps \tDecompress all mipmaps.\n"); - printf(" -faces \tDecompress all faces.\n"); - printf(" -format \tOutput format ('tga' or 'png').\n"); + printf(" -forcenormal The input image is a normal map.\n"); + printf(" -mipmaps Decompress all mipmaps.\n"); + printf(" -faces Decompress all faces.\n"); + printf(" -histogram Output histogram.\n"); + printf(" -format Output format ('tga' or 'png').\n"); return 1; } - // Load surface. - // !!! DirectDrawSurface API doesn't support float images, so BC6 will be converted to 8-bit on load. - // Should use nvtt::Surface instead. - nv::DirectDrawSurface dds(input.str()); - if (!dds.isValid()) - { - fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str()); - return 1; - } - - if (!dds.isSupported() || dds.isTexture3D()) - { - fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str()); - return 1; - } - - uint faceCount; - if (dds.isTexture2D()) - { - faceCount = 1; - } - else - { - nvCheck(dds.isTextureCube()); - faceCount = 6; - } - - uint mipmapCount = dds.mipmapCount(); - - clock_t start = clock(); - - // apply arguments - if (forcenormal) - { - dds.setNormalFlag(true); - } - if (!faces) - { - faceCount = 1; - } - if (!mipmaps) - { - mipmapCount = 1; - } - - nv::Image mipmap; - nv::Path name; - - // strip extension, we force the tga extension - output.stripExtension(); - - // extract faces and mipmaps - for (uint f = 0; f < faceCount; f++) - { - for (uint m = 0; m < mipmapCount; m++) - { - dds.mipmap(&mipmap, f, m); - - // set output filename, if we are doing faces and/or mipmaps - name.copy(output); - if (faces) name.appendFormat("_face%d", f); - if (mipmaps) name.appendFormat("_mipmap%d", m); - name.append(savePNG ? 
".png" : ".tga"); - - nv::StdOutputStream stream(name.str()); - if (stream.isError()) { - fprintf(stderr, "Error opening '%s' for writting\n", name.str()); - return 1; - } - - nv::ImageIO::save(name.str(), stream, &mipmap); - } - } - clock_t end = clock(); - printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); + if (histogram) { + nvtt::Surface img; + if (!img.load(input.str())) { + fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str()); + return 1; + } + + float exposure = 2.2f; + float scale = 1.0f / exposure; + img.scaleBias(0, scale, 0); + img.scaleBias(1, scale, 0); + img.scaleBias(2, scale, 0); + + //img.toneMap(nvtt::ToneMapper_Reindhart, NULL); + //img.toSrgb(); + img.toGamma(2.2f); + + nvtt::Surface hist = nvtt::histogram(img, 3*512, 128); + + // Resize for pretier histograms. + hist.resize(512, 128, 1, nvtt::ResizeFilter_Box); + + nv::Path name; + name.copy(output); + name.stripExtension(); + name.append(".histogram"); + name.append(savePNG ? ".png" : ".tga"); + + hist.save(name.str()); + } + else { + + // Load surface. + // !!! DirectDrawSurface API doesn't support float images, so BC6 will be converted to 8-bit on load. + // Should use nvtt::Surface instead. 
+ nv::DirectDrawSurface dds(input.str()); + if (!dds.isValid()) + { + fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str()); + return 1; + } + + if (!dds.isSupported() || dds.isTexture3D()) + { + fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str()); + return 1; + } + + uint faceCount; + if (dds.isTexture2D()) + { + faceCount = 1; + } + else + { + nvCheck(dds.isTextureCube()); + faceCount = 6; + } + + uint mipmapCount = dds.mipmapCount(); + + clock_t start = clock(); + + // apply arguments + if (forcenormal) + { + dds.setNormalFlag(true); + } + if (!faces) + { + faceCount = 1; + } + if (!mipmaps) + { + mipmapCount = 1; + } + + nv::Image mipmap; + nv::Path name; + + // strip extension, we force the tga extension + output.stripExtension(); + + // extract faces and mipmaps + for (uint f = 0; f < faceCount; f++) + { + for (uint m = 0; m < mipmapCount; m++) + { + dds.mipmap(&mipmap, f, m); + + // set output filename, if we are doing faces and/or mipmaps + name.copy(output); + if (faces) name.appendFormat("_face%d", f); + if (mipmaps) name.appendFormat("_mipmap%d", m); + name.append(savePNG ? 
".png" : ".tga"); + + nv::StdOutputStream stream(name.str()); + if (stream.isError()) { + fprintf(stderr, "Error opening '%s' for writting\n", name.str()); + return 1; + } + + nv::ImageIO::save(name.str(), stream, &mipmap); + } + } + + clock_t end = clock(); + printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); + } return 0; } diff --git a/src/nvtt/tools/imgdiff.cpp b/src/nvtt/tools/imgdiff.cpp index 9809cf4..3dd5eca 100644 --- a/src/nvtt/tools/imgdiff.cpp +++ b/src/nvtt/tools/imgdiff.cpp @@ -23,18 +23,15 @@ #include "cmdline.h" -#include "nvmath/Color.h" -#include "nvmath/Vector.inl" - -#include "nvimage/Image.h" -#include "nvimage/DirectDrawSurface.h" +#include "nvtt/nvtt.h" #include "nvcore/StrLib.h" -#include "nvcore/StdStream.h" +#include "nvmath/nvmath.h" +#include // strstr #include - +/* static bool loadImage(nv::Image & image, const char * fileName) { if (nv::strCaseDiff(nv::Path::extension(fileName), ".dds") == 0) @@ -160,7 +157,7 @@ static float luma(const nv::Color32 & c) { //return 0.333f * float(c.r) + 0.334f * float(c.g) + 0.333f * float(c.b); //return 0.1f * float(c.r) + 0.8f * float(c.g) + 0.1f * float(c.g); } - +*/ int main(int argc, char *argv[]) { @@ -169,6 +166,7 @@ int main(int argc, char *argv[]) bool compareNormal = false; bool compareAlpha = false; + bool rangescale = false; nv::Path input0; nv::Path input1; @@ -178,14 +176,18 @@ int main(int argc, char *argv[]) for (int i = 1; i < argc; i++) { // Input options. 
- if (strcmp("-normal", argv[i]) == 0) + if (nv::strEqual("-normal", argv[i])) { compareNormal = true; } - else if (strcmp("-alpha", argv[i]) == 0) + else if (nv::strEqual("-alpha", argv[i])) { compareAlpha = true; } + else if (nv::strEqual("-rangescale", argv[i])) + { + rangescale = true; + } else if (argv[i][0] != '-') { input0 = argv[i]; @@ -209,12 +211,105 @@ int main(int argc, char *argv[]) printf("usage: nvimgdiff [options] original_file updated_file [output]\n\n"); printf("Diff options:\n"); - printf(" -normal \tCompare images as if they were normal maps.\n"); - printf(" -alpha \tCompare alpha weighted images.\n"); + printf(" -normal Compare images as if they were normal maps.\n"); + printf(" -alpha Compare alpha weighted images.\n"); + printf(" -rangescale Scale second image based on range of first one.\n"); return 1; } + nvtt::Surface image0, image1; + + if (!image0.load(input0.str())) { + printf("Error loading %s.", input0.str()); + return 1; + } + if (!image1.load(input1.str())) { + printf("Error loading %s.", input1.str()); + return 1; + } + + if (compareNormal) { + image0.setNormalMap(true); + image1.setNormalMap(true); + } + if (compareAlpha) { + image0.setAlphaMode(nvtt::AlphaMode_Transparency); + } + + // Do some transforms based on the naming convention of the file. + if (strstr(input1.str(), "rgbm")) { + + //image0.toGamma(2); + + image1.fromRGBM(1.0f, 0.25f); + image1.toLinear(2); + + image1.copyChannel(image0, 3); // Copy alpha channel from source. + image1.setAlphaMode(nvtt::AlphaMode_Transparency); + + rangescale = true; + } + + if (strstr(input1.str(), "bc6")) { + // @@ Do any transform that we may have done before compression. + + image1.copyChannel(image0, 3); // Copy alpha channel from source. + image1.setAlphaMode(nvtt::AlphaMode_Transparency); + } + + + // Scale second image to range of the first one. 
+ if (rangescale) { + float min_color[3], max_color[3]; + image0.range(0, &min_color[0], &max_color[0]); + image0.range(1, &min_color[1], &max_color[1]); + image0.range(2, &min_color[2], &max_color[2]); + float color_range = nv::max3(max_color[0], max_color[1], max_color[2]); + + const float max_color_range = 16.0f; + if (color_range > max_color_range) color_range = max_color_range; + +#if 0 + for (int i = 0; i < 3; i++) { + image0.scaleBias(i, 1.0f / color_range, 0.0f); + } + image0.toneMap(nvtt::ToneMapper_Linear, NULL); // Clamp without changing the hue. +#else + for (int i = 0; i < 3; i++) { + image1.scaleBias(i, color_range, 0.0f); + } +#endif + } + + float rmse = nvtt::rmsError(image0, image1); + //float rmsa = nvtt::rmsAlphaError(image0, image1); + + // In The Witness: + // exposure = key_value / luminance + // key_value = 0.22 + // min_luminance = 0.1 -> exposure = 2.2 + // max_luminance = 1.0 -> exposure = 0.22 + + float rmse0 = nvtt::rmsToneMappedError(image0, image1, 2.2f); + float rmse1 = nvtt::rmsToneMappedError(image0, image1, 1.0f); + float rmse2 = nvtt::rmsToneMappedError(image0, image1, 0.22f); + + printf("RMSE = %.5f %.5f %.5f -> %.5f | %.5f\n", rmse0, rmse1, rmse2, (rmse0 + rmse1 + rmse2)/3, rmse); + + + //printf("MSE = %f\n", rmse * rmse); + //printf("RMSE = %f\n", rmse); + //printf("PSNR = %f\n", (rmse == 0) ? 999.0 : 20.0 * log10(255.0 / rmse)); + + if (compareNormal) { + // @@ Does this assume normal maps are packed or unpacked? + float ae = nvtt::angularError(image0, image1); + printf("AE = %f\n", ae); + } + + +#if 0 nv::Image image0, image1; if (!loadImage(image0, input0.str())) return 0; if (!loadImage(image1, input1.str())) return 0; @@ -304,6 +399,7 @@ int main(int argc, char *argv[]) error_a.print(); } +#endif // @@ Write image difference. return 0;