diff --git a/project/vc9/nvtt/nvtt.vcproj b/project/vc9/nvtt/nvtt.vcproj index d0de758..8a867c8 100644 --- a/project/vc9/nvtt/nvtt.vcproj +++ b/project/vc9/nvtt/nvtt.vcproj @@ -1052,14 +1052,6 @@ RelativePath="..\..\..\src\nvtt\CompressorRGB.h" > - - - - diff --git a/src/nvimage/FloatImage.cpp b/src/nvimage/FloatImage.cpp index 0265a2a..f0405b9 100644 --- a/src/nvimage/FloatImage.cpp +++ b/src/nvimage/FloatImage.cpp @@ -956,7 +956,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, W void FloatImage::convolve(const Kernel2 & k, uint c, WrapMode wm) { - AutoPtr tmpImage = clone(); + AutoPtr tmpImage(clone()); uint w = m_width; uint h = m_height; diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index 8163eb3..aa28a84 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -152,11 +152,13 @@ namespace nv return value; } +#if NV_CC_MSVC inline float log2f(float x) { nvCheck(x >= 0); return logf(x) / logf(2.0f); } +#endif inline float lerp(float f0, float f1, float t) { @@ -195,10 +197,9 @@ namespace nv return f - floor(f); } - inline float fround(float f) // @@ rename floatRound + inline float floatRound(float f) { - // @@ Do something better. - return float(iround(f)); + return floorf(f + 0.5f); } // Eliminates negative zeros from a float array. @@ -234,6 +235,22 @@ namespace nv return 0; } + // I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule. + + // Quantize a [0, 1] full precision float, using exact endpoints. + inline float quantizeFloat(float f, int bits) { + float scale = (1 << bits) - 1; + float offset = 0.0f; + return floor(saturate(f) * scale + offset) / scale; + } + + // Quantize a [0, 1] full precision float, using uniform bins. 
+ /*inline float quantizeFloat(float f, int bits) { + float scale = (1 << bits); + float offset = 0.5f; + return floor(saturate(f) * scale + offset) / scale; + }*/ + } // nv #endif // NV_MATH_H diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt index e60f35b..3a0da41 100644 --- a/src/nvtt/CMakeLists.txt +++ b/src/nvtt/CMakeLists.txt @@ -14,7 +14,6 @@ SET(NVTT_SRCS CompressorDX10.h CompressorDX10.cpp # CompressorDX11.h CompressorDX11.cpp CompressorRGB.h CompressorRGB.cpp - CompressorRGBE.h CompressorRGBE.cpp Context.h Context.cpp QuickCompressDXT.h QuickCompressDXT.cpp OptimalCompressDXT.h OptimalCompressDXT.cpp diff --git a/src/nvtt/CompressorRGBE.cpp b/src/nvtt/CompressorRGBE.cpp deleted file mode 100644 index 3dccbac..0000000 --- a/src/nvtt/CompressorRGBE.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) 2009-2011 Ignacio Castano -// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "CompressorRGBE.h" -#include "CompressionOptions.h" -#include "OutputOptions.h" - -#include "nvimage/Image.h" -#include "nvimage/FloatImage.h" - -#include "nvmath/Color.h" - -#include "nvcore/Debug.h" - -using namespace nv; -using namespace nvtt; - -static Color32 toRgbe8(float r, float g, float b) -{ - Color32 c; - float v = max(max(r, g), b); - if (v < 1e-32) { - c.r = c.g = c.b = c.a = 0; - } - else { - int e; - v = frexp(v, &e) * 256.0f / v; - c.r = uint8(clamp(r * v, 0.0f, 255.0f)); - c.g = uint8(clamp(g * v, 0.0f, 255.0f)); - c.b = uint8(clamp(b * v, 0.0f, 255.0f)); - c.a = e + 128; - } - - return c; -} - - -void CompressorRGBE::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - nvDebugCheck (compressionOptions.format == nvtt::Format_RGBE); - - uint srcPitch = w; - uint srcPlane = w * h * d; - - // Allocate output scanline. 
- Color32 * dst = new Color32[w]; - - for (uint y = 0; y < h*d; y++) - { - const float * src = (const float *)data + y * srcPitch; - - for (uint x = 0; x < w; x++) - { - float r = src[x + 0 * srcPlane]; - float g = src[x + 1 * srcPlane]; - float b = src[x + 2 * srcPlane]; - - dst[x] = toRgbe8(r, g, b); - } - - if (outputOptions.outputHandler != NULL) - { - outputOptions.outputHandler->writeData(dst, w * 4); - } - } - - delete [] dst; -} diff --git a/src/nvtt/CompressorRGBE.h b/src/nvtt/CompressorRGBE.h deleted file mode 100644 index f2905bc..0000000 --- a/src/nvtt/CompressorRGBE.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2009-2011 Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#ifndef NVTT_COMPRESSORRGBE_H -#define NVTT_COMPRESSORRGBE_H - -#include "Compressor.h" - -namespace nv -{ - struct CompressorRGBE : public CompressorInterface - { - virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; - -} // nv namespace - - -#endif // NVTT_COMPRESSORRGBE_H diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp index 8baec6f..ce1cd5c 100644 --- a/src/nvtt/Context.cpp +++ b/src/nvtt/Context.cpp @@ -35,7 +35,6 @@ #include "CompressorDX10.h" #include "CompressorDX11.h" #include "CompressorRGB.h" -#include "CompressorRGBE.h" #include "cuda/CudaUtils.h" #include "cuda/CudaCompressorDXT.h" @@ -763,10 +762,6 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression { // Not supported. } - else if (compressionOptions.format == Format_RGBE) - { - return new CompressorRGBE; - } return NULL; } diff --git a/src/nvtt/Surface.cpp b/src/nvtt/Surface.cpp index 1586ce0..660d27d 100644 --- a/src/nvtt/Surface.cpp +++ b/src/nvtt/Surface.cpp @@ -1310,6 +1310,232 @@ void Surface::fromRGBM(float range/*= 1*/) } +static Color32 toRgbe8(float r, float g, float b) +{ + Color32 c; + float v = max(max(r, g), b); + if (v < 1e-32) { + c.r = c.g = c.b = c.a = 0; + } + else { + int e; + v = frexp(v, &e) * 256.0f / v; + c.r = uint8(clamp(r * v, 0.0f, 255.0f)); + c.g = uint8(clamp(g * v, 0.0f, 255.0f)); + c.b = uint8(clamp(b * v, 0.0f, 255.0f)); + c.a = e + 128; + } + + return c; +} + + +/* + Alen Ladavac @ GDAlgorithms-list on Feb 7, 2007: + One trick that we use to alleviate such problems is to use RGBE5.3 - + i.e. have a fixed point exponent. 
Note that it is not enough to just + shift the exponent up for 3 bits, but you actually have to convert + each pixel in the RGBE8 texture by unpacking it to floats and then + repacking it with a non-integer exponent, which gives different + mantissas as well. Now your jumps in exponent are much smaller, thus + the bands are not that noticeable. It is still not as good as FP16, + but it is much better than RGBE8. I hope this explanation is + understandable, if not I can fill in more details. + + Though there still are some bands, you can get an even better + precision if you upload that same texture as RGBA16, because you'll + get even more interpolation then, and it works good as a scalable + option for people with more GPU RAM). Alternatively, when some of the + future cards (hopefully, because I'm trying to lobby for that + everywhere :) ), start returning more than 8 bits, your scenes will + automatically look better even without using RGBA16. + + Jon Watte: + The interpolation of 5.3 is the same as that of 8 bits, because it's a + fixed point format. + + The reason using 5.3 helps, is that each bit of quantization in the + interpolation only means 1/8th of a fully significant bit. The + quantization still happens, it's just less visible. The trade-off is + that you get less dynamic range. + + Alen Ladavac: + True, but it is just a small part of the improvement. The greater part + is that RGB values have to be calculated according to the fractional + exponent. With integer exponent, the RGB values jump by a factor of 2 + when each bit changes in exponent, and 5.3 with correct adjustment of + RGB lowers this jump to be about 1.09, which is much better. I may not + be entirely correct on the numbers, which I'm pulling out from my + memory now, but it's a rough estimate. 
+*/
+/* Ward's version:
+static Color32 toRgbe8(float r, float g, float b)
+{
+    Color32 c;
+    float v = max(max(r, g), b);
+    if (v < 1e-32) {
+        c.r = c.g = c.b = c.a = 0;
+    }
+    else {
+        int e;
+        v = frexp(v, &e) * 256.0f / v;
+        c.r = uint8(clamp(r * v, 0.0f, 255.0f));
+        c.g = uint8(clamp(g * v, 0.0f, 255.0f));
+        c.b = uint8(clamp(b * v, 0.0f, 255.0f));
+        c.a = e + 128;
+    }
+
+    return c;
+}
+*/
+
+// Converts the surface, in place, to a shared exponent encoding.
+//
+// On return channels 0-2 (r, g, b) hold the quantized mantissas divided by 2^mantissaBits-1
+// and channel 3 (a) holds the biased shared exponent divided by 2^exponentBits-1, so every
+// channel is in the [0, 1] range. Does nothing if the surface is null.
+//
+//   mantissaBits: N, number of mantissa bits per component.
+//   exponentBits: E, number of bits of the shared exponent.
+//
+// For R9G9B9E5, use toRGBE(9, 5), for Ward's RGBE, use toRGBE(8, 8)
+void Surface::toRGBE(int mantissaBits, int exponentBits)
+{
+    // According to the OpenGL extension:
+    // http://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt
+    //
+    // Components red, green, and blue are first clamped (in the process,
+    // mapping NaN to zero) so:
+    //
+    //     red_c   = max(0, min(sharedexp_max, red))
+    //     green_c = max(0, min(sharedexp_max, green))
+    //     blue_c  = max(0, min(sharedexp_max, blue))
+    //
+    // where sharedexp_max is (2^N-1)/2^N * 2^(Emax-B), N is the number
+    // of mantissa bits per component, Emax is the maximum allowed biased
+    // exponent value (careful: not necessarily 2^E-1 when E is the number of
+    // exponent bits), bits, and B is the exponent bias. For the RGB9_E5_EXT
+    // format, N=9, Emax=31, and B=15.
+    //
+    // The largest clamped component, max_c, is determined:
+    //
+    //     max_c = max(red_c, green_c, blue_c)
+    //
+    // A preliminary shared exponent is computed:
+    //
+    //     exp_shared_p = max(-B-1, floor(log2(max_c))) + 1 + B
+    //
+    // A refined shared exponent is then computed as:
+    //
+    //     max_s = floor(max_c / 2^(exp_shared_p - B - N) + 0.5)
+    //
+    //                  { exp_shared_p,   0 <= max_s < 2^N
+    //     exp_shared = {
+    //                  { exp_shared_p+1, max_s == 2^N
+    //
+    // These integers values in the range 0 to 2^N-1 are then computed:
+    //
+    //     red_s   = floor(red_c / 2^(exp_shared - B - N) + 0.5)
+    //     green_s = floor(green_c / 2^(exp_shared - B - N) + 0.5)
+    //     blue_s  = floor(blue_c / 2^(exp_shared - B - N) + 0.5)
+
+    if (isNull()) return;
+
+    detach();
+
+    // Both counts are used as shift amounts below.
+    nvDebugCheck(mantissaBits > 0 && exponentBits > 0);
+
+    // mantissaBits = N
+    // exponentBits = E
+    // exponentMax = Emax
+    // exponentBias = B
+    // maxValue = sharedexp_max
+
+    // max mantissa, 2^N-1: 9 -> 511, 8 -> 255
+    const int mantissaMax = (1 << mantissaBits) - 1;
+
+    // max exponent: 5 -> 31, 8 -> 255
+    const int exponentMax = (1 << exponentBits) - 1;
+
+    // exponent bias: 5 -> 15, 8 -> 127
+    const int exponentBias = (1 << (exponentBits - 1)) - 1;
+
+    // Maximum representable value, (2^N-1)/2^N * 2^(Emax-B): (9, 5) -> 65408, (8, 8) -> HUGE.
+    // The ratio is over the mantissa range, not the exponent range. It is evaluated as
+    // (2^N-1) * 2^(Emax-B-N) because Emax-B is 128 for 8 exponent bits, which is too large
+    // for an integer shift and makes 2^(Emax-B) overflow a float.
+    const float maxValue = float(mantissaMax) * exp2(float(exponentMax - exponentBias - mantissaBits));
+
+    FloatImage * img = m->image;
+    float * r = img->channel(0);
+    float * g = img->channel(1);
+    float * b = img->channel(2);
+    float * a = img->channel(3);
+
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++) {
+        // Clamp components:
+        float R = ::clamp(r[i], 0.0f, maxValue);
+        float G = ::clamp(g[i], 0.0f, maxValue);
+        float B = ::clamp(b[i], 0.0f, maxValue);
+
+        // Compute max:
+        const float M = max(R, G, B);
+
+        // log2f(0) is -infinity and cannot be converted to an integer. For a zero maximum the
+        // formulas above give a zero exponent and zero mantissas, so store those directly.
+        // Written as a negated comparison so that a NaN maximum is also stored as zero.
+        if (!(M > 0.0f)) {
+            r[i] = 0.0f;
+            g[i] = 0.0f;
+            b[i] = 0.0f;
+            a[i] = 0.0f;
+            continue;
+        }
+
+        // Preliminary exponent:
+        int E = max(- exponentBias - 1, ifloor(log2f(M))) + 1 + exponentBias;
+
+        // M <= maxValue, so Emax is always sufficient; this only matters if log2f rounds up to
+        // the next integer for a value at the very top of the range.
+        if (E > exponentMax) E = exponentMax;
+
+        // Value of one mantissa unit at this exponent, 2^(exp_shared - B - N):
+        float unit = exp2(float(E - exponentBias - mantissaBits));
+
+        // Refine exponent: if the largest component rounds up to 2^N it does not fit in N bits.
+        // Rounded with floatRound, like the components below, so both always agree.
+        if (floatRound(M / unit) == float(mantissaMax + 1)) {
+            E += 1;
+            unit = exp2(float(E - exponentBias - mantissaBits));
+        }
+
+        R = floatRound(R / unit);
+        G = floatRound(G / unit);
+        B = floatRound(B / unit);
+
+        nvDebugCheck(R >= 0 && R <= mantissaMax);
+        nvDebugCheck(G >= 0 && G <= mantissaMax);
+        nvDebugCheck(B >= 0 && B <= mantissaMax);
+        nvDebugCheck(E >= 0 && E <= exponentMax);
+
+        // Store in [0, 1] range.
+        r[i] = R / mantissaMax;
+        g[i] = G / mantissaMax;
+        b[i] = B / mantissaMax;
+        a[i] = float(E) / exponentMax;
+    }
+}
+
+// Converts the surface, in place, from the shared exponent encoding written by toRGBE back
+// to floating point RGB.
+//
+// Channels 0-2 (r, g, b) are read as mantissas and channel 3 (a) as the biased exponent, all
+// in the [0, 1] range. On return channels 0-2 hold mantissa * 2^(exponent - B - N) and
+// channel 3 is set to 1. Does nothing if the surface is null.
+//
+//   mantissaBits: N, number of mantissa bits per component.
+//   exponentBits: E, number of bits of the shared exponent.
+void Surface::fromRGBE(int mantissaBits, int exponentBits)
+{
+    // According to the OpenGL extension:
+    // http://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt
+    //
+    // The 1st, 2nd, 3rd, and 4th components are called
+    // p_red, p_green, p_blue, and p_exp respectively and are treated as
+    // unsigned integers. These are then used to compute floating-point
+    // RGB components (ignoring the "Conversion to floating-point" section
+    // below in this case) as follows:
+    //
+    //     red   = p_red * 2^(p_exp - B - N)
+    //     green = p_green * 2^(p_exp - B - N)
+    //     blue  = p_blue * 2^(p_exp - B - N)
+    //
+    // where B is 15 (the exponent bias) and N is 9 (the number of mantissa
+    // bits)."
+
+    if (isNull()) return;
+
+    detach();
+
+    // Factors that expand the [0, 1] channels back to their integer ranges.
+    const float mantissaScale = float((1 << mantissaBits) - 1);
+    const float exponentScale = float((1 << exponentBits) - 1);
+
+    // exponent bias: 5 -> 15, 8 -> 127
+    const float exponentBias = float((1 << (exponentBits - 1)) - 1);
+
+    FloatImage * img = m->image;
+    float * r = img->channel(0);
+    float * g = img->channel(1);
+    float * b = img->channel(2);
+    float * a = img->channel(3);
+
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++) {
+        // RGBE are assumed to be in the [0, 1] range.
+        const float R = r[i] * mantissaScale;
+        const float G = g[i] * mantissaScale;
+        const float B = b[i] * mantissaScale;
+        const float E = a[i] * exponentScale;
+
+        // 2^(p_exp - B - N). The base is written as a float so that the call does not depend
+        // on how an integer base is converted during overload resolution.
+        const float M = pow(2.0f, E - exponentBias - mantissaBits);
+
+        r[i] = R * M;
+        g[i] = G * M;
+        b[i] = B * M;
+        a[i] = 1;
+    }
+}
+
 // Y is in the [0, 1] range, while CoCg are in the [-1, 1] range.
void Surface::toYCoCg() { diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index 22eacdb..354c5e4 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -70,6 +70,11 @@ namespace nvtt struct Surface; struct CubeSurface; + + // @@ I wish I had distinguished between "formats" and compressors. + // That is, 'DXT1' is a format 'DXT1a' and 'DXT1n' are DXT1 compressors. + // That is, 'DXT3' is a format 'DXT3n' is a DXT3 compressor. + // Having multiple enums for the same ids only creates confusion. Clean this up. /// Supported compression formats. enum Format { @@ -98,8 +103,6 @@ namespace nvtt Format_BC6, // Not supported yet. Format_BC7, // Not supported yet. - - Format_RGBE, }; /// Pixel types. These basically indicate how the output should be interpreted, but do not have any influence over the input. @@ -472,6 +475,8 @@ namespace nvtt //NVTT_API bool normalizeRange(float * rangeMin, float * rangeMax); NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.0f); NVTT_API void fromRGBM(float range = 1.0f); + NVTT_API void toRGBE(int mantissaBits, int exponentBits); + NVTT_API void fromRGBE(int mantissaBits, int exponentBits); NVTT_API void toYCoCg(); NVTT_API void blockScaleCoCg(int bits = 5, float threshold = 0.0f); NVTT_API void fromYCoCg();