diff --git a/src/nvcore/Debug.cpp b/src/nvcore/Debug.cpp index f980c07..616b84c 100644 --- a/src/nvcore/Debug.cpp +++ b/src/nvcore/Debug.cpp @@ -453,7 +453,7 @@ namespace { MSG msg; while( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) { - if( msg.message == WM_QUIT ) break; + //if( msg.message == WM_QUIT ) break; TranslateMessage( &msg ); DispatchMessage( &msg ); } @@ -467,12 +467,11 @@ namespace StringBuilder error_string; if( func != NULL ) { error_string.format( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); - nvDebug( error_string.str() ); } else { error_string.format( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); - nvDebug( error_string.str() ); } + nvDebug( error_string.str() ); if (debug::isDebuggerPresent()) { return NV_ABORT_DEBUG; diff --git a/src/nvcore/Utils.h b/src/nvcore/Utils.h index dd7fd5b..4498f34 100644 --- a/src/nvcore/Utils.h +++ b/src/nvcore/Utils.h @@ -70,15 +70,11 @@ namespace nv b = temp; } - /// Return the maximum of the two arguments. + /// Return the maximum of the two arguments. For floating point values, it returns the second value if the first is NaN. template inline const T & max(const T & a, const T & b) { - //return std::max(a, b); - if( a < b ) { - return b; - } - return a; + return (b < a) ? a : b; } /// Return the maximum of the three arguments. @@ -92,11 +88,7 @@ namespace nv template inline const T & min(const T & a, const T & b) { - //return std::min(a, b); - if( b < a ) { - return b; - } - return a; + return (a < b) ? a : b; } /// Return the maximum of the three arguments. 
diff --git a/src/nvimage/BlockDXT.cpp b/src/nvimage/BlockDXT.cpp index c0dc2f9..24336d7 100644 --- a/src/nvimage/BlockDXT.cpp +++ b/src/nvimage/BlockDXT.cpp @@ -1,673 +1,673 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "BlockDXT.h" -#include "ColorBlock.h" - -#include "nvcore/Stream.h" -#include "nvcore/Utils.h" // swap - - -using namespace nv; - - -/*---------------------------------------------------------------------------- -BlockDXT1 -----------------------------------------------------------------------------*/ - -uint BlockDXT1::evaluatePalette(Color32 color_array[4], bool d3d9/*= false*/) const -{ - // Does bit expansion before interpolation. 
- color_array[0].b = (col0.b << 3) | (col0.b >> 2); - color_array[0].g = (col0.g << 2) | (col0.g >> 4); - color_array[0].r = (col0.r << 3) | (col0.r >> 2); - color_array[0].a = 0xFF; - - // @@ Same as above, but faster? - // Color32 c; - // c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000); - // c.u |= (c.u >> 5) & 0x070007; - // c.u |= (c.u >> 6) & 0x000300; - // color_array[0].u = c.u; - - color_array[1].r = (col1.r << 3) | (col1.r >> 2); - color_array[1].g = (col1.g << 2) | (col1.g >> 4); - color_array[1].b = (col1.b << 3) | (col1.b >> 2); - color_array[1].a = 0xFF; - - // @@ Same as above, but faster? - // c.u = ((col1.u << 3) & 0xf8) | ((col1.u << 5) & 0xfc00) | ((col1.u << 8) & 0xf80000); - // c.u |= (c.u >> 5) & 0x070007; - // c.u |= (c.u >> 6) & 0x000300; - // color_array[1].u = c.u; - - if( col0.u > col1.u ) { - int bias = 0; - if (d3d9) bias = 1; - - // Four-color block: derive the other two colors. - color_array[2].r = (2 * color_array[0].r + color_array[1].r + bias) / 3; - color_array[2].g = (2 * color_array[0].g + color_array[1].g + bias) / 3; - color_array[2].b = (2 * color_array[0].b + color_array[1].b + bias) / 3; - color_array[2].a = 0xFF; - - color_array[3].r = (2 * color_array[1].r + color_array[0].r + bias) / 3; - color_array[3].g = (2 * color_array[1].g + color_array[0].g + bias) / 3; - color_array[3].b = (2 * color_array[1].b + color_array[0].b + bias) / 3; - color_array[3].a = 0xFF; - - return 4; - } - else { - // Three-color block: derive the other color. - color_array[2].r = (color_array[0].r + color_array[1].r) / 2; - color_array[2].g = (color_array[0].g + color_array[1].g) / 2; - color_array[2].b = (color_array[0].b + color_array[1].b) / 2; - color_array[2].a = 0xFF; - - // Set all components to 0 to match DXT specs. 
- color_array[3].r = 0x00; // color_array[2].r; - color_array[3].g = 0x00; // color_array[2].g; - color_array[3].b = 0x00; // color_array[2].b; - color_array[3].a = 0x00; - - return 3; - } -} - - -uint BlockDXT1::evaluatePaletteNV5x(Color32 color_array[4]) const -{ - // Does bit expansion before interpolation. - color_array[0].b = (3 * col0.b * 22) / 8; - color_array[0].g = (col0.g << 2) | (col0.g >> 4); - color_array[0].r = (3 * col0.r * 22) / 8; - color_array[0].a = 0xFF; - - color_array[1].r = (3 * col1.r * 22) / 8; - color_array[1].g = (col1.g << 2) | (col1.g >> 4); - color_array[1].b = (3 * col1.b * 22) / 8; - color_array[1].a = 0xFF; - - int gdiff = color_array[1].g - color_array[0].g; - - if( col0.u > col1.u ) { - // Four-color block: derive the other two colors. - color_array[2].r = ((2 * col0.r + col1.r) * 22) / 8; - color_array[2].g = (256 * color_array[0].g + gdiff / 4 + 128 + gdiff * 80) / 256; - color_array[2].b = ((2 * col0.b + col1.b) * 22) / 8; - color_array[2].a = 0xFF; - - color_array[3].r = ((2 * col1.r + col0.r) * 22) / 8; - color_array[3].g = (256 * color_array[1].g - gdiff / 4 + 128 - gdiff * 80) / 256; - color_array[3].b = ((2 * col1.b + col0.b) * 22) / 8; - color_array[3].a = 0xFF; - - return 4; - } - else { - // Three-color block: derive the other color. - color_array[2].r = ((col0.r + col1.r) * 33) / 8; - color_array[2].g = (256 * color_array[0].g + gdiff / 4 + 128 + gdiff * 128) / 256; - color_array[2].b = ((col0.b + col1.b) * 33) / 8; - color_array[2].a = 0xFF; - - // Set all components to 0 to match DXT specs. - color_array[3].r = 0x00; // color_array[2].r; - color_array[3].g = 0x00; // color_array[2].g; - color_array[3].b = 0x00; // color_array[2].b; - color_array[3].a = 0x00; - - return 3; - } -} - -// Evaluate palette assuming 3 color block. 
-void BlockDXT1::evaluatePalette3(Color32 color_array[4], bool d3d9) const -{ - color_array[0].b = (col0.b << 3) | (col0.b >> 2); - color_array[0].g = (col0.g << 2) | (col0.g >> 4); - color_array[0].r = (col0.r << 3) | (col0.r >> 2); - color_array[0].a = 0xFF; - - color_array[1].r = (col1.r << 3) | (col1.r >> 2); - color_array[1].g = (col1.g << 2) | (col1.g >> 4); - color_array[1].b = (col1.b << 3) | (col1.b >> 2); - color_array[1].a = 0xFF; - - // Three-color block: derive the other color. - color_array[2].r = (color_array[0].r + color_array[1].r) / 2; - color_array[2].g = (color_array[0].g + color_array[1].g) / 2; - color_array[2].b = (color_array[0].b + color_array[1].b) / 2; - color_array[2].a = 0xFF; - - // Set all components to 0 to match DXT specs. - color_array[3].r = 0x00; // color_array[2].r; - color_array[3].g = 0x00; // color_array[2].g; - color_array[3].b = 0x00; // color_array[2].b; - color_array[3].a = 0x00; -} - -// Evaluate palette assuming 4 color block. -void BlockDXT1::evaluatePalette4(Color32 color_array[4], bool d3d9) const -{ - color_array[0].b = (col0.b << 3) | (col0.b >> 2); - color_array[0].g = (col0.g << 2) | (col0.g >> 4); - color_array[0].r = (col0.r << 3) | (col0.r >> 2); - color_array[0].a = 0xFF; - - color_array[1].r = (col1.r << 3) | (col1.r >> 2); - color_array[1].g = (col1.g << 2) | (col1.g >> 4); - color_array[1].b = (col1.b << 3) | (col1.b >> 2); - color_array[1].a = 0xFF; - - int bias = 0; - if (d3d9) bias = 1; - - // Four-color block: derive the other two colors. 
- color_array[2].r = (2 * color_array[0].r + color_array[1].r + bias) / 3; - color_array[2].g = (2 * color_array[0].g + color_array[1].g + bias) / 3; - color_array[2].b = (2 * color_array[0].b + color_array[1].b + bias) / 3; - color_array[2].a = 0xFF; - - color_array[3].r = (2 * color_array[1].r + color_array[0].r + bias) / 3; - color_array[3].g = (2 * color_array[1].g + color_array[0].g + bias) / 3; - color_array[3].b = (2 * color_array[1].b + color_array[0].b + bias) / 3; - color_array[3].a = 0xFF; -} - - -void BlockDXT1::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const -{ - nvDebugCheck(block != NULL); - - // Decode color block. - Color32 color_array[4]; - evaluatePalette(color_array, d3d9); - - // Write color block. - for( uint j = 0; j < 4; j++ ) { - for( uint i = 0; i < 4; i++ ) { - uint idx = (row[j] >> (2 * i)) & 3; - block->color(i, j) = color_array[idx]; - } - } -} - -void BlockDXT1::decodeBlockNV5x(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - // Decode color block. - Color32 color_array[4]; - evaluatePaletteNV5x(color_array); - - // Write color block. - for( uint j = 0; j < 4; j++ ) { - for( uint i = 0; i < 4; i++ ) { - uint idx = (row[j] >> (2 * i)) & 3; - block->color(i, j) = color_array[idx]; - } - } -} - -void BlockDXT1::setIndices(int * idx) -{ - indices = 0; - for(uint i = 0; i < 16; i++) { - indices |= (idx[i] & 3) << (2 * i); - } -} - - -/// Flip DXT1 block vertically. -inline void BlockDXT1::flip4() -{ - swap(row[0], row[3]); - swap(row[1], row[2]); -} - -/// Flip half DXT1 block vertically. -inline void BlockDXT1::flip2() -{ - swap(row[0], row[1]); -} - - -/*---------------------------------------------------------------------------- -BlockDXT3 -----------------------------------------------------------------------------*/ - -void BlockDXT3::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const -{ - nvDebugCheck(block != NULL); - - // Decode color. - color.decodeBlock(block, d3d9); - - // Decode alpha. 
- alpha.decodeBlock(block, d3d9); -} - -void BlockDXT3::decodeBlockNV5x(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - color.decodeBlockNV5x(block); - alpha.decodeBlock(block); -} - -void AlphaBlockDXT3::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const -{ - nvDebugCheck(block != NULL); - - block->color(0x0).a = (alpha0 << 4) | alpha0; - block->color(0x1).a = (alpha1 << 4) | alpha1; - block->color(0x2).a = (alpha2 << 4) | alpha2; - block->color(0x3).a = (alpha3 << 4) | alpha3; - block->color(0x4).a = (alpha4 << 4) | alpha4; - block->color(0x5).a = (alpha5 << 4) | alpha5; - block->color(0x6).a = (alpha6 << 4) | alpha6; - block->color(0x7).a = (alpha7 << 4) | alpha7; - block->color(0x8).a = (alpha8 << 4) | alpha8; - block->color(0x9).a = (alpha9 << 4) | alpha9; - block->color(0xA).a = (alphaA << 4) | alphaA; - block->color(0xB).a = (alphaB << 4) | alphaB; - block->color(0xC).a = (alphaC << 4) | alphaC; - block->color(0xD).a = (alphaD << 4) | alphaD; - block->color(0xE).a = (alphaE << 4) | alphaE; - block->color(0xF).a = (alphaF << 4) | alphaF; -} - -/// Flip DXT3 alpha block vertically. -void AlphaBlockDXT3::flip4() -{ - swap(row[0], row[3]); - swap(row[1], row[2]); -} - -/// Flip half DXT3 alpha block vertically. -void AlphaBlockDXT3::flip2() -{ - swap(row[0], row[1]); -} - -/// Flip DXT3 block vertically. -void BlockDXT3::flip4() -{ - alpha.flip4(); - color.flip4(); -} - -/// Flip half DXT3 block vertically. 
-void BlockDXT3::flip2() -{ - alpha.flip2(); - color.flip2(); -} - - -/*---------------------------------------------------------------------------- -BlockDXT5 -----------------------------------------------------------------------------*/ - -void AlphaBlockDXT5::evaluatePalette(uint8 alpha[8], bool d3d9) const -{ - if (alpha0 > alpha1) { - evaluatePalette8(alpha, d3d9); - } - else { - evaluatePalette6(alpha, d3d9); - } -} - -void AlphaBlockDXT5::evaluatePalette8(uint8 alpha[8], bool d3d9) const -{ - int bias = 0; - if (d3d9) bias = 3; - - // 8-alpha block: derive the other six alphas. - // Bit code 000 = alpha0, 001 = alpha1, others are interpolated. - alpha[0] = alpha0; - alpha[1] = alpha1; - alpha[2] = (6 * alpha[0] + 1 * alpha[1] + bias) / 7; // bit code 010 - alpha[3] = (5 * alpha[0] + 2 * alpha[1] + bias) / 7; // bit code 011 - alpha[4] = (4 * alpha[0] + 3 * alpha[1] + bias) / 7; // bit code 100 - alpha[5] = (3 * alpha[0] + 4 * alpha[1] + bias) / 7; // bit code 101 - alpha[6] = (2 * alpha[0] + 5 * alpha[1] + bias) / 7; // bit code 110 - alpha[7] = (1 * alpha[0] + 6 * alpha[1] + bias) / 7; // bit code 111 -} - -void AlphaBlockDXT5::evaluatePalette6(uint8 alpha[8], bool d3d9) const -{ - int bias = 0; - if (d3d9) bias = 2; - - // 6-alpha block. - // Bit code 000 = alpha0, 001 = alpha1, others are interpolated. 
- alpha[0] = alpha0; - alpha[1] = alpha1; - alpha[2] = (4 * alpha[0] + 1 * alpha[1] + bias) / 5; // Bit code 010 - alpha[3] = (3 * alpha[0] + 2 * alpha[1] + bias) / 5; // Bit code 011 - alpha[4] = (2 * alpha[0] + 3 * alpha[1] + bias) / 5; // Bit code 100 - alpha[5] = (1 * alpha[0] + 4 * alpha[1] + bias) / 5; // Bit code 101 - alpha[6] = 0x00; // Bit code 110 - alpha[7] = 0xFF; // Bit code 111 -} - -void AlphaBlockDXT5::indices(uint8 index_array[16]) const -{ - index_array[0x0] = bits0; - index_array[0x1] = bits1; - index_array[0x2] = bits2; - index_array[0x3] = bits3; - index_array[0x4] = bits4; - index_array[0x5] = bits5; - index_array[0x6] = bits6; - index_array[0x7] = bits7; - index_array[0x8] = bits8; - index_array[0x9] = bits9; - index_array[0xA] = bitsA; - index_array[0xB] = bitsB; - index_array[0xC] = bitsC; - index_array[0xD] = bitsD; - index_array[0xE] = bitsE; - index_array[0xF] = bitsF; -} - -uint AlphaBlockDXT5::index(uint index) const -{ - nvDebugCheck(index < 16); - - int offset = (3 * index + 16); - return uint((this->u >> offset) & 0x7); -} - -void AlphaBlockDXT5::setIndex(uint index, uint value) -{ - nvDebugCheck(index < 16); - nvDebugCheck(value < 8); - - int offset = (3 * index + 16); - uint64 mask = uint64(0x7) << offset; - this->u = (this->u & ~mask) | (uint64(value) << offset); -} - -void AlphaBlockDXT5::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const -{ - nvDebugCheck(block != NULL); - - uint8 alpha_array[8]; - evaluatePalette(alpha_array, d3d9); - - uint8 index_array[16]; - indices(index_array); - - for(uint i = 0; i < 16; i++) { - block->color(i).a = alpha_array[index_array[i]]; - } -} - -void AlphaBlockDXT5::flip4() -{ - uint64 * b = (uint64 *)this; - - // @@ The masks might have to be byte swapped. 
- uint64 tmp = (*b & POSH_U64(0x000000000000FFFF)); - tmp |= (*b & POSH_U64(0x000000000FFF0000)) << 36; - tmp |= (*b & POSH_U64(0x000000FFF0000000)) << 12; - tmp |= (*b & POSH_U64(0x000FFF0000000000)) >> 12; - tmp |= (*b & POSH_U64(0xFFF0000000000000)) >> 36; - - *b = tmp; -} - -void AlphaBlockDXT5::flip2() -{ - uint * b = (uint *)this; - - // @@ The masks might have to be byte swapped. - uint tmp = (*b & 0xFF000000); - tmp |= (*b & 0x00000FFF) << 12; - tmp |= (*b & 0x00FFF000) >> 12; - - *b = tmp; -} - -void BlockDXT5::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const -{ - nvDebugCheck(block != NULL); - - // Decode color. - color.decodeBlock(block, d3d9); - - // Decode alpha. - alpha.decodeBlock(block, d3d9); -} - -void BlockDXT5::decodeBlockNV5x(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - // Decode color. - color.decodeBlockNV5x(block); - - // Decode alpha. - alpha.decodeBlock(block); -} - -/// Flip DXT5 block vertically. -void BlockDXT5::flip4() -{ - alpha.flip4(); - color.flip4(); -} - -/// Flip half DXT5 block vertically. -void BlockDXT5::flip2() -{ - alpha.flip2(); - color.flip2(); -} - - -/// Decode ATI1 block. -void BlockATI1::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const -{ - uint8 alpha_array[8]; - alpha.evaluatePalette(alpha_array, d3d9); - - uint8 index_array[16]; - alpha.indices(index_array); - - for(uint i = 0; i < 16; i++) { - Color32 & c = block->color(i); - c.b = c.g = c.r = alpha_array[index_array[i]]; - c.a = 255; - } -} - -/// Flip ATI1 block vertically. -void BlockATI1::flip4() -{ - alpha.flip4(); -} - -/// Flip half ATI1 block vertically. -void BlockATI1::flip2() -{ - alpha.flip2(); -} - - -/// Decode ATI2 block. 
-void BlockATI2::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const -{ - uint8 alpha_array[8]; - uint8 index_array[16]; - - x.evaluatePalette(alpha_array, d3d9); - x.indices(index_array); - - for(uint i = 0; i < 16; i++) { - Color32 & c = block->color(i); - c.r = alpha_array[index_array[i]]; - } - - y.evaluatePalette(alpha_array, d3d9); - y.indices(index_array); - - for(uint i = 0; i < 16; i++) { - Color32 & c = block->color(i); - c.g = alpha_array[index_array[i]]; - c.b = 0; - c.a = 255; - } -} - -/// Flip ATI2 block vertically. -void BlockATI2::flip4() -{ - x.flip4(); - y.flip4(); -} - -/// Flip half ATI2 block vertically. -void BlockATI2::flip2() -{ - x.flip2(); - y.flip2(); -} - - -void BlockCTX1::evaluatePalette(Color32 color_array[4]) const -{ - // Does bit expansion before interpolation. - color_array[0].b = 0x00; - color_array[0].g = col0[1]; - color_array[0].r = col0[0]; - color_array[0].a = 0xFF; - - color_array[1].r = 0x00; - color_array[1].g = col0[1]; - color_array[1].b = col1[0]; - color_array[1].a = 0xFF; - - color_array[2].r = 0x00; - color_array[2].g = (2 * color_array[0].g + color_array[1].g) / 3; - color_array[2].b = (2 * color_array[0].b + color_array[1].b) / 3; - color_array[2].a = 0xFF; - - color_array[3].r = 0x00; - color_array[3].g = (2 * color_array[1].g + color_array[0].g) / 3; - color_array[3].b = (2 * color_array[1].b + color_array[0].b) / 3; - color_array[3].a = 0xFF; -} - -void BlockCTX1::decodeBlock(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - // Decode color block. - Color32 color_array[4]; - evaluatePalette(color_array); - - // Write color block. - for( uint j = 0; j < 4; j++ ) { - for( uint i = 0; i < 4; i++ ) { - uint idx = (row[j] >> (2 * i)) & 3; - block->color(i, j) = color_array[idx]; - } - } -} - -void BlockCTX1::setIndices(int * idx) -{ - indices = 0; - for(uint i = 0; i < 16; i++) { - indices |= (idx[i] & 3) << (2 * i); - } -} - - -/// Flip CTX1 block vertically. 
-inline void BlockCTX1::flip4() -{ - swap(row[0], row[3]); - swap(row[1], row[2]); -} - -/// Flip half CTX1 block vertically. -inline void BlockCTX1::flip2() -{ - swap(row[0], row[1]); -} - - - - -Stream & nv::operator<<(Stream & stream, BlockDXT1 & block) -{ - stream << block.col0.u << block.col1.u; - stream.serialize(&block.indices, sizeof(block.indices)); - return stream; -} - -Stream & nv::operator<<(Stream & stream, AlphaBlockDXT3 & block) -{ - stream.serialize(&block, sizeof(block)); - return stream; -} - -Stream & nv::operator<<(Stream & stream, BlockDXT3 & block) -{ - return stream << block.alpha << block.color; -} - -Stream & nv::operator<<(Stream & stream, AlphaBlockDXT5 & block) -{ - stream.serialize(&block, sizeof(block)); - return stream; -} - -Stream & nv::operator<<(Stream & stream, BlockDXT5 & block) -{ - return stream << block.alpha << block.color; -} - -Stream & nv::operator<<(Stream & stream, BlockATI1 & block) -{ - return stream << block.alpha; -} - -Stream & nv::operator<<(Stream & stream, BlockATI2 & block) -{ - return stream << block.x << block.y; -} - -Stream & nv::operator<<(Stream & stream, BlockCTX1 & block) -{ - stream.serialize(&block, sizeof(block)); - return stream; -} - +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "BlockDXT.h" +#include "ColorBlock.h" + +#include "nvcore/Stream.h" +#include "nvcore/Utils.h" // swap + + +using namespace nv; + + +/*---------------------------------------------------------------------------- +BlockDXT1 +----------------------------------------------------------------------------*/ + +uint BlockDXT1::evaluatePalette(Color32 color_array[4], bool d3d9/*= false*/) const +{ + // Does bit expansion before interpolation. + color_array[0].b = (col0.b << 3) | (col0.b >> 2); + color_array[0].g = (col0.g << 2) | (col0.g >> 4); + color_array[0].r = (col0.r << 3) | (col0.r >> 2); + color_array[0].a = 0xFF; + + // @@ Same as above, but faster? + // Color32 c; + // c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000); + // c.u |= (c.u >> 5) & 0x070007; + // c.u |= (c.u >> 6) & 0x000300; + // color_array[0].u = c.u; + + color_array[1].r = (col1.r << 3) | (col1.r >> 2); + color_array[1].g = (col1.g << 2) | (col1.g >> 4); + color_array[1].b = (col1.b << 3) | (col1.b >> 2); + color_array[1].a = 0xFF; + + // @@ Same as above, but faster? + // c.u = ((col1.u << 3) & 0xf8) | ((col1.u << 5) & 0xfc00) | ((col1.u << 8) & 0xf80000); + // c.u |= (c.u >> 5) & 0x070007; + // c.u |= (c.u >> 6) & 0x000300; + // color_array[1].u = c.u; + + if( col0.u > col1.u ) { + int bias = 0; + if (d3d9) bias = 1; + + // Four-color block: derive the other two colors. 
+ color_array[2].r = (2 * color_array[0].r + color_array[1].r + bias) / 3; + color_array[2].g = (2 * color_array[0].g + color_array[1].g + bias) / 3; + color_array[2].b = (2 * color_array[0].b + color_array[1].b + bias) / 3; + color_array[2].a = 0xFF; + + color_array[3].r = (2 * color_array[1].r + color_array[0].r + bias) / 3; + color_array[3].g = (2 * color_array[1].g + color_array[0].g + bias) / 3; + color_array[3].b = (2 * color_array[1].b + color_array[0].b + bias) / 3; + color_array[3].a = 0xFF; + + return 4; + } + else { + // Three-color block: derive the other color. + color_array[2].r = (color_array[0].r + color_array[1].r) / 2; + color_array[2].g = (color_array[0].g + color_array[1].g) / 2; + color_array[2].b = (color_array[0].b + color_array[1].b) / 2; + color_array[2].a = 0xFF; + + // Set all components to 0 to match DXT specs. + color_array[3].r = 0x00; // color_array[2].r; + color_array[3].g = 0x00; // color_array[2].g; + color_array[3].b = 0x00; // color_array[2].b; + color_array[3].a = 0x00; + + return 3; + } +} + + +uint BlockDXT1::evaluatePaletteNV5x(Color32 color_array[4]) const +{ + // Does bit expansion before interpolation. + color_array[0].b = (3 * col0.b * 22) / 8; + color_array[0].g = (col0.g << 2) | (col0.g >> 4); + color_array[0].r = (3 * col0.r * 22) / 8; + color_array[0].a = 0xFF; + + color_array[1].r = (3 * col1.r * 22) / 8; + color_array[1].g = (col1.g << 2) | (col1.g >> 4); + color_array[1].b = (3 * col1.b * 22) / 8; + color_array[1].a = 0xFF; + + int gdiff = color_array[1].g - color_array[0].g; + + if( col0.u > col1.u ) { + // Four-color block: derive the other two colors. 
+ color_array[2].r = ((2 * col0.r + col1.r) * 22) / 8; + color_array[2].g = (256 * color_array[0].g + gdiff / 4 + 128 + gdiff * 80) / 256; + color_array[2].b = ((2 * col0.b + col1.b) * 22) / 8; + color_array[2].a = 0xFF; + + color_array[3].r = ((2 * col1.r + col0.r) * 22) / 8; + color_array[3].g = (256 * color_array[1].g - gdiff / 4 + 128 - gdiff * 80) / 256; + color_array[3].b = ((2 * col1.b + col0.b) * 22) / 8; + color_array[3].a = 0xFF; + + return 4; + } + else { + // Three-color block: derive the other color. + color_array[2].r = ((col0.r + col1.r) * 33) / 8; + color_array[2].g = (256 * color_array[0].g + gdiff / 4 + 128 + gdiff * 128) / 256; + color_array[2].b = ((col0.b + col1.b) * 33) / 8; + color_array[2].a = 0xFF; + + // Set all components to 0 to match DXT specs. + color_array[3].r = 0x00; // color_array[2].r; + color_array[3].g = 0x00; // color_array[2].g; + color_array[3].b = 0x00; // color_array[2].b; + color_array[3].a = 0x00; + + return 3; + } +} + +// Evaluate palette assuming 3 color block. +void BlockDXT1::evaluatePalette3(Color32 color_array[4], bool d3d9) const +{ + color_array[0].b = (col0.b << 3) | (col0.b >> 2); + color_array[0].g = (col0.g << 2) | (col0.g >> 4); + color_array[0].r = (col0.r << 3) | (col0.r >> 2); + color_array[0].a = 0xFF; + + color_array[1].r = (col1.r << 3) | (col1.r >> 2); + color_array[1].g = (col1.g << 2) | (col1.g >> 4); + color_array[1].b = (col1.b << 3) | (col1.b >> 2); + color_array[1].a = 0xFF; + + // Three-color block: derive the other color. + color_array[2].r = (color_array[0].r + color_array[1].r) / 2; + color_array[2].g = (color_array[0].g + color_array[1].g) / 2; + color_array[2].b = (color_array[0].b + color_array[1].b) / 2; + color_array[2].a = 0xFF; + + // Set all components to 0 to match DXT specs. 
+ color_array[3].r = 0x00; // color_array[2].r; + color_array[3].g = 0x00; // color_array[2].g; + color_array[3].b = 0x00; // color_array[2].b; + color_array[3].a = 0x00; +} + +// Evaluate palette assuming 4 color block. +void BlockDXT1::evaluatePalette4(Color32 color_array[4], bool d3d9) const +{ + color_array[0].b = (col0.b << 3) | (col0.b >> 2); + color_array[0].g = (col0.g << 2) | (col0.g >> 4); + color_array[0].r = (col0.r << 3) | (col0.r >> 2); + color_array[0].a = 0xFF; + + color_array[1].r = (col1.r << 3) | (col1.r >> 2); + color_array[1].g = (col1.g << 2) | (col1.g >> 4); + color_array[1].b = (col1.b << 3) | (col1.b >> 2); + color_array[1].a = 0xFF; + + int bias = 0; + if (d3d9) bias = 1; + + // Four-color block: derive the other two colors. + color_array[2].r = (2 * color_array[0].r + color_array[1].r + bias) / 3; + color_array[2].g = (2 * color_array[0].g + color_array[1].g + bias) / 3; + color_array[2].b = (2 * color_array[0].b + color_array[1].b + bias) / 3; + color_array[2].a = 0xFF; + + color_array[3].r = (2 * color_array[1].r + color_array[0].r + bias) / 3; + color_array[3].g = (2 * color_array[1].g + color_array[0].g + bias) / 3; + color_array[3].b = (2 * color_array[1].b + color_array[0].b + bias) / 3; + color_array[3].a = 0xFF; +} + + +void BlockDXT1::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + nvDebugCheck(block != NULL); + + // Decode color block. + Color32 color_array[4]; + evaluatePalette(color_array, d3d9); + + // Write color block. + for( uint j = 0; j < 4; j++ ) { + for( uint i = 0; i < 4; i++ ) { + uint idx = (row[j] >> (2 * i)) & 3; + block->color(i, j) = color_array[idx]; + } + } +} + +void BlockDXT1::decodeBlockNV5x(ColorBlock * block) const +{ + nvDebugCheck(block != NULL); + + // Decode color block. + Color32 color_array[4]; + evaluatePaletteNV5x(color_array); + + // Write color block. 
+ for( uint j = 0; j < 4; j++ ) { + for( uint i = 0; i < 4; i++ ) { + uint idx = (row[j] >> (2 * i)) & 3; + block->color(i, j) = color_array[idx]; + } + } +} + +void BlockDXT1::setIndices(int * idx) +{ + indices = 0; + for(uint i = 0; i < 16; i++) { + indices |= (idx[i] & 3) << (2 * i); + } +} + + +/// Flip DXT1 block vertically. +inline void BlockDXT1::flip4() +{ + swap(row[0], row[3]); + swap(row[1], row[2]); +} + +/// Flip half DXT1 block vertically. +inline void BlockDXT1::flip2() +{ + swap(row[0], row[1]); +} + + +/*---------------------------------------------------------------------------- +BlockDXT3 +----------------------------------------------------------------------------*/ + +void BlockDXT3::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + nvDebugCheck(block != NULL); + + // Decode color. + color.decodeBlock(block, d3d9); + + // Decode alpha. + alpha.decodeBlock(block, d3d9); +} + +void BlockDXT3::decodeBlockNV5x(ColorBlock * block) const +{ + nvDebugCheck(block != NULL); + + color.decodeBlockNV5x(block); + alpha.decodeBlock(block); +} + +void AlphaBlockDXT3::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + nvDebugCheck(block != NULL); + + block->color(0x0).a = (alpha0 << 4) | alpha0; + block->color(0x1).a = (alpha1 << 4) | alpha1; + block->color(0x2).a = (alpha2 << 4) | alpha2; + block->color(0x3).a = (alpha3 << 4) | alpha3; + block->color(0x4).a = (alpha4 << 4) | alpha4; + block->color(0x5).a = (alpha5 << 4) | alpha5; + block->color(0x6).a = (alpha6 << 4) | alpha6; + block->color(0x7).a = (alpha7 << 4) | alpha7; + block->color(0x8).a = (alpha8 << 4) | alpha8; + block->color(0x9).a = (alpha9 << 4) | alpha9; + block->color(0xA).a = (alphaA << 4) | alphaA; + block->color(0xB).a = (alphaB << 4) | alphaB; + block->color(0xC).a = (alphaC << 4) | alphaC; + block->color(0xD).a = (alphaD << 4) | alphaD; + block->color(0xE).a = (alphaE << 4) | alphaE; + block->color(0xF).a = (alphaF << 4) | alphaF; +} + +/// Flip DXT3 alpha 
block vertically. +void AlphaBlockDXT3::flip4() +{ + swap(row[0], row[3]); + swap(row[1], row[2]); +} + +/// Flip half DXT3 alpha block vertically. +void AlphaBlockDXT3::flip2() +{ + swap(row[0], row[1]); +} + +/// Flip DXT3 block vertically. +void BlockDXT3::flip4() +{ + alpha.flip4(); + color.flip4(); +} + +/// Flip half DXT3 block vertically. +void BlockDXT3::flip2() +{ + alpha.flip2(); + color.flip2(); +} + + +/*---------------------------------------------------------------------------- +BlockDXT5 +----------------------------------------------------------------------------*/ + +void AlphaBlockDXT5::evaluatePalette(uint8 alpha[8], bool d3d9) const +{ + if (alpha0 > alpha1) { + evaluatePalette8(alpha, d3d9); + } + else { + evaluatePalette6(alpha, d3d9); + } +} + +void AlphaBlockDXT5::evaluatePalette8(uint8 alpha[8], bool d3d9) const +{ + int bias = 0; + if (d3d9) bias = 3; + + // 8-alpha block: derive the other six alphas. + // Bit code 000 = alpha0, 001 = alpha1, others are interpolated. + alpha[0] = alpha0; + alpha[1] = alpha1; + alpha[2] = (6 * alpha[0] + 1 * alpha[1] + bias) / 7; // bit code 010 + alpha[3] = (5 * alpha[0] + 2 * alpha[1] + bias) / 7; // bit code 011 + alpha[4] = (4 * alpha[0] + 3 * alpha[1] + bias) / 7; // bit code 100 + alpha[5] = (3 * alpha[0] + 4 * alpha[1] + bias) / 7; // bit code 101 + alpha[6] = (2 * alpha[0] + 5 * alpha[1] + bias) / 7; // bit code 110 + alpha[7] = (1 * alpha[0] + 6 * alpha[1] + bias) / 7; // bit code 111 +} + +void AlphaBlockDXT5::evaluatePalette6(uint8 alpha[8], bool d3d9) const +{ + int bias = 0; + if (d3d9) bias = 2; + + // 6-alpha block. + // Bit code 000 = alpha0, 001 = alpha1, others are interpolated. 
+ alpha[0] = alpha0; + alpha[1] = alpha1; + alpha[2] = (4 * alpha[0] + 1 * alpha[1] + bias) / 5; // Bit code 010 + alpha[3] = (3 * alpha[0] + 2 * alpha[1] + bias) / 5; // Bit code 011 + alpha[4] = (2 * alpha[0] + 3 * alpha[1] + bias) / 5; // Bit code 100 + alpha[5] = (1 * alpha[0] + 4 * alpha[1] + bias) / 5; // Bit code 101 + alpha[6] = 0x00; // Bit code 110 + alpha[7] = 0xFF; // Bit code 111 +} + +void AlphaBlockDXT5::indices(uint8 index_array[16]) const +{ + index_array[0x0] = bits0; + index_array[0x1] = bits1; + index_array[0x2] = bits2; + index_array[0x3] = bits3; + index_array[0x4] = bits4; + index_array[0x5] = bits5; + index_array[0x6] = bits6; + index_array[0x7] = bits7; + index_array[0x8] = bits8; + index_array[0x9] = bits9; + index_array[0xA] = bitsA; + index_array[0xB] = bitsB; + index_array[0xC] = bitsC; + index_array[0xD] = bitsD; + index_array[0xE] = bitsE; + index_array[0xF] = bitsF; +} + +uint AlphaBlockDXT5::index(uint index) const +{ + nvDebugCheck(index < 16); + + int offset = (3 * index + 16); + return uint((this->u >> offset) & 0x7); +} + +void AlphaBlockDXT5::setIndex(uint index, uint value) +{ + nvDebugCheck(index < 16); + nvDebugCheck(value < 8); + + int offset = (3 * index + 16); + uint64 mask = uint64(0x7) << offset; + this->u = (this->u & ~mask) | (uint64(value) << offset); +} + +void AlphaBlockDXT5::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + nvDebugCheck(block != NULL); + + uint8 alpha_array[8]; + evaluatePalette(alpha_array, d3d9); + + uint8 index_array[16]; + indices(index_array); + + for(uint i = 0; i < 16; i++) { + block->color(i).a = alpha_array[index_array[i]]; + } +} + +void AlphaBlockDXT5::flip4() +{ + uint64 * b = (uint64 *)this; + + // @@ The masks might have to be byte swapped. 
+ uint64 tmp = (*b & POSH_U64(0x000000000000FFFF)); + tmp |= (*b & POSH_U64(0x000000000FFF0000)) << 36; + tmp |= (*b & POSH_U64(0x000000FFF0000000)) << 12; + tmp |= (*b & POSH_U64(0x000FFF0000000000)) >> 12; + tmp |= (*b & POSH_U64(0xFFF0000000000000)) >> 36; + + *b = tmp; +} + +void AlphaBlockDXT5::flip2() +{ + uint * b = (uint *)this; + + // @@ The masks might have to be byte swapped. + uint tmp = (*b & 0xFF000000); + tmp |= (*b & 0x00000FFF) << 12; + tmp |= (*b & 0x00FFF000) >> 12; + + *b = tmp; +} + +void BlockDXT5::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + nvDebugCheck(block != NULL); + + // Decode color. + color.decodeBlock(block, d3d9); + + // Decode alpha. + alpha.decodeBlock(block, d3d9); +} + +void BlockDXT5::decodeBlockNV5x(ColorBlock * block) const +{ + nvDebugCheck(block != NULL); + + // Decode color. + color.decodeBlockNV5x(block); + + // Decode alpha. + alpha.decodeBlock(block); +} + +/// Flip DXT5 block vertically. +void BlockDXT5::flip4() +{ + alpha.flip4(); + color.flip4(); +} + +/// Flip half DXT5 block vertically. +void BlockDXT5::flip2() +{ + alpha.flip2(); + color.flip2(); +} + + +/// Decode ATI1 block. +void BlockATI1::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + uint8 alpha_array[8]; + alpha.evaluatePalette(alpha_array, d3d9); + + uint8 index_array[16]; + alpha.indices(index_array); + + for(uint i = 0; i < 16; i++) { + Color32 & c = block->color(i); + c.b = c.g = c.r = alpha_array[index_array[i]]; + c.a = 255; + } +} + +/// Flip ATI1 block vertically. +void BlockATI1::flip4() +{ + alpha.flip4(); +} + +/// Flip half ATI1 block vertically. +void BlockATI1::flip2() +{ + alpha.flip2(); +} + + +/// Decode ATI2 block. 
+void BlockATI2::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + uint8 alpha_array[8]; + uint8 index_array[16]; + + x.evaluatePalette(alpha_array, d3d9); + x.indices(index_array); + + for(uint i = 0; i < 16; i++) { + Color32 & c = block->color(i); + c.r = alpha_array[index_array[i]]; + } + + y.evaluatePalette(alpha_array, d3d9); + y.indices(index_array); + + for(uint i = 0; i < 16; i++) { + Color32 & c = block->color(i); + c.g = alpha_array[index_array[i]]; + c.b = 0; + c.a = 255; + } +} + +/// Flip ATI2 block vertically. +void BlockATI2::flip4() +{ + x.flip4(); + y.flip4(); +} + +/// Flip half ATI2 block vertically. +void BlockATI2::flip2() +{ + x.flip2(); + y.flip2(); +} + + +void BlockCTX1::evaluatePalette(Color32 color_array[4]) const +{ + // Does bit expansion before interpolation. + color_array[0].b = 0x00; + color_array[0].g = col0[1]; + color_array[0].r = col0[0]; + color_array[0].a = 0xFF; + + color_array[1].r = 0x00; + color_array[1].g = col0[1]; + color_array[1].b = col1[0]; + color_array[1].a = 0xFF; + + color_array[2].r = 0x00; + color_array[2].g = (2 * color_array[0].g + color_array[1].g) / 3; + color_array[2].b = (2 * color_array[0].b + color_array[1].b) / 3; + color_array[2].a = 0xFF; + + color_array[3].r = 0x00; + color_array[3].g = (2 * color_array[1].g + color_array[0].g) / 3; + color_array[3].b = (2 * color_array[1].b + color_array[0].b) / 3; + color_array[3].a = 0xFF; +} + +void BlockCTX1::decodeBlock(ColorBlock * block) const +{ + nvDebugCheck(block != NULL); + + // Decode color block. + Color32 color_array[4]; + evaluatePalette(color_array); + + // Write color block. + for( uint j = 0; j < 4; j++ ) { + for( uint i = 0; i < 4; i++ ) { + uint idx = (row[j] >> (2 * i)) & 3; + block->color(i, j) = color_array[idx]; + } + } +} + +void BlockCTX1::setIndices(int * idx) +{ + indices = 0; + for(uint i = 0; i < 16; i++) { + indices |= (idx[i] & 3) << (2 * i); + } +} + + +/// Flip CTX1 block vertically. 
+inline void BlockCTX1::flip4() +{ + swap(row[0], row[3]); + swap(row[1], row[2]); +} + +/// Flip half CTX1 block vertically. +inline void BlockCTX1::flip2() +{ + swap(row[0], row[1]); +} + + + + +Stream & nv::operator<<(Stream & stream, BlockDXT1 & block) +{ + stream << block.col0.u << block.col1.u; + stream.serialize(&block.indices, sizeof(block.indices)); + return stream; +} + +Stream & nv::operator<<(Stream & stream, AlphaBlockDXT3 & block) +{ + stream.serialize(&block, sizeof(block)); + return stream; +} + +Stream & nv::operator<<(Stream & stream, BlockDXT3 & block) +{ + return stream << block.alpha << block.color; +} + +Stream & nv::operator<<(Stream & stream, AlphaBlockDXT5 & block) +{ + stream.serialize(&block, sizeof(block)); + return stream; +} + +Stream & nv::operator<<(Stream & stream, BlockDXT5 & block) +{ + return stream << block.alpha << block.color; +} + +Stream & nv::operator<<(Stream & stream, BlockATI1 & block) +{ + return stream << block.alpha; +} + +Stream & nv::operator<<(Stream & stream, BlockATI2 & block) +{ + return stream << block.x << block.y; +} + +Stream & nv::operator<<(Stream & stream, BlockCTX1 & block) +{ + stream.serialize(&block, sizeof(block)); + return stream; +} + diff --git a/src/nvimage/BlockDXT.h b/src/nvimage/BlockDXT.h index b2b3de8..df0541b 100644 --- a/src/nvimage/BlockDXT.h +++ b/src/nvimage/BlockDXT.h @@ -1,228 +1,228 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included 
in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#pragma once -#ifndef NV_IMAGE_BLOCKDXT_H -#define NV_IMAGE_BLOCKDXT_H - -#include "nvimage.h" - -#include "nvmath/Color.h" - -namespace nv -{ - struct ColorBlock; - class Stream; - - - /// DXT1 block. - struct BlockDXT1 - { - Color16 col0; - Color16 col1; - union { - uint8 row[4]; - uint indices; - }; - - bool isFourColorMode() const; - - uint evaluatePalette(Color32 color_array[4], bool d3d9) const; - uint evaluatePaletteNV5x(Color32 color_array[4]) const; - - void evaluatePalette3(Color32 color_array[4], bool d3d9) const; - void evaluatePalette4(Color32 color_array[4], bool d3d9) const; - - void decodeBlock(ColorBlock * block, bool d3d9 = false) const; - void decodeBlockNV5x(ColorBlock * block) const; - - void setIndices(int * idx); - - void flip4(); - void flip2(); - }; - - /// Return true if the block uses four color mode, false otherwise. - inline bool BlockDXT1::isFourColorMode() const - { - return col0.u > col1.u; - } - - - /// DXT3 alpha block with explicit alpha. 
- struct AlphaBlockDXT3 - { - union { - struct { - uint alpha0 : 4; - uint alpha1 : 4; - uint alpha2 : 4; - uint alpha3 : 4; - uint alpha4 : 4; - uint alpha5 : 4; - uint alpha6 : 4; - uint alpha7 : 4; - uint alpha8 : 4; - uint alpha9 : 4; - uint alphaA : 4; - uint alphaB : 4; - uint alphaC : 4; - uint alphaD : 4; - uint alphaE : 4; - uint alphaF : 4; - }; - uint16 row[4]; - }; - - void decodeBlock(ColorBlock * block, bool d3d9 = false) const; - - void flip4(); - void flip2(); - }; - - - /// DXT3 block. - struct BlockDXT3 - { - AlphaBlockDXT3 alpha; - BlockDXT1 color; - - void decodeBlock(ColorBlock * block, bool d3d9 = false) const; - void decodeBlockNV5x(ColorBlock * block) const; - - void flip4(); - void flip2(); - }; - - - /// DXT5 alpha block. - struct AlphaBlockDXT5 - { - union { - struct { - uint64 alpha0 : 8; // 8 - uint64 alpha1 : 8; // 16 - uint64 bits0 : 3; // 3 - 19 - uint64 bits1 : 3; // 6 - 22 - uint64 bits2 : 3; // 9 - 25 - uint64 bits3 : 3; // 12 - 28 - uint64 bits4 : 3; // 15 - 31 - uint64 bits5 : 3; // 18 - 34 - uint64 bits6 : 3; // 21 - 37 - uint64 bits7 : 3; // 24 - 40 - uint64 bits8 : 3; // 27 - 43 - uint64 bits9 : 3; // 30 - 46 - uint64 bitsA : 3; // 33 - 49 - uint64 bitsB : 3; // 36 - 52 - uint64 bitsC : 3; // 39 - 55 - uint64 bitsD : 3; // 42 - 58 - uint64 bitsE : 3; // 45 - 61 - uint64 bitsF : 3; // 48 - 64 - }; - uint64 u; - }; - - void evaluatePalette(uint8 alpha[8], bool d3d9) const; - void evaluatePalette8(uint8 alpha[8], bool d3d9) const; - void evaluatePalette6(uint8 alpha[8], bool d3d9) const; - void indices(uint8 index_array[16]) const; - - uint index(uint index) const; - void setIndex(uint index, uint value); - - void decodeBlock(ColorBlock * block, bool d3d9 = false) const; - - void flip4(); - void flip2(); - }; - - - /// DXT5 block. 
- struct BlockDXT5 - { - AlphaBlockDXT5 alpha; - BlockDXT1 color; - - void decodeBlock(ColorBlock * block, bool d3d9 = false) const; - void decodeBlockNV5x(ColorBlock * block) const; - - void flip4(); - void flip2(); - }; - - /// ATI1 block. - struct BlockATI1 - { - AlphaBlockDXT5 alpha; - - void decodeBlock(ColorBlock * block, bool d3d9 = false) const; - - void flip4(); - void flip2(); - }; - - /// ATI2 block. - struct BlockATI2 - { - AlphaBlockDXT5 x; - AlphaBlockDXT5 y; - - void decodeBlock(ColorBlock * block, bool d3d9 = false) const; - - void flip4(); - void flip2(); - }; - - /// CTX1 block. - struct BlockCTX1 - { - uint8 col0[2]; - uint8 col1[2]; - union { - uint8 row[4]; - uint indices; - }; - - void evaluatePalette(Color32 color_array[4]) const; - void setIndices(int * idx); - - void decodeBlock(ColorBlock * block) const; - - void flip4(); - void flip2(); - }; - - - // Serialization functions. - NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT1 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, AlphaBlockDXT3 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT3 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, AlphaBlockDXT5 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT5 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI1 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block); - -} // nv namespace - -#endif // NV_IMAGE_BLOCKDXT_H +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// 
Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#pragma once +#ifndef NV_IMAGE_BLOCKDXT_H +#define NV_IMAGE_BLOCKDXT_H + +#include "nvimage.h" + +#include "nvmath/Color.h" + +namespace nv +{ + struct ColorBlock; + class Stream; + + + /// DXT1 block. + struct BlockDXT1 + { + Color16 col0; + Color16 col1; + union { + uint8 row[4]; + uint indices; + }; + + bool isFourColorMode() const; + + uint evaluatePalette(Color32 color_array[4], bool d3d9) const; + uint evaluatePaletteNV5x(Color32 color_array[4]) const; + + void evaluatePalette3(Color32 color_array[4], bool d3d9) const; + void evaluatePalette4(Color32 color_array[4], bool d3d9) const; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + void decodeBlockNV5x(ColorBlock * block) const; + + void setIndices(int * idx); + + void flip4(); + void flip2(); + }; + + /// Return true if the block uses four color mode, false otherwise. + inline bool BlockDXT1::isFourColorMode() const + { + return col0.u > col1.u; + } + + + /// DXT3 alpha block with explicit alpha. 
+ struct AlphaBlockDXT3 + { + union { + struct { + uint alpha0 : 4; + uint alpha1 : 4; + uint alpha2 : 4; + uint alpha3 : 4; + uint alpha4 : 4; + uint alpha5 : 4; + uint alpha6 : 4; + uint alpha7 : 4; + uint alpha8 : 4; + uint alpha9 : 4; + uint alphaA : 4; + uint alphaB : 4; + uint alphaC : 4; + uint alphaD : 4; + uint alphaE : 4; + uint alphaF : 4; + }; + uint16 row[4]; + }; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + + void flip4(); + void flip2(); + }; + + + /// DXT3 block. + struct BlockDXT3 + { + AlphaBlockDXT3 alpha; + BlockDXT1 color; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + void decodeBlockNV5x(ColorBlock * block) const; + + void flip4(); + void flip2(); + }; + + + /// DXT5 alpha block. + struct AlphaBlockDXT5 + { + union { + struct { + uint64 alpha0 : 8; // 8 + uint64 alpha1 : 8; // 16 + uint64 bits0 : 3; // 3 - 19 + uint64 bits1 : 3; // 6 - 22 + uint64 bits2 : 3; // 9 - 25 + uint64 bits3 : 3; // 12 - 28 + uint64 bits4 : 3; // 15 - 31 + uint64 bits5 : 3; // 18 - 34 + uint64 bits6 : 3; // 21 - 37 + uint64 bits7 : 3; // 24 - 40 + uint64 bits8 : 3; // 27 - 43 + uint64 bits9 : 3; // 30 - 46 + uint64 bitsA : 3; // 33 - 49 + uint64 bitsB : 3; // 36 - 52 + uint64 bitsC : 3; // 39 - 55 + uint64 bitsD : 3; // 42 - 58 + uint64 bitsE : 3; // 45 - 61 + uint64 bitsF : 3; // 48 - 64 + }; + uint64 u; + }; + + void evaluatePalette(uint8 alpha[8], bool d3d9) const; + void evaluatePalette8(uint8 alpha[8], bool d3d9) const; + void evaluatePalette6(uint8 alpha[8], bool d3d9) const; + void indices(uint8 index_array[16]) const; + + uint index(uint index) const; + void setIndex(uint index, uint value); + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + + void flip4(); + void flip2(); + }; + + + /// DXT5 block. 
+ struct BlockDXT5 + { + AlphaBlockDXT5 alpha; + BlockDXT1 color; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + void decodeBlockNV5x(ColorBlock * block) const; + + void flip4(); + void flip2(); + }; + + /// ATI1 block. + struct BlockATI1 + { + AlphaBlockDXT5 alpha; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + + void flip4(); + void flip2(); + }; + + /// ATI2 block. + struct BlockATI2 + { + AlphaBlockDXT5 x; + AlphaBlockDXT5 y; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + + void flip4(); + void flip2(); + }; + + /// CTX1 block. + struct BlockCTX1 + { + uint8 col0[2]; + uint8 col1[2]; + union { + uint8 row[4]; + uint indices; + }; + + void evaluatePalette(Color32 color_array[4]) const; + void setIndices(int * idx); + + void decodeBlock(ColorBlock * block) const; + + void flip4(); + void flip2(); + }; + + + // Serialization functions. + NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT1 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, AlphaBlockDXT3 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT3 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, AlphaBlockDXT5 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT5 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI1 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block); + +} // nv namespace + +#endif // NV_IMAGE_BLOCKDXT_H diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp index 2da7752..2087e85 100644 --- a/src/nvimage/ColorBlock.cpp +++ b/src/nvimage/ColorBlock.cpp @@ -1,635 +1,635 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include "ColorBlock.h" -#include "Image.h" -#include "FloatImage.h" - -#include "nvmath/Box.h" -#include "nvmath/Vector.inl" -#include "nvcore/Utils.h" // swap - -#include // memcpy - -using namespace nv; 
- -namespace { - - // Get approximate luminance. - inline static uint colorLuminance(Color32 c) - { - return c.r + c.g + c.b; - } - - // Get the euclidean distance between the given colors. - inline static uint colorDistance(Color32 c0, Color32 c1) - { - return (c0.r - c1.r) * (c0.r - c1.r) + (c0.g - c1.g) * (c0.g - c1.g) + (c0.b - c1.b) * (c0.b - c1.b); - } - -} // namespace` - - -/// Default constructor. -ColorBlock::ColorBlock() -{ -} - -/// Init the color block from an array of colors. -ColorBlock::ColorBlock(const uint * linearImage) -{ - for(uint i = 0; i < 16; i++) { - color(i) = Color32(linearImage[i]); - } -} - -/// Init the color block with the contents of the given block. -ColorBlock::ColorBlock(const ColorBlock & block) -{ - for(uint i = 0; i < 16; i++) { - color(i) = block.color(i); - } -} - - -/// Initialize this color block. -ColorBlock::ColorBlock(const Image * img, uint x, uint y) -{ - init(img, x, y); -} - -void ColorBlock::init(const Image * img, uint x, uint y) -{ - init(img->width(), img->height(), (const uint *)img->pixels(), x, y); -} - -void ColorBlock::init(uint w, uint h, const uint * data, uint x, uint y) -{ - nvDebugCheck(data != NULL); - - const uint bw = min(w - x, 4U); - const uint bh = min(h - y, 4U); - nvDebugCheck(bw != 0 && bh != 0); - - // Blocks that are smaller than 4x4 are handled by repeating the pixels. - // @@ Thats only correct when block size is 1, 2 or 4, but not with 3. :( - // @@ Ideally we should zero the weights of the pixels out of range. 
- - for (uint i = 0; i < 4; i++) - { - const int by = i % bh; - - for (uint e = 0; e < 4; e++) - { - const int bx = e % bw; - const uint idx = (y + by) * w + x + bx; - - color(e, i).u = data[idx]; - } - } -} - -void ColorBlock::init(uint w, uint h, const float * data, uint x, uint y) -{ - nvDebugCheck(data != NULL); - - const uint bw = min(w - x, 4U); - const uint bh = min(h - y, 4U); - nvDebugCheck(bw != 0 && bh != 0); - - // Blocks that are smaller than 4x4 are handled by repeating the pixels. - // @@ Thats only correct when block size is 1, 2 or 4, but not with 3. :( - // @@ Ideally we should zero the weights of the pixels out of range. - - uint srcPlane = w * h; - - for (uint i = 0; i < 4; i++) - { - const uint by = i % bh; - - for (uint e = 0; e < 4; e++) - { - const uint bx = e % bw; - const uint idx = ((y + by) * w + x + bx); - - Color32 & c = color(e, i); - c.r = uint8(255 * clamp(data[idx + 0 * srcPlane], 0.0f, 1.0f)); // @@ Is this the right way to quantize floats to bytes? - c.g = uint8(255 * clamp(data[idx + 1 * srcPlane], 0.0f, 1.0f)); - c.b = uint8(255 * clamp(data[idx + 2 * srcPlane], 0.0f, 1.0f)); - c.a = uint8(255 * clamp(data[idx + 3 * srcPlane], 0.0f, 1.0f)); - } - } -} - -static inline uint8 component(Color32 c, uint i) -{ - if (i == 0) return c.r; - if (i == 1) return c.g; - if (i == 2) return c.b; - if (i == 3) return c.a; - if (i == 4) return 0xFF; - return 0; -} - -void ColorBlock::swizzle(uint x, uint y, uint z, uint w) -{ - for (int i = 0; i < 16; i++) - { - Color32 c = m_color[i]; - m_color[i].r = component(c, x); - m_color[i].g = component(c, y); - m_color[i].b = component(c, z); - m_color[i].a = component(c, w); - } -} - - -/// Returns true if the block has a single color. 
-bool ColorBlock::isSingleColor(Color32 mask/*= Color32(0xFF, 0xFF, 0xFF, 0x00)*/) const -{ - uint u = m_color[0].u & mask.u; - - for (int i = 1; i < 16; i++) - { - if (u != (m_color[i].u & mask.u)) - { - return false; - } - } - - return true; -} - -/* -/// Returns true if the block has a single color, ignoring transparent pixels. -bool ColorBlock::isSingleColorNoAlpha() const -{ - Color32 c; - int i; - for(i = 0; i < 16; i++) - { - if (m_color[i].a != 0) c = m_color[i]; - } - - Color32 mask(0xFF, 0xFF, 0xFF, 0x00); - uint u = c.u & mask.u; - - for(; i < 16; i++) - { - if (u != (m_color[i].u & mask.u)) - { - return false; - } - } - - return true; -} -*/ - -/// Count number of unique colors in this color block. -/*uint ColorBlock::countUniqueColors() const -{ - uint count = 0; - - // @@ This does not have to be o(n^2) - for(int i = 0; i < 16; i++) - { - bool unique = true; - for(int j = 0; j < i; j++) { - if( m_color[i] != m_color[j] ) { - unique = false; - } - } - - if( unique ) { - count++; - } - } - - return count; -}*/ - -/*/// Get average color of the block. -Color32 ColorBlock::averageColor() const -{ - uint r, g, b, a; - r = g = b = a = 0; - - for(uint i = 0; i < 16; i++) { - r += m_color[i].r; - g += m_color[i].g; - b += m_color[i].b; - a += m_color[i].a; - } - - return Color32(uint8(r / 16), uint8(g / 16), uint8(b / 16), uint8(a / 16)); -}*/ - -/// Return true if the block is not fully opaque. -bool ColorBlock::hasAlpha() const -{ - for (uint i = 0; i < 16; i++) - { - if (m_color[i].a != 255) return true; - } - return false; -} - -#if 0 - -/// Get diameter color range. 
-void ColorBlock::diameterRange(Color32 * start, Color32 * end) const -{ - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); - - Color32 c0, c1; - uint best_dist = 0; - - for(int i = 0; i < 16; i++) { - for (int j = i+1; j < 16; j++) { - uint dist = colorDistance(m_color[i], m_color[j]); - if( dist > best_dist ) { - best_dist = dist; - c0 = m_color[i]; - c1 = m_color[j]; - } - } - } - - *start = c0; - *end = c1; -} - -/// Get luminance color range. -void ColorBlock::luminanceRange(Color32 * start, Color32 * end) const -{ - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); - - Color32 minColor, maxColor; - uint minLuminance, maxLuminance; - - maxLuminance = minLuminance = colorLuminance(m_color[0]); - - for(uint i = 1; i < 16; i++) - { - uint luminance = colorLuminance(m_color[i]); - - if (luminance > maxLuminance) { - maxLuminance = luminance; - maxColor = m_color[i]; - } - else if (luminance < minLuminance) { - minLuminance = luminance; - minColor = m_color[i]; - } - } - - *start = minColor; - *end = maxColor; -} - -/// Get color range based on the bounding box. -void ColorBlock::boundsRange(Color32 * start, Color32 * end) const -{ - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); - - Color32 minColor(255, 255, 255); - Color32 maxColor(0, 0, 0); - - for(uint i = 0; i < 16; i++) - { - if (m_color[i].r < minColor.r) { minColor.r = m_color[i].r; } - if (m_color[i].g < minColor.g) { minColor.g = m_color[i].g; } - if (m_color[i].b < minColor.b) { minColor.b = m_color[i].b; } - if (m_color[i].r > maxColor.r) { maxColor.r = m_color[i].r; } - if (m_color[i].g > maxColor.g) { maxColor.g = m_color[i].g; } - if (m_color[i].b > maxColor.b) { maxColor.b = m_color[i].b; } - } - - // Offset range by 1/16 of the extents - Color32 inset; - inset.r = (maxColor.r - minColor.r) >> 4; - inset.g = (maxColor.g - minColor.g) >> 4; - inset.b = (maxColor.b - minColor.b) >> 4; - - minColor.r = (minColor.r + inset.r <= 255) ? 
minColor.r + inset.r : 255; - minColor.g = (minColor.g + inset.g <= 255) ? minColor.g + inset.g : 255; - minColor.b = (minColor.b + inset.b <= 255) ? minColor.b + inset.b : 255; - - maxColor.r = (maxColor.r >= inset.r) ? maxColor.r - inset.r : 0; - maxColor.g = (maxColor.g >= inset.g) ? maxColor.g - inset.g : 0; - maxColor.b = (maxColor.b >= inset.b) ? maxColor.b - inset.b : 0; - - *start = minColor; - *end = maxColor; -} - -/// Get color range based on the bounding box. -void ColorBlock::boundsRangeAlpha(Color32 * start, Color32 * end) const -{ - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); - - Color32 minColor(255, 255, 255, 255); - Color32 maxColor(0, 0, 0, 0); - - for(uint i = 0; i < 16; i++) - { - if (m_color[i].r < minColor.r) { minColor.r = m_color[i].r; } - if (m_color[i].g < minColor.g) { minColor.g = m_color[i].g; } - if (m_color[i].b < minColor.b) { minColor.b = m_color[i].b; } - if (m_color[i].a < minColor.a) { minColor.a = m_color[i].a; } - if (m_color[i].r > maxColor.r) { maxColor.r = m_color[i].r; } - if (m_color[i].g > maxColor.g) { maxColor.g = m_color[i].g; } - if (m_color[i].b > maxColor.b) { maxColor.b = m_color[i].b; } - if (m_color[i].a > maxColor.a) { maxColor.a = m_color[i].a; } - } - - // Offset range by 1/16 of the extents - Color32 inset; - inset.r = (maxColor.r - minColor.r) >> 4; - inset.g = (maxColor.g - minColor.g) >> 4; - inset.b = (maxColor.b - minColor.b) >> 4; - inset.a = (maxColor.a - minColor.a) >> 4; - - minColor.r = (minColor.r + inset.r <= 255) ? minColor.r + inset.r : 255; - minColor.g = (minColor.g + inset.g <= 255) ? minColor.g + inset.g : 255; - minColor.b = (minColor.b + inset.b <= 255) ? minColor.b + inset.b : 255; - minColor.a = (minColor.a + inset.a <= 255) ? minColor.a + inset.a : 255; - - maxColor.r = (maxColor.r >= inset.r) ? maxColor.r - inset.r : 0; - maxColor.g = (maxColor.g >= inset.g) ? maxColor.g - inset.g : 0; - maxColor.b = (maxColor.b >= inset.b) ? 
maxColor.b - inset.b : 0; - maxColor.a = (maxColor.a >= inset.a) ? maxColor.a - inset.a : 0; - - *start = minColor; - *end = maxColor; -} -#endif - -/*/// Sort colors by abosolute value in their 16 bit representation. -void ColorBlock::sortColorsByAbsoluteValue() -{ - // Dummy selection sort. - for( uint a = 0; a < 16; a++ ) { - uint max = a; - Color16 cmax(m_color[a]); - - for( uint b = a+1; b < 16; b++ ) { - Color16 cb(m_color[b]); - - if( cb.u > cmax.u ) { - max = b; - cmax = cb; - } - } - swap( m_color[a], m_color[max] ); - } -}*/ - - -/*/// Find extreme colors in the given axis. -void ColorBlock::computeRange(Vector3::Arg axis, Color32 * start, Color32 * end) const -{ - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); - - int mini, maxi; - mini = maxi = 0; - - float min, max; - min = max = dot(Vector3(m_color[0].r, m_color[0].g, m_color[0].b), axis); - - for(uint i = 1; i < 16; i++) - { - const Vector3 vec(m_color[i].r, m_color[i].g, m_color[i].b); - - float val = dot(vec, axis); - if( val < min ) { - mini = i; - min = val; - } - else if( val > max ) { - maxi = i; - max = val; - } - } - - *start = m_color[mini]; - *end = m_color[maxi]; -}*/ - - -/*/// Sort colors in the given axis. -void ColorBlock::sortColors(const Vector3 & axis) -{ - float luma_array[16]; - - for(uint i = 0; i < 16; i++) { - const Vector3 vec(m_color[i].r, m_color[i].g, m_color[i].b); - luma_array[i] = dot(vec, axis); - } - - // Dummy selection sort. - for( uint a = 0; a < 16; a++ ) { - uint min = a; - for( uint b = a+1; b < 16; b++ ) { - if( luma_array[b] < luma_array[min] ) { - min = b; - } - } - swap( luma_array[a], luma_array[min] ); - swap( m_color[a], m_color[min] ); - } -}*/ - - -/*/// Get the volume of the color block. 
-float ColorBlock::volume() const -{ - Box bounds; - bounds.clearBounds(); - - for(int i = 0; i < 16; i++) { - const Vector3 point(m_color[i].r, m_color[i].g, m_color[i].b); - bounds.addPointToBounds(point); - } - - return bounds.volume(); -}*/ - - -void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y) -{ - nvDebugCheck(img_x < img_w && img_y < img_h); - - w = min(4U, img_w - img_x); - h = min(4U, img_h - img_y); - nvDebugCheck(w != 0 && h != 0); - - count = w * h; - - const float * r = data + img_w * img_h * 0; - const float * g = data + img_w * img_h * 1; - const float * b = data + img_w * img_h * 2; - const float * a = data + img_w * img_h * 3; - - // Set colors. - for (uint y = 0, i = 0; y < h; y++) - { - for (uint x = 0; x < w; x++, i++) - { - uint idx = x + img_x + (y + img_y) * img_w; - colors[i].x = r[idx]; - colors[i].y = g[idx]; - colors[i].z = b[idx]; - colors[i].w = a[idx]; - } - } -} - -void ColorSet::setAlphaWeights() -{ - for (uint i = 0; i < count; i++) - { - weights[i] = max(colors[i].w, 0.001f); // Avoid division by zero. - } -} - -void ColorSet::setUniformWeights() -{ - for (uint i = 0; i < count; i++) - { - weights[i] = 1.0f; - } -} - - -void ColorSet::createMinimalSet(bool ignoreTransparent) -{ - nvDebugCheck(count == w*h); // Do not call this method multiple times. 
- - Vector4 C[16]; - float W[16]; - memcpy(C, colors, sizeof(Vector4)*count); - memcpy(W, weights, sizeof(float)*count); - - uint n = 0; - for (uint y = 0, i = 0; y < h; y++) - { - for (uint x = 0; x < w; x++, i++) - { - if (ignoreTransparent && C[i].w == 0) { - continue; - } - - uint idx = y * 4 + x; - - // loop over previous points for a match - for (int j = 0; ; j++) - { - // allocate a new point - if (j == i) - { - colors[n] = C[i]; - weights[n] = W[i]; - remap[idx] = n; - n++; - break; - } - - // check for a match - bool colorMatch = (C[i].x == C[j].x) && (C[i].w == C[j].w) && (C[i].z == C[j].z); - //bool alphaMatch = (C[i].w == C[j].w); - - if (colorMatch) - { - // get the index of the match - int index = remap[j]; - - // map to this point and increase the weight - weights[index] += W[i]; - remap[idx] = index; - break; - } - } - } - } - - count = n; - - // Avoid empty blocks. - if (count == 0) { - count = 1; - //colors[0] = C[0]; - //weights[0] = W[0]; - memset(remap, 0, sizeof(int)*16); - } -} - - -// Fill blocks that are smaller than (4,4) by wrapping indices. 
-void ColorSet::wrapIndices() -{ - for (uint y = h; y < 4; y++) - { - uint base = (y % h) * w; - for (uint x = w; x < 4; x++) - { - remap[y*4+3] = remap[base + (x % w)]; - } - } -} - -bool ColorSet::isSingleColor(bool ignoreAlpha) const -{ - Vector4 v = colors[0]; - if (ignoreAlpha) v.w = 1.0f; - - for (uint i = 1; i < count; i++) - { - Vector4 c = colors[i]; - if (ignoreAlpha) c.w = 1.0f; - - if (v != c) { - return false; - } - } - - return true; -} - - -// 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 -static inline float component(Vector4::Arg c, uint i) -{ - if (i == 0) return c.x; - if (i == 1) return c.y; - if (i == 2) return c.z; - if (i == 3) return c.w; - if (i == 4) return 0xFF; - return 0; -} - -void ColorSet::swizzle(uint x, uint y, uint z, uint w) -{ - for (uint i = 0; i < count; i++) - { - Vector4 c = colors[i]; - colors[i].x = component(c, x); - colors[i].y = component(c, y); - colors[i].z = component(c, z); - colors[i].w = component(c, w); - } -} - -bool ColorSet::hasAlpha() const -{ - for (uint i = 0; i < count; i++) - { - if (colors[i].w != 0.0f) return true; - } - return false; -} +// This code is in the public domain -- castanyo@yahoo.es + +#include "ColorBlock.h" +#include "Image.h" +#include "FloatImage.h" + +#include "nvmath/Box.h" +#include "nvmath/Vector.inl" +#include "nvcore/Utils.h" // swap + +#include // memcpy + +using namespace nv; + +namespace { + + // Get approximate luminance. + inline static uint colorLuminance(Color32 c) + { + return c.r + c.g + c.b; + } + + // Get the euclidean distance between the given colors. + inline static uint colorDistance(Color32 c0, Color32 c1) + { + return (c0.r - c1.r) * (c0.r - c1.r) + (c0.g - c1.g) * (c0.g - c1.g) + (c0.b - c1.b) * (c0.b - c1.b); + } + +} // namespace` + + +/// Default constructor. +ColorBlock::ColorBlock() +{ +} + +/// Init the color block from an array of colors. 
+ColorBlock::ColorBlock(const uint * linearImage) +{ + for(uint i = 0; i < 16; i++) { + color(i) = Color32(linearImage[i]); + } +} + +/// Init the color block with the contents of the given block. +ColorBlock::ColorBlock(const ColorBlock & block) +{ + for(uint i = 0; i < 16; i++) { + color(i) = block.color(i); + } +} + + +/// Initialize this color block. +ColorBlock::ColorBlock(const Image * img, uint x, uint y) +{ + init(img, x, y); +} + +void ColorBlock::init(const Image * img, uint x, uint y) +{ + init(img->width(), img->height(), (const uint *)img->pixels(), x, y); +} + +void ColorBlock::init(uint w, uint h, const uint * data, uint x, uint y) +{ + nvDebugCheck(data != NULL); + + const uint bw = min(w - x, 4U); + const uint bh = min(h - y, 4U); + nvDebugCheck(bw != 0 && bh != 0); + + // Blocks that are smaller than 4x4 are handled by repeating the pixels. + // @@ Thats only correct when block size is 1, 2 or 4, but not with 3. :( + // @@ Ideally we should zero the weights of the pixels out of range. + + for (uint i = 0; i < 4; i++) + { + const int by = i % bh; + + for (uint e = 0; e < 4; e++) + { + const int bx = e % bw; + const uint idx = (y + by) * w + x + bx; + + color(e, i).u = data[idx]; + } + } +} + +void ColorBlock::init(uint w, uint h, const float * data, uint x, uint y) +{ + nvDebugCheck(data != NULL); + + const uint bw = min(w - x, 4U); + const uint bh = min(h - y, 4U); + nvDebugCheck(bw != 0 && bh != 0); + + // Blocks that are smaller than 4x4 are handled by repeating the pixels. + // @@ Thats only correct when block size is 1, 2 or 4, but not with 3. :( + // @@ Ideally we should zero the weights of the pixels out of range. 
+ + uint srcPlane = w * h; + + for (uint i = 0; i < 4; i++) + { + const uint by = i % bh; + + for (uint e = 0; e < 4; e++) + { + const uint bx = e % bw; + const uint idx = ((y + by) * w + x + bx); + + Color32 & c = color(e, i); + c.r = uint8(255 * clamp(data[idx + 0 * srcPlane], 0.0f, 1.0f)); // @@ Is this the right way to quantize floats to bytes? + c.g = uint8(255 * clamp(data[idx + 1 * srcPlane], 0.0f, 1.0f)); + c.b = uint8(255 * clamp(data[idx + 2 * srcPlane], 0.0f, 1.0f)); + c.a = uint8(255 * clamp(data[idx + 3 * srcPlane], 0.0f, 1.0f)); + } + } +} + +static inline uint8 component(Color32 c, uint i) +{ + if (i == 0) return c.r; + if (i == 1) return c.g; + if (i == 2) return c.b; + if (i == 3) return c.a; + if (i == 4) return 0xFF; + return 0; +} + +void ColorBlock::swizzle(uint x, uint y, uint z, uint w) +{ + for (int i = 0; i < 16; i++) + { + Color32 c = m_color[i]; + m_color[i].r = component(c, x); + m_color[i].g = component(c, y); + m_color[i].b = component(c, z); + m_color[i].a = component(c, w); + } +} + + +/// Returns true if the block has a single color. +bool ColorBlock::isSingleColor(Color32 mask/*= Color32(0xFF, 0xFF, 0xFF, 0x00)*/) const +{ + uint u = m_color[0].u & mask.u; + + for (int i = 1; i < 16; i++) + { + if (u != (m_color[i].u & mask.u)) + { + return false; + } + } + + return true; +} + +/* +/// Returns true if the block has a single color, ignoring transparent pixels. +bool ColorBlock::isSingleColorNoAlpha() const +{ + Color32 c; + int i; + for(i = 0; i < 16; i++) + { + if (m_color[i].a != 0) c = m_color[i]; + } + + Color32 mask(0xFF, 0xFF, 0xFF, 0x00); + uint u = c.u & mask.u; + + for(; i < 16; i++) + { + if (u != (m_color[i].u & mask.u)) + { + return false; + } + } + + return true; +} +*/ + +/// Count number of unique colors in this color block. 
+/*uint ColorBlock::countUniqueColors() const +{ + uint count = 0; + + // @@ This does not have to be o(n^2) + for(int i = 0; i < 16; i++) + { + bool unique = true; + for(int j = 0; j < i; j++) { + if( m_color[i] != m_color[j] ) { + unique = false; + } + } + + if( unique ) { + count++; + } + } + + return count; +}*/ + +/*/// Get average color of the block. +Color32 ColorBlock::averageColor() const +{ + uint r, g, b, a; + r = g = b = a = 0; + + for(uint i = 0; i < 16; i++) { + r += m_color[i].r; + g += m_color[i].g; + b += m_color[i].b; + a += m_color[i].a; + } + + return Color32(uint8(r / 16), uint8(g / 16), uint8(b / 16), uint8(a / 16)); +}*/ + +/// Return true if the block is not fully opaque. +bool ColorBlock::hasAlpha() const +{ + for (uint i = 0; i < 16; i++) + { + if (m_color[i].a != 255) return true; + } + return false; +} + +#if 0 + +/// Get diameter color range. +void ColorBlock::diameterRange(Color32 * start, Color32 * end) const +{ + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); + + Color32 c0, c1; + uint best_dist = 0; + + for(int i = 0; i < 16; i++) { + for (int j = i+1; j < 16; j++) { + uint dist = colorDistance(m_color[i], m_color[j]); + if( dist > best_dist ) { + best_dist = dist; + c0 = m_color[i]; + c1 = m_color[j]; + } + } + } + + *start = c0; + *end = c1; +} + +/// Get luminance color range. +void ColorBlock::luminanceRange(Color32 * start, Color32 * end) const +{ + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); + + Color32 minColor, maxColor; + uint minLuminance, maxLuminance; + + maxLuminance = minLuminance = colorLuminance(m_color[0]); + + for(uint i = 1; i < 16; i++) + { + uint luminance = colorLuminance(m_color[i]); + + if (luminance > maxLuminance) { + maxLuminance = luminance; + maxColor = m_color[i]; + } + else if (luminance < minLuminance) { + minLuminance = luminance; + minColor = m_color[i]; + } + } + + *start = minColor; + *end = maxColor; +} + +/// Get color range based on the bounding box. 
+void ColorBlock::boundsRange(Color32 * start, Color32 * end) const +{ + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); + + Color32 minColor(255, 255, 255); + Color32 maxColor(0, 0, 0); + + for(uint i = 0; i < 16; i++) + { + if (m_color[i].r < minColor.r) { minColor.r = m_color[i].r; } + if (m_color[i].g < minColor.g) { minColor.g = m_color[i].g; } + if (m_color[i].b < minColor.b) { minColor.b = m_color[i].b; } + if (m_color[i].r > maxColor.r) { maxColor.r = m_color[i].r; } + if (m_color[i].g > maxColor.g) { maxColor.g = m_color[i].g; } + if (m_color[i].b > maxColor.b) { maxColor.b = m_color[i].b; } + } + + // Offset range by 1/16 of the extents + Color32 inset; + inset.r = (maxColor.r - minColor.r) >> 4; + inset.g = (maxColor.g - minColor.g) >> 4; + inset.b = (maxColor.b - minColor.b) >> 4; + + minColor.r = (minColor.r + inset.r <= 255) ? minColor.r + inset.r : 255; + minColor.g = (minColor.g + inset.g <= 255) ? minColor.g + inset.g : 255; + minColor.b = (minColor.b + inset.b <= 255) ? minColor.b + inset.b : 255; + + maxColor.r = (maxColor.r >= inset.r) ? maxColor.r - inset.r : 0; + maxColor.g = (maxColor.g >= inset.g) ? maxColor.g - inset.g : 0; + maxColor.b = (maxColor.b >= inset.b) ? maxColor.b - inset.b : 0; + + *start = minColor; + *end = maxColor; +} + +/// Get color range based on the bounding box. 
+void ColorBlock::boundsRangeAlpha(Color32 * start, Color32 * end) const +{ + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); + + Color32 minColor(255, 255, 255, 255); + Color32 maxColor(0, 0, 0, 0); + + for(uint i = 0; i < 16; i++) + { + if (m_color[i].r < minColor.r) { minColor.r = m_color[i].r; } + if (m_color[i].g < minColor.g) { minColor.g = m_color[i].g; } + if (m_color[i].b < minColor.b) { minColor.b = m_color[i].b; } + if (m_color[i].a < minColor.a) { minColor.a = m_color[i].a; } + if (m_color[i].r > maxColor.r) { maxColor.r = m_color[i].r; } + if (m_color[i].g > maxColor.g) { maxColor.g = m_color[i].g; } + if (m_color[i].b > maxColor.b) { maxColor.b = m_color[i].b; } + if (m_color[i].a > maxColor.a) { maxColor.a = m_color[i].a; } + } + + // Offset range by 1/16 of the extents + Color32 inset; + inset.r = (maxColor.r - minColor.r) >> 4; + inset.g = (maxColor.g - minColor.g) >> 4; + inset.b = (maxColor.b - minColor.b) >> 4; + inset.a = (maxColor.a - minColor.a) >> 4; + + minColor.r = (minColor.r + inset.r <= 255) ? minColor.r + inset.r : 255; + minColor.g = (minColor.g + inset.g <= 255) ? minColor.g + inset.g : 255; + minColor.b = (minColor.b + inset.b <= 255) ? minColor.b + inset.b : 255; + minColor.a = (minColor.a + inset.a <= 255) ? minColor.a + inset.a : 255; + + maxColor.r = (maxColor.r >= inset.r) ? maxColor.r - inset.r : 0; + maxColor.g = (maxColor.g >= inset.g) ? maxColor.g - inset.g : 0; + maxColor.b = (maxColor.b >= inset.b) ? maxColor.b - inset.b : 0; + maxColor.a = (maxColor.a >= inset.a) ? maxColor.a - inset.a : 0; + + *start = minColor; + *end = maxColor; +} +#endif + +/*/// Sort colors by abosolute value in their 16 bit representation. +void ColorBlock::sortColorsByAbsoluteValue() +{ + // Dummy selection sort. 
+ for( uint a = 0; a < 16; a++ ) { + uint max = a; + Color16 cmax(m_color[a]); + + for( uint b = a+1; b < 16; b++ ) { + Color16 cb(m_color[b]); + + if( cb.u > cmax.u ) { + max = b; + cmax = cb; + } + } + swap( m_color[a], m_color[max] ); + } +}*/ + + +/*/// Find extreme colors in the given axis. +void ColorBlock::computeRange(Vector3::Arg axis, Color32 * start, Color32 * end) const +{ + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); + + int mini, maxi; + mini = maxi = 0; + + float min, max; + min = max = dot(Vector3(m_color[0].r, m_color[0].g, m_color[0].b), axis); + + for(uint i = 1; i < 16; i++) + { + const Vector3 vec(m_color[i].r, m_color[i].g, m_color[i].b); + + float val = dot(vec, axis); + if( val < min ) { + mini = i; + min = val; + } + else if( val > max ) { + maxi = i; + max = val; + } + } + + *start = m_color[mini]; + *end = m_color[maxi]; +}*/ + + +/*/// Sort colors in the given axis. +void ColorBlock::sortColors(const Vector3 & axis) +{ + float luma_array[16]; + + for(uint i = 0; i < 16; i++) { + const Vector3 vec(m_color[i].r, m_color[i].g, m_color[i].b); + luma_array[i] = dot(vec, axis); + } + + // Dummy selection sort. + for( uint a = 0; a < 16; a++ ) { + uint min = a; + for( uint b = a+1; b < 16; b++ ) { + if( luma_array[b] < luma_array[min] ) { + min = b; + } + } + swap( luma_array[a], luma_array[min] ); + swap( m_color[a], m_color[min] ); + } +}*/ + + +/*/// Get the volume of the color block. 
+float ColorBlock::volume() const +{ + Box bounds; + bounds.clearBounds(); + + for(int i = 0; i < 16; i++) { + const Vector3 point(m_color[i].r, m_color[i].g, m_color[i].b); + bounds.addPointToBounds(point); + } + + return bounds.volume(); +}*/ + + +void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y) +{ + nvDebugCheck(img_x < img_w && img_y < img_h); + + w = min(4U, img_w - img_x); + h = min(4U, img_h - img_y); + nvDebugCheck(w != 0 && h != 0); + + count = w * h; + + const float * r = data + img_w * img_h * 0; + const float * g = data + img_w * img_h * 1; + const float * b = data + img_w * img_h * 2; + const float * a = data + img_w * img_h * 3; + + // Set colors. + for (uint y = 0, i = 0; y < h; y++) + { + for (uint x = 0; x < w; x++, i++) + { + uint idx = x + img_x + (y + img_y) * img_w; + colors[i].x = r[idx]; + colors[i].y = g[idx]; + colors[i].z = b[idx]; + colors[i].w = a[idx]; + } + } +} + +void ColorSet::setAlphaWeights() +{ + for (uint i = 0; i < count; i++) + { + weights[i] = max(colors[i].w, 0.001f); // Avoid division by zero. + } +} + +void ColorSet::setUniformWeights() +{ + for (uint i = 0; i < count; i++) + { + weights[i] = 1.0f; + } +} + + +void ColorSet::createMinimalSet(bool ignoreTransparent) +{ + nvDebugCheck(count == w*h); // Do not call this method multiple times. 
+ + Vector4 C[16]; + float W[16]; + memcpy(C, colors, sizeof(Vector4)*count); + memcpy(W, weights, sizeof(float)*count); + + uint n = 0; + for (uint y = 0, i = 0; y < h; y++) + { + for (uint x = 0; x < w; x++, i++) + { + if (ignoreTransparent && C[i].w == 0) { + continue; + } + + uint idx = y * 4 + x; + + // loop over previous points for a match + for (int j = 0; ; j++) + { + // allocate a new point + if (j == i) + { + colors[n] = C[i]; + weights[n] = W[i]; + remap[idx] = n; + n++; + break; + } + + // check for a match + bool colorMatch = (C[i].x == C[j].x) && (C[i].w == C[j].w) && (C[i].z == C[j].z); + //bool alphaMatch = (C[i].w == C[j].w); + + if (colorMatch) + { + // get the index of the match + int index = remap[j]; + + // map to this point and increase the weight + weights[index] += W[i]; + remap[idx] = index; + break; + } + } + } + } + + count = n; + + // Avoid empty blocks. + if (count == 0) { + count = 1; + //colors[0] = C[0]; + //weights[0] = W[0]; + memset(remap, 0, sizeof(int)*16); + } +} + + +// Fill blocks that are smaller than (4,4) by wrapping indices. 
+void ColorSet::wrapIndices() +{ + for (uint y = h; y < 4; y++) + { + uint base = (y % h) * w; + for (uint x = w; x < 4; x++) + { + remap[y*4+3] = remap[base + (x % w)]; + } + } +} + +bool ColorSet::isSingleColor(bool ignoreAlpha) const +{ + Vector4 v = colors[0]; + if (ignoreAlpha) v.w = 1.0f; + + for (uint i = 1; i < count; i++) + { + Vector4 c = colors[i]; + if (ignoreAlpha) c.w = 1.0f; + + if (v != c) { + return false; + } + } + + return true; +} + + +// 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 +static inline float component(Vector4::Arg c, uint i) +{ + if (i == 0) return c.x; + if (i == 1) return c.y; + if (i == 2) return c.z; + if (i == 3) return c.w; + if (i == 4) return 0xFF; + return 0; +} + +void ColorSet::swizzle(uint x, uint y, uint z, uint w) +{ + for (uint i = 0; i < count; i++) + { + Vector4 c = colors[i]; + colors[i].x = component(c, x); + colors[i].y = component(c, y); + colors[i].z = component(c, z); + colors[i].w = component(c, w); + } +} + +bool ColorSet::hasAlpha() const +{ + for (uint i = 0; i < count; i++) + { + if (colors[i].w != 0.0f) return true; + } + return false; +} diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h index 054bb61..ea0aaec 100644 --- a/src/nvimage/ColorBlock.h +++ b/src/nvimage/ColorBlock.h @@ -1,117 +1,117 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#pragma once -#ifndef NV_IMAGE_COLORBLOCK_H -#define NV_IMAGE_COLORBLOCK_H - -#include "nvmath/Color.h" -#include "nvmath/Vector.h" - -namespace nv -{ - class Image; - class FloatImage; - - /// Uncompressed 4x4 color block. 
- struct ColorBlock - { - ColorBlock(); - ColorBlock(const uint * linearImage); - ColorBlock(const ColorBlock & block); - ColorBlock(const Image * img, uint x, uint y); - - void init(const Image * img, uint x, uint y); - void init(uint w, uint h, const uint * data, uint x, uint y); - void init(uint w, uint h, const float * data, uint x, uint y); - - void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 - - bool isSingleColor(Color32 mask = Color32(0xFF, 0xFF, 0xFF, 0x00)) const; - bool hasAlpha() const; - - - // Accessors - const Color32 * colors() const; - - Color32 color(uint i) const; - Color32 & color(uint i); - - Color32 color(uint x, uint y) const; - Color32 & color(uint x, uint y); - - private: - - Color32 m_color[4*4]; - - }; - - - /// Get pointer to block colors. - inline const Color32 * ColorBlock::colors() const - { - return m_color; - } - - /// Get block color. - inline Color32 ColorBlock::color(uint i) const - { - nvDebugCheck(i < 16); - return m_color[i]; - } - - /// Get block color. - inline Color32 & ColorBlock::color(uint i) - { - nvDebugCheck(i < 16); - return m_color[i]; - } - - /// Get block color. - inline Color32 ColorBlock::color(uint x, uint y) const - { - nvDebugCheck(x < 4 && y < 4); - return m_color[y * 4 + x]; - } - - /// Get block color. 
- inline Color32 & ColorBlock::color(uint x, uint y) - { - nvDebugCheck(x < 4 && y < 4); - return m_color[y * 4 + x]; - } - - - struct ColorSet - { - void setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y); - - void setAlphaWeights(); - void setUniformWeights(); - - void createMinimalSet(bool ignoreTransparent); - void wrapIndices(); - - void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 - - bool isSingleColor(bool ignoreAlpha) const; - bool hasAlpha() const; - - // These methods require indices to be set: - Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[remap[y * 4 + x]]; } - Vector4 & color(uint x, uint y) { nvDebugCheck(x < w && y < h); return colors[remap[y * 4 + x]]; } - - Vector4 color(uint i) const { nvDebugCheck(i < 16); return colors[remap[i]]; } - Vector4 & color(uint i) { nvDebugCheck(i < 16); return colors[remap[i]]; } - - - uint count; - uint w, h; - - Vector4 colors[16]; - float weights[16]; - int remap[16]; - }; - -} // nv namespace - -#endif // NV_IMAGE_COLORBLOCK_H +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_IMAGE_COLORBLOCK_H +#define NV_IMAGE_COLORBLOCK_H + +#include "nvmath/Color.h" +#include "nvmath/Vector.h" + +namespace nv +{ + class Image; + class FloatImage; + + /// Uncompressed 4x4 color block. 
+ struct ColorBlock + { + ColorBlock(); + ColorBlock(const uint * linearImage); + ColorBlock(const ColorBlock & block); + ColorBlock(const Image * img, uint x, uint y); + + void init(const Image * img, uint x, uint y); + void init(uint w, uint h, const uint * data, uint x, uint y); + void init(uint w, uint h, const float * data, uint x, uint y); + + void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 + + bool isSingleColor(Color32 mask = Color32(0xFF, 0xFF, 0xFF, 0x00)) const; + bool hasAlpha() const; + + + // Accessors + const Color32 * colors() const; + + Color32 color(uint i) const; + Color32 & color(uint i); + + Color32 color(uint x, uint y) const; + Color32 & color(uint x, uint y); + + private: + + Color32 m_color[4*4]; + + }; + + + /// Get pointer to block colors. + inline const Color32 * ColorBlock::colors() const + { + return m_color; + } + + /// Get block color. + inline Color32 ColorBlock::color(uint i) const + { + nvDebugCheck(i < 16); + return m_color[i]; + } + + /// Get block color. + inline Color32 & ColorBlock::color(uint i) + { + nvDebugCheck(i < 16); + return m_color[i]; + } + + /// Get block color. + inline Color32 ColorBlock::color(uint x, uint y) const + { + nvDebugCheck(x < 4 && y < 4); + return m_color[y * 4 + x]; + } + + /// Get block color. 
+ inline Color32 & ColorBlock::color(uint x, uint y) + { + nvDebugCheck(x < 4 && y < 4); + return m_color[y * 4 + x]; + } + + + struct ColorSet + { + void setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y); + + void setAlphaWeights(); + void setUniformWeights(); + + void createMinimalSet(bool ignoreTransparent); + void wrapIndices(); + + void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 + + bool isSingleColor(bool ignoreAlpha) const; + bool hasAlpha() const; + + // These methods require indices to be set: + Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[remap[y * 4 + x]]; } + Vector4 & color(uint x, uint y) { nvDebugCheck(x < w && y < h); return colors[remap[y * 4 + x]]; } + + Vector4 color(uint i) const { nvDebugCheck(i < 16); return colors[remap[i]]; } + Vector4 & color(uint i) { nvDebugCheck(i < 16); return colors[remap[i]]; } + + + uint count; + uint w, h; + + Vector4 colors[16]; + float weights[16]; + int remap[16]; + }; + +} // nv namespace + +#endif // NV_IMAGE_COLORBLOCK_H diff --git a/src/nvimage/ColorSpace.cpp b/src/nvimage/ColorSpace.cpp index f6ac4ce..11bdae0 100644 --- a/src/nvimage/ColorSpace.cpp +++ b/src/nvimage/ColorSpace.cpp @@ -1,10 +1,10 @@ // This code is in the public domain -- jim@tilander.org - + #include - -#include -#include - + +#include +#include + #include "ColorSpace.h" namespace nv @@ -67,4 +67,4 @@ namespace nv } } } -} +} diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp index aa8ff4d..c9faa76 100644 --- a/src/nvimage/DirectDrawSurface.cpp +++ b/src/nvimage/DirectDrawSurface.cpp @@ -1,1620 +1,1620 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to 
use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "DirectDrawSurface.h" -#include "ColorBlock.h" -#include "Image.h" -#include "BlockDXT.h" -#include "PixelFormat.h" - -#include "nvcore/Debug.h" -#include "nvcore/Utils.h" // max -#include "nvcore/StdStream.h" - -#include // memset - - -using namespace nv; - - -const uint nv::FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'); - -const uint nv::FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '); -const uint nv::FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'); -const uint nv::FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'); -const uint nv::FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'); -const uint nv::FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'); -const uint nv::FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'); -const uint nv::FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'); -const uint nv::FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'); -const uint nv::FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'); - - - -namespace -{ - - static const uint FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'); - - static const uint FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'); - - static const uint FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'); - - - - static const uint 
DDSD_CAPS = 0x00000001U; - static const uint DDSD_PIXELFORMAT = 0x00001000U; - static const uint DDSD_WIDTH = 0x00000004U; - static const uint DDSD_HEIGHT = 0x00000002U; - static const uint DDSD_PITCH = 0x00000008U; - static const uint DDSD_MIPMAPCOUNT = 0x00020000U; - static const uint DDSD_LINEARSIZE = 0x00080000U; - static const uint DDSD_DEPTH = 0x00800000U; - - static const uint DDSCAPS_COMPLEX = 0x00000008U; - static const uint DDSCAPS_TEXTURE = 0x00001000U; - static const uint DDSCAPS_MIPMAP = 0x00400000U; - static const uint DDSCAPS2_VOLUME = 0x00200000U; - static const uint DDSCAPS2_CUBEMAP = 0x00000200U; - - static const uint DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400U; - static const uint DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800U; - static const uint DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000U; - static const uint DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000U; - static const uint DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000U; - static const uint DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000U; - static const uint DDSCAPS2_CUBEMAP_ALL_FACES = 0x0000FC00U; - - - const char * getDxgiFormatString(DXGI_FORMAT dxgiFormat) - { -#define CASE(format) case DXGI_FORMAT_##format: return #format - switch(dxgiFormat) - { - CASE(UNKNOWN); - - CASE(R32G32B32A32_TYPELESS); - CASE(R32G32B32A32_FLOAT); - CASE(R32G32B32A32_UINT); - CASE(R32G32B32A32_SINT); - - CASE(R32G32B32_TYPELESS); - CASE(R32G32B32_FLOAT); - CASE(R32G32B32_UINT); - CASE(R32G32B32_SINT); - - CASE(R16G16B16A16_TYPELESS); - CASE(R16G16B16A16_FLOAT); - CASE(R16G16B16A16_UNORM); - CASE(R16G16B16A16_UINT); - CASE(R16G16B16A16_SNORM); - CASE(R16G16B16A16_SINT); - - CASE(R32G32_TYPELESS); - CASE(R32G32_FLOAT); - CASE(R32G32_UINT); - CASE(R32G32_SINT); - - CASE(R32G8X24_TYPELESS); - CASE(D32_FLOAT_S8X24_UINT); - CASE(R32_FLOAT_X8X24_TYPELESS); - CASE(X32_TYPELESS_G8X24_UINT); - - CASE(R10G10B10A2_TYPELESS); - CASE(R10G10B10A2_UNORM); - CASE(R10G10B10A2_UINT); - - CASE(R11G11B10_FLOAT); - - CASE(R8G8B8A8_TYPELESS); - 
CASE(R8G8B8A8_UNORM); - CASE(R8G8B8A8_UNORM_SRGB); - CASE(R8G8B8A8_UINT); - CASE(R8G8B8A8_SNORM); - CASE(R8G8B8A8_SINT); - - CASE(R16G16_TYPELESS); - CASE(R16G16_FLOAT); - CASE(R16G16_UNORM); - CASE(R16G16_UINT); - CASE(R16G16_SNORM); - CASE(R16G16_SINT); - - CASE(R32_TYPELESS); - CASE(D32_FLOAT); - CASE(R32_FLOAT); - CASE(R32_UINT); - CASE(R32_SINT); - - CASE(R24G8_TYPELESS); - CASE(D24_UNORM_S8_UINT); - CASE(R24_UNORM_X8_TYPELESS); - CASE(X24_TYPELESS_G8_UINT); - - CASE(R8G8_TYPELESS); - CASE(R8G8_UNORM); - CASE(R8G8_UINT); - CASE(R8G8_SNORM); - CASE(R8G8_SINT); - - CASE(R16_TYPELESS); - CASE(R16_FLOAT); - CASE(D16_UNORM); - CASE(R16_UNORM); - CASE(R16_UINT); - CASE(R16_SNORM); - CASE(R16_SINT); - - CASE(R8_TYPELESS); - CASE(R8_UNORM); - CASE(R8_UINT); - CASE(R8_SNORM); - CASE(R8_SINT); - CASE(A8_UNORM); - - CASE(R1_UNORM); - - CASE(R9G9B9E5_SHAREDEXP); - - CASE(R8G8_B8G8_UNORM); - CASE(G8R8_G8B8_UNORM); - - CASE(BC1_TYPELESS); - CASE(BC1_UNORM); - CASE(BC1_UNORM_SRGB); - - CASE(BC2_TYPELESS); - CASE(BC2_UNORM); - CASE(BC2_UNORM_SRGB); - - CASE(BC3_TYPELESS); - CASE(BC3_UNORM); - CASE(BC3_UNORM_SRGB); - - CASE(BC4_TYPELESS); - CASE(BC4_UNORM); - CASE(BC4_SNORM); - - CASE(BC5_TYPELESS); - CASE(BC5_UNORM); - CASE(BC5_SNORM); - - CASE(B5G6R5_UNORM); - CASE(B5G5R5A1_UNORM); - CASE(B8G8R8A8_UNORM); - CASE(B8G8R8X8_UNORM); - - default: - return "UNKNOWN"; - } -#undef CASE - } - - const char * getD3d10ResourceDimensionString(D3D10_RESOURCE_DIMENSION resourceDimension) - { - switch(resourceDimension) - { - default: - case D3D10_RESOURCE_DIMENSION_UNKNOWN: return "UNKNOWN"; - case D3D10_RESOURCE_DIMENSION_BUFFER: return "BUFFER"; - case D3D10_RESOURCE_DIMENSION_TEXTURE1D: return "TEXTURE1D"; - case D3D10_RESOURCE_DIMENSION_TEXTURE2D: return "TEXTURE2D"; - case D3D10_RESOURCE_DIMENSION_TEXTURE3D: return "TEXTURE3D"; - } - } - - static uint pixelSize(D3DFORMAT format) { - if (format == D3DFMT_R16F) return 8*2; - if (format == D3DFMT_G16R16F) return 8*4; - if (format == 
D3DFMT_A16B16G16R16F) return 8*8; - if (format == D3DFMT_R32F) return 8*4; - if (format == D3DFMT_G32R32F) return 8*8; - if (format == D3DFMT_A32B32G32R32F) return 8*16; - - if (format == D3DFMT_R8G8B8) return 8*3; - if (format == D3DFMT_A8R8G8B8) return 8*4; - if (format == D3DFMT_X8R8G8B8) return 8*4; - if (format == D3DFMT_R5G6B5) return 8*2; - if (format == D3DFMT_X1R5G5B5) return 8*2; - if (format == D3DFMT_A1R5G5B5) return 8*2; - if (format == D3DFMT_A4R4G4B4) return 8*2; - if (format == D3DFMT_R3G3B2) return 8*1; - if (format == D3DFMT_A8) return 8*1; - if (format == D3DFMT_A8R3G3B2) return 8*2; - if (format == D3DFMT_X4R4G4B4) return 8*2; - if (format == D3DFMT_A2B10G10R10) return 8*4; - if (format == D3DFMT_A8B8G8R8) return 8*4; - if (format == D3DFMT_X8B8G8R8) return 8*4; - if (format == D3DFMT_G16R16) return 8*4; - if (format == D3DFMT_A2R10G10B10) return 8*4; - if (format == D3DFMT_A2B10G10R10) return 8*4; - - if (format == D3DFMT_L8) return 8*1; - if (format == D3DFMT_L16) return 8*2; - - return 0; - } - - static uint pixelSize(DXGI_FORMAT format) { - switch(format) { - case DXGI_FORMAT_R32G32B32A32_TYPELESS: - case DXGI_FORMAT_R32G32B32A32_FLOAT: - case DXGI_FORMAT_R32G32B32A32_UINT: - case DXGI_FORMAT_R32G32B32A32_SINT: - return 8*16; - - case DXGI_FORMAT_R32G32B32_TYPELESS: - case DXGI_FORMAT_R32G32B32_FLOAT: - case DXGI_FORMAT_R32G32B32_UINT: - case DXGI_FORMAT_R32G32B32_SINT: - return 8*12; - - case DXGI_FORMAT_R16G16B16A16_TYPELESS: - case DXGI_FORMAT_R16G16B16A16_FLOAT: - case DXGI_FORMAT_R16G16B16A16_UNORM: - case DXGI_FORMAT_R16G16B16A16_UINT: - case DXGI_FORMAT_R16G16B16A16_SNORM: - case DXGI_FORMAT_R16G16B16A16_SINT: - - case DXGI_FORMAT_R32G32_TYPELESS: - case DXGI_FORMAT_R32G32_FLOAT: - case DXGI_FORMAT_R32G32_UINT: - case DXGI_FORMAT_R32G32_SINT: - - case DXGI_FORMAT_R32G8X24_TYPELESS: - case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: - case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: - case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: - return 8*8; - - case 
DXGI_FORMAT_R10G10B10A2_TYPELESS: - case DXGI_FORMAT_R10G10B10A2_UNORM: - case DXGI_FORMAT_R10G10B10A2_UINT: - - case DXGI_FORMAT_R11G11B10_FLOAT: - - case DXGI_FORMAT_R8G8B8A8_TYPELESS: - case DXGI_FORMAT_R8G8B8A8_UNORM: - case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: - case DXGI_FORMAT_R8G8B8A8_UINT: - case DXGI_FORMAT_R8G8B8A8_SNORM: - case DXGI_FORMAT_R8G8B8A8_SINT: - - case DXGI_FORMAT_R16G16_TYPELESS: - case DXGI_FORMAT_R16G16_FLOAT: - case DXGI_FORMAT_R16G16_UNORM: - case DXGI_FORMAT_R16G16_UINT: - case DXGI_FORMAT_R16G16_SNORM: - case DXGI_FORMAT_R16G16_SINT: - - case DXGI_FORMAT_R32_TYPELESS: - case DXGI_FORMAT_D32_FLOAT: - case DXGI_FORMAT_R32_FLOAT: - case DXGI_FORMAT_R32_UINT: - case DXGI_FORMAT_R32_SINT: - - case DXGI_FORMAT_R24G8_TYPELESS: - case DXGI_FORMAT_D24_UNORM_S8_UINT: - case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: - case DXGI_FORMAT_X24_TYPELESS_G8_UINT: - return 8*4; - - case DXGI_FORMAT_R8G8_TYPELESS: - case DXGI_FORMAT_R8G8_UNORM: - case DXGI_FORMAT_R8G8_UINT: - case DXGI_FORMAT_R8G8_SNORM: - case DXGI_FORMAT_R8G8_SINT: - - case DXGI_FORMAT_R16_TYPELESS: - case DXGI_FORMAT_R16_FLOAT: - case DXGI_FORMAT_D16_UNORM: - case DXGI_FORMAT_R16_UNORM: - case DXGI_FORMAT_R16_UINT: - case DXGI_FORMAT_R16_SNORM: - case DXGI_FORMAT_R16_SINT: - return 8*2; - - case DXGI_FORMAT_R8_TYPELESS: - case DXGI_FORMAT_R8_UNORM: - case DXGI_FORMAT_R8_UINT: - case DXGI_FORMAT_R8_SNORM: - case DXGI_FORMAT_R8_SINT: - case DXGI_FORMAT_A8_UNORM: - return 8*1; - - case DXGI_FORMAT_R1_UNORM: - return 1; - - case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: - return 8*4; - - case DXGI_FORMAT_R8G8_B8G8_UNORM: - case DXGI_FORMAT_G8R8_G8B8_UNORM: - return 8*4; - - case DXGI_FORMAT_B5G6R5_UNORM: - case DXGI_FORMAT_B5G5R5A1_UNORM: - return 8*2; - - case DXGI_FORMAT_B8G8R8A8_UNORM: - case DXGI_FORMAT_B8G8R8X8_UNORM: - return 8*4; - - case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: - case DXGI_FORMAT_B8G8R8A8_TYPELESS: - case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: - case DXGI_FORMAT_B8G8R8X8_TYPELESS: - case 
DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: - return 8*4; - } - - return 0; - } - -} // namespace - -namespace nv -{ - static Stream & operator<< (Stream & s, DDSPixelFormat & pf) - { - nvStaticCheck(sizeof(DDSPixelFormat) == 32); - s << pf.size; - s << pf.flags; - s << pf.fourcc; - s << pf.bitcount; - s.serialize(&pf.rmask, sizeof(pf.rmask)); - s.serialize(&pf.gmask, sizeof(pf.gmask)); - s.serialize(&pf.bmask, sizeof(pf.bmask)); - s.serialize(&pf.amask, sizeof(pf.amask)); - // s << pf.rmask; - // s << pf.gmask; - // s << pf.bmask; - // s << pf.amask; - return s; - } - - static Stream & operator<< (Stream & s, DDSCaps & caps) - { - nvStaticCheck(sizeof(DDSCaps) == 16); - s << caps.caps1; - s << caps.caps2; - s << caps.caps3; - s << caps.caps4; - return s; - } - - static Stream & operator<< (Stream & s, DDSHeader10 & header) - { - nvStaticCheck(sizeof(DDSHeader10) == 20); - s << header.dxgiFormat; - s << header.resourceDimension; - s << header.miscFlag; - s << header.arraySize; - s << header.reserved; - return s; - } - - Stream & operator<< (Stream & s, DDSHeader & header) - { - nvStaticCheck(sizeof(DDSHeader) == 148); - s << header.fourcc; - s << header.size; - s << header.flags; - s << header.height; - s << header.width; - s << header.pitch; - s << header.depth; - s << header.mipmapcount; - for (int i = 0; i < 11; i++) { - s << header.reserved[i]; - } - s << header.pf; - s << header.caps; - s << header.notused; - - if (header.hasDX10Header()) - { - s << header.header10; - } - - return s; - } - -} // nv namespace - -namespace -{ - struct FormatDescriptor - { - uint format; - uint bitcount; - uint rmask; - uint gmask; - uint bmask; - uint amask; - }; - - static const FormatDescriptor s_d3d9Formats[] = - { - { D3DFMT_R8G8B8, 24, 0xFF0000, 0xFF00, 0xFF, 0 }, - { D3DFMT_A8R8G8B8, 32, 0xFF0000, 0xFF00, 0xFF, 0xFF000000 }, // DXGI_FORMAT_B8G8R8A8_UNORM - { D3DFMT_X8R8G8B8, 32, 0xFF0000, 0xFF00, 0xFF, 0 }, // DXGI_FORMAT_B8G8R8X8_UNORM - { D3DFMT_R5G6B5, 16, 0xF800, 0x7E0, 0x1F, 0 
}, // DXGI_FORMAT_B5G6R5_UNORM - { D3DFMT_X1R5G5B5, 16, 0x7C00, 0x3E0, 0x1F, 0 }, - { D3DFMT_A1R5G5B5, 16, 0x7C00, 0x3E0, 0x1F, 0x8000 }, // DXGI_FORMAT_B5G5R5A1_UNORM - { D3DFMT_A4R4G4B4, 16, 0xF00, 0xF0, 0xF, 0xF000 }, - { D3DFMT_R3G3B2, 8, 0xE0, 0x1C, 0x3, 0 }, - { D3DFMT_A8, 8, 0, 0, 0, 8 }, // DXGI_FORMAT_A8_UNORM - { D3DFMT_A8R3G3B2, 16, 0xE0, 0x1C, 0x3, 0xFF00 }, - { D3DFMT_X4R4G4B4, 16, 0xF00, 0xF0, 0xF, 0 }, - { D3DFMT_A2B10G10R10, 32, 0x3FF, 0xFFC00, 0x3FF00000, 0xC0000000 }, // DXGI_FORMAT_R10G10B10A2 - { D3DFMT_A8B8G8R8, 32, 0xFF, 0xFF00, 0xFF0000, 0xFF000000 }, // DXGI_FORMAT_R8G8B8A8_UNORM - { D3DFMT_X8B8G8R8, 32, 0xFF, 0xFF00, 0xFF0000, 0 }, - { D3DFMT_G16R16, 32, 0xFFFF, 0xFFFF0000, 0, 0 }, // DXGI_FORMAT_R16G16_UNORM - { D3DFMT_A2R10G10B10, 32, 0x3FF00000, 0xFFC00, 0x3FF, 0xC0000000 }, - { D3DFMT_A2B10G10R10, 32, 0x3FF, 0xFFC00, 0x3FF00000, 0xC0000000 }, - - { D3DFMT_L8, 8, 8, 0, 0, 0 }, // DXGI_FORMAT_R8_UNORM - { D3DFMT_L16, 16, 16, 0, 0, 0 }, // DXGI_FORMAT_R16_UNORM - }; - - static const uint s_d3d9FormatCount = NV_ARRAY_SIZE(s_d3d9Formats); - -} // namespace - -uint nv::findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) -{ - for (int i = 0; i < s_d3d9FormatCount; i++) - { - if (s_d3d9Formats[i].bitcount == bitcount && - s_d3d9Formats[i].rmask == rmask && - s_d3d9Formats[i].gmask == gmask && - s_d3d9Formats[i].bmask == bmask && - s_d3d9Formats[i].amask == amask) - { - return s_d3d9Formats[i].format; - } - } - - return 0; -} - - -DDSHeader::DDSHeader() -{ - this->fourcc = FOURCC_DDS; - this->size = 124; - this->flags = (DDSD_CAPS|DDSD_PIXELFORMAT); - this->height = 0; - this->width = 0; - this->pitch = 0; - this->depth = 0; - this->mipmapcount = 0; - memset(this->reserved, 0, sizeof(this->reserved)); - - // Store version information on the reserved header attributes. 
- this->reserved[9] = FOURCC_NVTT; - this->reserved[10] = (2 << 16) | (1 << 8) | (0); // major.minor.revision - - this->pf.size = 32; - this->pf.flags = 0; - this->pf.fourcc = 0; - this->pf.bitcount = 0; - this->pf.rmask = 0; - this->pf.gmask = 0; - this->pf.bmask = 0; - this->pf.amask = 0; - this->caps.caps1 = DDSCAPS_TEXTURE; - this->caps.caps2 = 0; - this->caps.caps3 = 0; - this->caps.caps4 = 0; - this->notused = 0; - - this->header10.dxgiFormat = DXGI_FORMAT_UNKNOWN; - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_UNKNOWN; - this->header10.miscFlag = 0; - this->header10.arraySize = 0; - this->header10.reserved = 0; -} - -void DDSHeader::setWidth(uint w) -{ - this->flags |= DDSD_WIDTH; - this->width = w; -} - -void DDSHeader::setHeight(uint h) -{ - this->flags |= DDSD_HEIGHT; - this->height = h; -} - -void DDSHeader::setDepth(uint d) -{ - this->flags |= DDSD_DEPTH; - this->depth = d; -} - -void DDSHeader::setMipmapCount(uint count) -{ - if (count == 0 || count == 1) - { - this->flags &= ~DDSD_MIPMAPCOUNT; - this->mipmapcount = 1; - - if (this->caps.caps2 == 0) { - this->caps.caps1 = DDSCAPS_TEXTURE; - } - else { - this->caps.caps1 = DDSCAPS_TEXTURE | DDSCAPS_COMPLEX; - } - } - else - { - this->flags |= DDSD_MIPMAPCOUNT; - this->mipmapcount = count; - - this->caps.caps1 |= DDSCAPS_COMPLEX | DDSCAPS_MIPMAP; - } -} - -void DDSHeader::setTexture2D() -{ - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; - this->header10.arraySize = 1; -} - -void DDSHeader::setTexture3D() -{ - this->caps.caps2 = DDSCAPS2_VOLUME; - - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE3D; - this->header10.arraySize = 1; -} - -void DDSHeader::setTextureCube() -{ - this->caps.caps1 |= DDSCAPS_COMPLEX; - this->caps.caps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALL_FACES; - - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; - this->header10.arraySize = 6; -} - -void DDSHeader::setLinearSize(uint size) -{ - this->flags 
&= ~DDSD_PITCH; - this->flags |= DDSD_LINEARSIZE; - this->pitch = size; -} - -void DDSHeader::setPitch(uint pitch) -{ - this->flags &= ~DDSD_LINEARSIZE; - this->flags |= DDSD_PITCH; - this->pitch = pitch; -} - -void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3) -{ - // set fourcc pixel format. - this->pf.flags = DDPF_FOURCC; - this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3); - - this->pf.bitcount = 0; - this->pf.rmask = 0; - this->pf.gmask = 0; - this->pf.bmask = 0; - this->pf.amask = 0; -} - -void DDSHeader::setFormatCode(uint32 code) -{ - // set fourcc pixel format. - this->pf.flags = DDPF_FOURCC; - this->pf.fourcc = code; - - this->pf.bitcount = 0; - this->pf.rmask = 0; - this->pf.gmask = 0; - this->pf.bmask = 0; - this->pf.amask = 0; -} - -void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3) -{ - this->pf.bitcount = MAKEFOURCC(c0, c1, c2, c3); -} - - -void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) -{ - // Make sure the masks are correct. - nvCheck((rmask & gmask) == 0); - nvCheck((rmask & bmask) == 0); - nvCheck((rmask & amask) == 0); - nvCheck((gmask & bmask) == 0); - nvCheck((gmask & amask) == 0); - nvCheck((bmask & amask) == 0); - - if (rmask != 0 || gmask != 0 || bmask != 0) - { - if (gmask == 0 && bmask == 0) - { - this->pf.flags = DDPF_LUMINANCE; - } - else - { - this->pf.flags = DDPF_RGB; - } - - if (amask != 0) { - this->pf.flags |= DDPF_ALPHAPIXELS; - } - } - else if (amask != 0) - { - this->pf.flags |= DDPF_ALPHA; - } - - if (bitcount == 0) - { - // Compute bit count from the masks. 
- uint total = rmask | gmask | bmask | amask; - while(total != 0) { - bitcount++; - total >>= 1; - } - } - - // D3DX functions do not like this: - this->pf.fourcc = 0; //findD3D9Format(bitcount, rmask, gmask, bmask, amask); - /*if (this->pf.fourcc) { - this->pf.flags |= DDPF_FOURCC; - }*/ - - nvCheck(bitcount > 0 && bitcount <= 32); - this->pf.bitcount = bitcount; - this->pf.rmask = rmask; - this->pf.gmask = gmask; - this->pf.bmask = bmask; - this->pf.amask = amask; -} - -void DDSHeader::setDX10Format(uint format) -{ - this->pf.flags = DDPF_FOURCC; - this->pf.fourcc = FOURCC_DX10; - this->header10.dxgiFormat = format; -} - -void DDSHeader::setNormalFlag(bool b) -{ - if (b) this->pf.flags |= DDPF_NORMAL; - else this->pf.flags &= ~DDPF_NORMAL; -} - -void DDSHeader::setSrgbFlag(bool b) -{ - if (b) this->pf.flags |= DDPF_SRGB; - else this->pf.flags &= ~DDPF_SRGB; -} - -void DDSHeader::setHasAlphaFlag(bool b) -{ - if (b) this->pf.flags |= DDPF_ALPHAPIXELS; - else this->pf.flags &= ~DDPF_ALPHAPIXELS; -} - -void DDSHeader::setUserVersion(int version) -{ - this->reserved[7] = FOURCC_UVER; - this->reserved[8] = version; -} - -void DDSHeader::swapBytes() -{ - this->fourcc = POSH_LittleU32(this->fourcc); - this->size = POSH_LittleU32(this->size); - this->flags = POSH_LittleU32(this->flags); - this->height = POSH_LittleU32(this->height); - this->width = POSH_LittleU32(this->width); - this->pitch = POSH_LittleU32(this->pitch); - this->depth = POSH_LittleU32(this->depth); - this->mipmapcount = POSH_LittleU32(this->mipmapcount); - - for(int i = 0; i < 11; i++) { - this->reserved[i] = POSH_LittleU32(this->reserved[i]); - } - - this->pf.size = POSH_LittleU32(this->pf.size); - this->pf.flags = POSH_LittleU32(this->pf.flags); - this->pf.fourcc = POSH_LittleU32(this->pf.fourcc); - this->pf.bitcount = POSH_LittleU32(this->pf.bitcount); - this->pf.rmask = POSH_LittleU32(this->pf.rmask); - this->pf.gmask = POSH_LittleU32(this->pf.gmask); - this->pf.bmask = POSH_LittleU32(this->pf.bmask); 
- this->pf.amask = POSH_LittleU32(this->pf.amask); - this->caps.caps1 = POSH_LittleU32(this->caps.caps1); - this->caps.caps2 = POSH_LittleU32(this->caps.caps2); - this->caps.caps3 = POSH_LittleU32(this->caps.caps3); - this->caps.caps4 = POSH_LittleU32(this->caps.caps4); - this->notused = POSH_LittleU32(this->notused); - - this->header10.dxgiFormat = POSH_LittleU32(this->header10.dxgiFormat); - this->header10.resourceDimension = POSH_LittleU32(this->header10.resourceDimension); - this->header10.miscFlag = POSH_LittleU32(this->header10.miscFlag); - this->header10.arraySize = POSH_LittleU32(this->header10.arraySize); - this->header10.reserved = POSH_LittleU32(this->header10.reserved); -} - -bool DDSHeader::hasDX10Header() const -{ - //if (pf.flags & DDPF_FOURCC) { - return this->pf.fourcc == FOURCC_DX10; - //} - //return false; -} - -uint DDSHeader::signature() const -{ - return this->reserved[9]; -} - -uint DDSHeader::toolVersion() const -{ - return this->reserved[10]; -} - -uint DDSHeader::userVersion() const -{ - if (this->reserved[7] == FOURCC_UVER) { - return this->reserved[8]; - } - return 0; -} - -bool DDSHeader::isNormalMap() const -{ - return (pf.flags & DDPF_NORMAL) != 0; -} - -bool DDSHeader::isSrgb() const -{ - return (pf.flags & DDPF_SRGB) != 0; -} - -bool DDSHeader::hasAlpha() const -{ - return (pf.flags & DDPF_ALPHAPIXELS) != 0; -} - -uint DDSHeader::d3d9Format() const -{ - if (pf.flags & DDPF_FOURCC) { - return pf.fourcc; - } - else { - return findD3D9Format(pf.bitcount, pf.rmask, pf.gmask, pf.bmask, pf.amask); - } -} - -uint DDSHeader::pixelSize() const -{ - if (hasDX10Header()) { - return ::pixelSize((DXGI_FORMAT)header10.dxgiFormat); - } - else { - if (flags & DDPF_FOURCC) { - return ::pixelSize((D3DFORMAT)pf.fourcc); - } - else { - nvDebugCheck((pf.flags & DDPF_RGB) || (pf.flags & DDPF_LUMINANCE)); - return pf.bitcount; - } - } -} - -uint DDSHeader::blockSize() const -{ - switch(pf.fourcc) - { - case FOURCC_DXT1: - case FOURCC_ATI1: - return 8; - 
case FOURCC_DXT2: - case FOURCC_DXT3: - case FOURCC_DXT4: - case FOURCC_DXT5: - case FOURCC_RXGB: - case FOURCC_ATI2: - return 16; - case FOURCC_DX10: - switch(header10.dxgiFormat) - { - case DXGI_FORMAT_BC1_TYPELESS: - case DXGI_FORMAT_BC1_UNORM: - case DXGI_FORMAT_BC1_UNORM_SRGB: - case DXGI_FORMAT_BC4_TYPELESS: - case DXGI_FORMAT_BC4_UNORM: - case DXGI_FORMAT_BC4_SNORM: - return 8; - case DXGI_FORMAT_BC2_TYPELESS: - case DXGI_FORMAT_BC2_UNORM: - case DXGI_FORMAT_BC2_UNORM_SRGB: - case DXGI_FORMAT_BC3_TYPELESS: - case DXGI_FORMAT_BC3_UNORM: - case DXGI_FORMAT_BC3_UNORM_SRGB: - case DXGI_FORMAT_BC5_TYPELESS: - case DXGI_FORMAT_BC5_UNORM: - case DXGI_FORMAT_BC5_SNORM: - case DXGI_FORMAT_BC6H_TYPELESS: - case DXGI_FORMAT_BC6H_SF16: - case DXGI_FORMAT_BC6H_UF16: - case DXGI_FORMAT_BC7_TYPELESS: - case DXGI_FORMAT_BC7_UNORM: - case DXGI_FORMAT_BC7_UNORM_SRGB: - return 16; - }; - }; - - // Not a block image. - return 0; -} - -bool DDSHeader::isBlockFormat() const -{ - return blockSize() != 0; -} - - - - - -DirectDrawSurface::DirectDrawSurface() : stream(NULL) -{ -} - -DirectDrawSurface::DirectDrawSurface(const char * name) : stream(NULL) -{ - load(name); -} - -DirectDrawSurface::DirectDrawSurface(Stream * s) : stream(NULL) -{ - load(s); -} - -DirectDrawSurface::~DirectDrawSurface() -{ - delete stream; -} - -bool DirectDrawSurface::load(const char * filename) -{ - return load(new StdInputStream(filename)); -} - -bool DirectDrawSurface::load(Stream * stream) -{ - delete this->stream; - this->stream = stream; - - if (!stream->isError()) - { - (*stream) << header; - return true; - } - - return false; -} - -bool DirectDrawSurface::isValid() const -{ - if (stream == NULL || stream->isError()) - { - return false; - } - - if (header.fourcc != FOURCC_DDS || header.size != 124) - { - return false; - } - - const uint required = (DDSD_WIDTH|DDSD_HEIGHT/*|DDSD_CAPS|DDSD_PIXELFORMAT*/); - if( (header.flags & required) != required ) { - return false; - } - - if (header.pf.size != 32) 
{ - return false; - } - - if( !(header.caps.caps1 & DDSCAPS_TEXTURE) ) { - return false; - } - - return true; -} - -bool DirectDrawSurface::isSupported() const -{ - nvDebugCheck(isValid()); - - if (header.hasDX10Header()) - { - if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) - { - return true; - } - - return false; - } - else - { - if (header.pf.flags & DDPF_FOURCC) - { - if (header.pf.fourcc != FOURCC_DXT1 && - header.pf.fourcc != FOURCC_DXT2 && - header.pf.fourcc != FOURCC_DXT3 && - header.pf.fourcc != FOURCC_DXT4 && - header.pf.fourcc != FOURCC_DXT5 && - header.pf.fourcc != FOURCC_RXGB && - header.pf.fourcc != FOURCC_ATI1 && - header.pf.fourcc != FOURCC_ATI2) - { - // Unknown fourcc code. - return false; - } - } - else if ((header.pf.flags & DDPF_RGB) || (header.pf.flags & DDPF_LUMINANCE)) - { - // All RGB and luminance formats are supported now. - } - else - { - return false; - } - - if (isTextureCube()) { - if (header.width != header.height) return false; - - if ((header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES) - { - // Cubemaps must contain all faces. 
- return false; - } - } - } - - return true; -} - -bool DirectDrawSurface::hasAlpha() const -{ - if (header.hasDX10Header()) - { -#pragma NV_MESSAGE("TODO: Update hasAlpha to handle all DX10 formats.") - return - header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || - header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM; - } - else - { - if (header.pf.flags & DDPF_RGB) - { - return header.pf.amask != 0; - } - else if (header.pf.flags & DDPF_FOURCC) - { - if (header.pf.fourcc == FOURCC_RXGB || - header.pf.fourcc == FOURCC_ATI1 || - header.pf.fourcc == FOURCC_ATI2 || - header.pf.flags & DDPF_NORMAL) - { - return false; - } - else - { - // @@ Here we could check the ALPHA_PIXELS flag, but nobody sets it. (except us?) - return true; - } - } - - return false; - } -} - -uint DirectDrawSurface::mipmapCount() const -{ - nvDebugCheck(isValid()); - if (header.flags & DDSD_MIPMAPCOUNT) return header.mipmapcount; - else return 1; -} - - -uint DirectDrawSurface::width() const -{ - nvDebugCheck(isValid()); - if (header.flags & DDSD_WIDTH) return header.width; - else return 1; -} - -uint DirectDrawSurface::height() const -{ - nvDebugCheck(isValid()); - if (header.flags & DDSD_HEIGHT) return header.height; - else return 1; -} - -uint DirectDrawSurface::depth() const -{ - nvDebugCheck(isValid()); - if (header.flags & DDSD_DEPTH) return header.depth; - else return 1; -} - -bool DirectDrawSurface::isTexture1D() const -{ - nvDebugCheck(isValid()); - if (header.hasDX10Header()) - { - return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE1D; - } - return false; -} - -bool DirectDrawSurface::isTexture2D() const -{ - nvDebugCheck(isValid()); - if (header.hasDX10Header()) - { - return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE2D; - } - else - { - return !isTexture3D() && !isTextureCube(); - } -} - -bool DirectDrawSurface::isTexture3D() const -{ - nvDebugCheck(isValid()); - if 
(header.hasDX10Header()) - { - return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE3D; - } - else - { - return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0; - } -} - -bool DirectDrawSurface::isTextureCube() const -{ - nvDebugCheck(isValid()); - return (header.caps.caps2 & DDSCAPS2_CUBEMAP) != 0; -} - -void DirectDrawSurface::setNormalFlag(bool b) -{ - nvDebugCheck(isValid()); - header.setNormalFlag(b); -} - -void DirectDrawSurface::setHasAlphaFlag(bool b) -{ - nvDebugCheck(isValid()); - header.setHasAlphaFlag(b); -} - -void DirectDrawSurface::setUserVersion(int version) -{ - nvDebugCheck(isValid()); - header.setUserVersion(version); -} - -void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap) -{ - nvDebugCheck(isValid()); - - stream->seek(offset(face, mipmap)); - - uint w = width(); - uint h = height(); - uint d = depth(); - - // Compute width and height. - for (uint m = 0; m < mipmap; m++) - { - w = max(1U, w / 2); - h = max(1U, h / 2); - d = max(1U, d / 2); - } - - img->allocate(w, h, d); - - if (hasAlpha()) - { - img->setFormat(Image::Format_ARGB); - } - else - { - img->setFormat(Image::Format_RGB); - } - - if (header.hasDX10Header()) - { - // So far only block formats supported. 
- readBlockImage(img); - } - else - { - if (header.pf.flags & DDPF_RGB) - { - readLinearImage(img); - } - else if (header.pf.flags & DDPF_FOURCC) - { - readBlockImage(img); - } - } -} - -/*void * DirectDrawSurface::readData(uint * sizePtr) -{ - uint header_size = 128; // sizeof(DDSHeader); - - if (header.hasDX10Header()) - { - header_size += 20; // sizeof(DDSHeader10); - } - - stream->seek(header_size); - - int size = stream->size() - header_size; - *sizePtr = size; - - void * data = new unsigned char [size]; - - size = stream->serialize(data, size); - nvDebugCheck(size == *sizePtr); - - return data; -}*/ - -/*uint DirectDrawSurface::surfaceSize(uint mipmap) const -{ - uint w = header.width(); - uint h = header.height(); - uint d = header.depth(); - for (int m = 0; m < mipmap; m++) { - w = (w + 1) / 2; - h = (h + 1) / 2; - d = (d + 1) / 2; - } - - bool isBlockFormat; - uint blockOrPixelSize; - - if (header.hasDX10Header()) { - blockOrPixelSize = blockSize(header10.dxgiFormat); - isBlockFormat = (blockOrPixelSize != 0); - if (isBlockFormat) { - blockOrPixelSize = pixelSize(header10.dxgiFormat); - } - } - else { - header.pf.flags - } - - if (isBlockFormat) { - w = (w + 3) / 4; - h = (h + 3) / 4; - d = (d + 3) / 4; // @@ Is it necessary to align the depths? 
- } - - uint blockOrPixelCount = w * h * d; - - return blockCount = blockOrPixelSize; -}*/ - -bool DirectDrawSurface::readSurface(uint face, uint mipmap, void * data, uint size) -{ - if (size != surfaceSize(mipmap)) return false; - - stream->seek(offset(face, mipmap)); - if (stream->isError()) return false; - - return stream->serialize(data, size) == size; -} - - -void DirectDrawSurface::readLinearImage(Image * img) -{ - nvDebugCheck(stream != NULL); - nvDebugCheck(img != NULL); - - const uint w = img->width(); - const uint h = img->height(); - - uint rshift, rsize; - PixelFormat::maskShiftAndSize(header.pf.rmask, &rshift, &rsize); - - uint gshift, gsize; - PixelFormat::maskShiftAndSize(header.pf.gmask, &gshift, &gsize); - - uint bshift, bsize; - PixelFormat::maskShiftAndSize(header.pf.bmask, &bshift, &bsize); - - uint ashift, asize; - PixelFormat::maskShiftAndSize(header.pf.amask, &ashift, &asize); - - uint byteCount = (header.pf.bitcount + 7) / 8; - -#pragma NV_MESSAGE("TODO: Support floating point linear images and other FOURCC codes.") - - // Read linear RGB images. - for (uint y = 0; y < h; y++) - { - for (uint x = 0; x < w; x++) - { - uint c = 0; - stream->serialize(&c, byteCount); - - Color32 pixel(0, 0, 0, 0xFF); - pixel.r = PixelFormat::convert((c & header.pf.rmask) >> rshift, rsize, 8); - pixel.g = PixelFormat::convert((c & header.pf.gmask) >> gshift, gsize, 8); - pixel.b = PixelFormat::convert((c & header.pf.bmask) >> bshift, bsize, 8); - pixel.a = PixelFormat::convert((c & header.pf.amask) >> ashift, asize, 8); - - img->pixel(x, y) = pixel; - } - } -} - -void DirectDrawSurface::readBlockImage(Image * img) -{ - nvDebugCheck(stream != NULL); - nvDebugCheck(img != NULL); - - const uint w = img->width(); - const uint h = img->height(); - - const uint bw = (w + 3) / 4; - const uint bh = (h + 3) / 4; - - for (uint by = 0; by < bh; by++) - { - for (uint bx = 0; bx < bw; bx++) - { - ColorBlock block; - - // Read color block. 
- readBlock(&block); - - // Write color block. - for (uint y = 0; y < min(4U, h-4*by); y++) - { - for (uint x = 0; x < min(4U, w-4*bx); x++) - { - img->pixel(4*bx+x, 4*by+y) = block.color(x, y); - } - } - } - } -} - -static Color32 buildNormal(uint8 x, uint8 y) -{ - float nx = 2 * (x / 255.0f) - 1; - float ny = 2 * (y / 255.0f) - 1; - float nz = 0.0f; - if (1 - nx*nx - ny*ny > 0) nz = sqrtf(1 - nx*nx - ny*ny); - uint8 z = clamp(int(255.0f * (nz + 1) / 2.0f), 0, 255); - - return Color32(x, y, z); -} - - -void DirectDrawSurface::readBlock(ColorBlock * rgba) -{ - nvDebugCheck(stream != NULL); - nvDebugCheck(rgba != NULL); - - uint fourcc = header.pf.fourcc; - - // Map DX10 block formats to fourcc codes. - if (header.hasDX10Header()) - { - if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM) fourcc = FOURCC_DXT1; - if (header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM) fourcc = FOURCC_DXT3; - if (header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM) fourcc = FOURCC_DXT5; - if (header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM) fourcc = FOURCC_ATI1; - if (header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) fourcc = FOURCC_ATI2; - } - - - if (fourcc == FOURCC_DXT1) - { - BlockDXT1 block; - *stream << block; - block.decodeBlock(rgba); - } - else if (fourcc == FOURCC_DXT2 || fourcc == FOURCC_DXT3) - { - BlockDXT3 block; - *stream << block; - block.decodeBlock(rgba); - } - else if (fourcc == FOURCC_DXT4 || fourcc == FOURCC_DXT5 || fourcc == FOURCC_RXGB) - { - BlockDXT5 block; - *stream << block; - block.decodeBlock(rgba); - - if (fourcc == FOURCC_RXGB) - { - // Swap R & A. - for (int i = 0; i < 16; i++) - { - Color32 & c = rgba->color(i); - uint tmp = c.r; - c.r = c.a; - c.a = tmp; - } - } - } - else if (fourcc == FOURCC_ATI1) - { - BlockATI1 block; - *stream << block; - block.decodeBlock(rgba); - } - else if (fourcc == FOURCC_ATI2) - { - BlockATI2 block; - *stream << block; - block.decodeBlock(rgba); - } - - // If normal flag set, convert to normal. 
- if (header.pf.flags & DDPF_NORMAL) - { - if (fourcc == FOURCC_ATI2) - { - for (int i = 0; i < 16; i++) - { - Color32 & c = rgba->color(i); - c = buildNormal(c.r, c.g); - } - } - else if (fourcc == FOURCC_DXT5) - { - for (int i = 0; i < 16; i++) - { - Color32 & c = rgba->color(i); - c = buildNormal(c.a, c.g); - } - } - } -} - - -static uint mipmapExtent(uint mipmap, uint x) -{ - for (uint m = 0; m < mipmap; m++) { - x = max(1U, x / 2); - } - return x; -} - -uint DirectDrawSurface::surfaceWidth(uint mipmap) const -{ - return mipmapExtent(mipmap, width()); -} - -uint DirectDrawSurface::surfaceHeight(uint mipmap) const -{ - return mipmapExtent(mipmap, height()); -} - -uint DirectDrawSurface::surfaceDepth(uint mipmap) const -{ - return mipmapExtent(mipmap, depth()); -} - -uint DirectDrawSurface::surfaceSize(uint mipmap) const -{ - uint w = surfaceWidth(mipmap); - uint h = surfaceHeight(mipmap); - uint d = surfaceDepth(mipmap); - - uint blockSize = header.blockSize(); - - if (blockSize == 0) { - uint bitCount = header.pixelSize(); - uint pitch = computeBytePitch(w, bitCount, 1); // Asuming 1 byte alignment, which is the same D3DX expects. - return pitch * h * d; - } - else { - w = (w + 3) / 4; - h = (h + 3) / 4; - d = d; // @@ How are 3D textures aligned? 
- return blockSize * w * h * d; - } -} - -uint DirectDrawSurface::faceSize() const -{ - const uint count = mipmapCount(); - uint size = 0; - - for (uint m = 0; m < count; m++) - { - size += surfaceSize(m); - } - - return size; -} - -uint DirectDrawSurface::offset(const uint face, const uint mipmap) -{ - uint size = 128; // sizeof(DDSHeader); - - if (header.hasDX10Header()) - { - size += 20; // sizeof(DDSHeader10); - } - - if (face != 0) - { - size += face * faceSize(); - } - - for (uint m = 0; m < mipmap; m++) - { - size += surfaceSize(m); - } - - return size; -} - - -void DirectDrawSurface::printInfo() const -{ - printf("Flags: 0x%.8X\n", header.flags); - if (header.flags & DDSD_CAPS) printf("\tDDSD_CAPS\n"); - if (header.flags & DDSD_PIXELFORMAT) printf("\tDDSD_PIXELFORMAT\n"); - if (header.flags & DDSD_WIDTH) printf("\tDDSD_WIDTH\n"); - if (header.flags & DDSD_HEIGHT) printf("\tDDSD_HEIGHT\n"); - if (header.flags & DDSD_DEPTH) printf("\tDDSD_DEPTH\n"); - if (header.flags & DDSD_PITCH) printf("\tDDSD_PITCH\n"); - if (header.flags & DDSD_LINEARSIZE) printf("\tDDSD_LINEARSIZE\n"); - if (header.flags & DDSD_MIPMAPCOUNT) printf("\tDDSD_MIPMAPCOUNT\n"); - - printf("Height: %d\n", header.height); - printf("Width: %d\n", header.width); - printf("Depth: %d\n", header.depth); - if (header.flags & DDSD_PITCH) printf("Pitch: %d\n", header.pitch); - else if (header.flags & DDSD_LINEARSIZE) printf("Linear size: %d\n", header.pitch); - printf("Mipmap count: %d\n", header.mipmapcount); - - printf("Pixel Format:\n"); - printf("\tFlags: 0x%.8X\n", header.pf.flags); - if (header.pf.flags & DDPF_RGB) printf("\t\tDDPF_RGB\n"); - if (header.pf.flags & DDPF_LUMINANCE) printf("\t\tDDPF_LUMINANCE\n"); - if (header.pf.flags & DDPF_FOURCC) printf("\t\tDDPF_FOURCC\n"); - if (header.pf.flags & DDPF_ALPHAPIXELS) printf("\t\tDDPF_ALPHAPIXELS\n"); - if (header.pf.flags & DDPF_ALPHA) printf("\t\tDDPF_ALPHA\n"); - if (header.pf.flags & DDPF_PALETTEINDEXED1) printf("\t\tDDPF_PALETTEINDEXED1\n"); 
- if (header.pf.flags & DDPF_PALETTEINDEXED2) printf("\t\tDDPF_PALETTEINDEXED2\n"); - if (header.pf.flags & DDPF_PALETTEINDEXED4) printf("\t\tDDPF_PALETTEINDEXED4\n"); - if (header.pf.flags & DDPF_PALETTEINDEXED8) printf("\t\tDDPF_PALETTEINDEXED8\n"); - if (header.pf.flags & DDPF_ALPHAPREMULT) printf("\t\tDDPF_ALPHAPREMULT\n"); - if (header.pf.flags & DDPF_NORMAL) printf("\t\tDDPF_NORMAL\n"); - - if (header.pf.fourcc != 0) { - // Display fourcc code even when DDPF_FOURCC flag not set. - printf("\tFourCC: '%c%c%c%c' (0x%.8X)\n", - ((header.pf.fourcc >> 0) & 0xFF), - ((header.pf.fourcc >> 8) & 0xFF), - ((header.pf.fourcc >> 16) & 0xFF), - ((header.pf.fourcc >> 24) & 0xFF), - header.pf.fourcc); - } - - if ((header.pf.flags & DDPF_FOURCC) && (header.pf.bitcount != 0)) - { - printf("\tSwizzle: '%c%c%c%c' (0x%.8X)\n", - (header.pf.bitcount >> 0) & 0xFF, - (header.pf.bitcount >> 8) & 0xFF, - (header.pf.bitcount >> 16) & 0xFF, - (header.pf.bitcount >> 24) & 0xFF, - header.pf.bitcount); - } - else - { - printf("\tBit count: %d\n", header.pf.bitcount); - } - - printf("\tRed mask: 0x%.8X\n", header.pf.rmask); - printf("\tGreen mask: 0x%.8X\n", header.pf.gmask); - printf("\tBlue mask: 0x%.8X\n", header.pf.bmask); - printf("\tAlpha mask: 0x%.8X\n", header.pf.amask); - - printf("Caps:\n"); - printf("\tCaps 1: 0x%.8X\n", header.caps.caps1); - if (header.caps.caps1 & DDSCAPS_COMPLEX) printf("\t\tDDSCAPS_COMPLEX\n"); - if (header.caps.caps1 & DDSCAPS_TEXTURE) printf("\t\tDDSCAPS_TEXTURE\n"); - if (header.caps.caps1 & DDSCAPS_MIPMAP) printf("\t\tDDSCAPS_MIPMAP\n"); - - printf("\tCaps 2: 0x%.8X\n", header.caps.caps2); - if (header.caps.caps2 & DDSCAPS2_VOLUME) printf("\t\tDDSCAPS2_VOLUME\n"); - else if (header.caps.caps2 & DDSCAPS2_CUBEMAP) - { - printf("\t\tDDSCAPS2_CUBEMAP\n"); - if ((header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) == DDSCAPS2_CUBEMAP_ALL_FACES) printf("\t\tDDSCAPS2_CUBEMAP_ALL_FACES\n"); - else { - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEX) 
printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEX\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEX) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEX\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEY) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEY\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEY) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEY\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEZ) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEZ\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEZ) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEZ\n"); - } - } - - printf("\tCaps 3: 0x%.8X\n", header.caps.caps3); - printf("\tCaps 4: 0x%.8X\n", header.caps.caps4); - - if (header.hasDX10Header()) - { - printf("DX10 Header:\n"); - printf("\tDXGI Format: %u (%s)\n", header.header10.dxgiFormat, getDxgiFormatString((DXGI_FORMAT)header.header10.dxgiFormat)); - printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((D3D10_RESOURCE_DIMENSION)header.header10.resourceDimension)); - printf("\tMisc flag: %u\n", header.header10.miscFlag); - printf("\tArray size: %u\n", header.header10.arraySize); - } - - if (header.reserved[9] == FOURCC_NVTT) - { - int major = (header.reserved[10] >> 16) & 0xFF; - int minor = (header.reserved[10] >> 8) & 0xFF; - int revision= header.reserved[10] & 0xFF; - - printf("Version:\n"); - printf("\tNVIDIA Texture Tools %d.%d.%d\n", major, minor, revision); - } - - if (header.reserved[7] == FOURCC_UVER) - { - printf("User Version: %d\n", header.reserved[8]); - } -} - +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is 
furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "DirectDrawSurface.h" +#include "ColorBlock.h" +#include "Image.h" +#include "BlockDXT.h" +#include "PixelFormat.h" + +#include "nvcore/Debug.h" +#include "nvcore/Utils.h" // max +#include "nvcore/StdStream.h" + +#include // memset + + +using namespace nv; + + +const uint nv::FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'); + +const uint nv::FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '); +const uint nv::FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'); +const uint nv::FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'); +const uint nv::FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'); +const uint nv::FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'); +const uint nv::FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'); +const uint nv::FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'); +const uint nv::FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'); +const uint nv::FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'); + + + +namespace +{ + + static const uint FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'); + + static const uint FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'); + + static const uint FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'); + + + + static const uint DDSD_CAPS = 0x00000001U; + static const uint DDSD_PIXELFORMAT = 0x00001000U; + static const uint DDSD_WIDTH = 0x00000004U; + static const uint DDSD_HEIGHT = 
0x00000002U; + static const uint DDSD_PITCH = 0x00000008U; + static const uint DDSD_MIPMAPCOUNT = 0x00020000U; + static const uint DDSD_LINEARSIZE = 0x00080000U; + static const uint DDSD_DEPTH = 0x00800000U; + + static const uint DDSCAPS_COMPLEX = 0x00000008U; + static const uint DDSCAPS_TEXTURE = 0x00001000U; + static const uint DDSCAPS_MIPMAP = 0x00400000U; + static const uint DDSCAPS2_VOLUME = 0x00200000U; + static const uint DDSCAPS2_CUBEMAP = 0x00000200U; + + static const uint DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400U; + static const uint DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800U; + static const uint DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000U; + static const uint DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000U; + static const uint DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000U; + static const uint DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000U; + static const uint DDSCAPS2_CUBEMAP_ALL_FACES = 0x0000FC00U; + + + const char * getDxgiFormatString(DXGI_FORMAT dxgiFormat) + { +#define CASE(format) case DXGI_FORMAT_##format: return #format + switch(dxgiFormat) + { + CASE(UNKNOWN); + + CASE(R32G32B32A32_TYPELESS); + CASE(R32G32B32A32_FLOAT); + CASE(R32G32B32A32_UINT); + CASE(R32G32B32A32_SINT); + + CASE(R32G32B32_TYPELESS); + CASE(R32G32B32_FLOAT); + CASE(R32G32B32_UINT); + CASE(R32G32B32_SINT); + + CASE(R16G16B16A16_TYPELESS); + CASE(R16G16B16A16_FLOAT); + CASE(R16G16B16A16_UNORM); + CASE(R16G16B16A16_UINT); + CASE(R16G16B16A16_SNORM); + CASE(R16G16B16A16_SINT); + + CASE(R32G32_TYPELESS); + CASE(R32G32_FLOAT); + CASE(R32G32_UINT); + CASE(R32G32_SINT); + + CASE(R32G8X24_TYPELESS); + CASE(D32_FLOAT_S8X24_UINT); + CASE(R32_FLOAT_X8X24_TYPELESS); + CASE(X32_TYPELESS_G8X24_UINT); + + CASE(R10G10B10A2_TYPELESS); + CASE(R10G10B10A2_UNORM); + CASE(R10G10B10A2_UINT); + + CASE(R11G11B10_FLOAT); + + CASE(R8G8B8A8_TYPELESS); + CASE(R8G8B8A8_UNORM); + CASE(R8G8B8A8_UNORM_SRGB); + CASE(R8G8B8A8_UINT); + CASE(R8G8B8A8_SNORM); + CASE(R8G8B8A8_SINT); + + CASE(R16G16_TYPELESS); + CASE(R16G16_FLOAT); + 
CASE(R16G16_UNORM); + CASE(R16G16_UINT); + CASE(R16G16_SNORM); + CASE(R16G16_SINT); + + CASE(R32_TYPELESS); + CASE(D32_FLOAT); + CASE(R32_FLOAT); + CASE(R32_UINT); + CASE(R32_SINT); + + CASE(R24G8_TYPELESS); + CASE(D24_UNORM_S8_UINT); + CASE(R24_UNORM_X8_TYPELESS); + CASE(X24_TYPELESS_G8_UINT); + + CASE(R8G8_TYPELESS); + CASE(R8G8_UNORM); + CASE(R8G8_UINT); + CASE(R8G8_SNORM); + CASE(R8G8_SINT); + + CASE(R16_TYPELESS); + CASE(R16_FLOAT); + CASE(D16_UNORM); + CASE(R16_UNORM); + CASE(R16_UINT); + CASE(R16_SNORM); + CASE(R16_SINT); + + CASE(R8_TYPELESS); + CASE(R8_UNORM); + CASE(R8_UINT); + CASE(R8_SNORM); + CASE(R8_SINT); + CASE(A8_UNORM); + + CASE(R1_UNORM); + + CASE(R9G9B9E5_SHAREDEXP); + + CASE(R8G8_B8G8_UNORM); + CASE(G8R8_G8B8_UNORM); + + CASE(BC1_TYPELESS); + CASE(BC1_UNORM); + CASE(BC1_UNORM_SRGB); + + CASE(BC2_TYPELESS); + CASE(BC2_UNORM); + CASE(BC2_UNORM_SRGB); + + CASE(BC3_TYPELESS); + CASE(BC3_UNORM); + CASE(BC3_UNORM_SRGB); + + CASE(BC4_TYPELESS); + CASE(BC4_UNORM); + CASE(BC4_SNORM); + + CASE(BC5_TYPELESS); + CASE(BC5_UNORM); + CASE(BC5_SNORM); + + CASE(B5G6R5_UNORM); + CASE(B5G5R5A1_UNORM); + CASE(B8G8R8A8_UNORM); + CASE(B8G8R8X8_UNORM); + + default: + return "UNKNOWN"; + } +#undef CASE + } + + const char * getD3d10ResourceDimensionString(D3D10_RESOURCE_DIMENSION resourceDimension) + { + switch(resourceDimension) + { + default: + case D3D10_RESOURCE_DIMENSION_UNKNOWN: return "UNKNOWN"; + case D3D10_RESOURCE_DIMENSION_BUFFER: return "BUFFER"; + case D3D10_RESOURCE_DIMENSION_TEXTURE1D: return "TEXTURE1D"; + case D3D10_RESOURCE_DIMENSION_TEXTURE2D: return "TEXTURE2D"; + case D3D10_RESOURCE_DIMENSION_TEXTURE3D: return "TEXTURE3D"; + } + } + + static uint pixelSize(D3DFORMAT format) { + if (format == D3DFMT_R16F) return 8*2; + if (format == D3DFMT_G16R16F) return 8*4; + if (format == D3DFMT_A16B16G16R16F) return 8*8; + if (format == D3DFMT_R32F) return 8*4; + if (format == D3DFMT_G32R32F) return 8*8; + if (format == D3DFMT_A32B32G32R32F) return 8*16; + + if 
(format == D3DFMT_R8G8B8) return 8*3; + if (format == D3DFMT_A8R8G8B8) return 8*4; + if (format == D3DFMT_X8R8G8B8) return 8*4; + if (format == D3DFMT_R5G6B5) return 8*2; + if (format == D3DFMT_X1R5G5B5) return 8*2; + if (format == D3DFMT_A1R5G5B5) return 8*2; + if (format == D3DFMT_A4R4G4B4) return 8*2; + if (format == D3DFMT_R3G3B2) return 8*1; + if (format == D3DFMT_A8) return 8*1; + if (format == D3DFMT_A8R3G3B2) return 8*2; + if (format == D3DFMT_X4R4G4B4) return 8*2; + if (format == D3DFMT_A2B10G10R10) return 8*4; + if (format == D3DFMT_A8B8G8R8) return 8*4; + if (format == D3DFMT_X8B8G8R8) return 8*4; + if (format == D3DFMT_G16R16) return 8*4; + if (format == D3DFMT_A2R10G10B10) return 8*4; + if (format == D3DFMT_A2B10G10R10) return 8*4; + + if (format == D3DFMT_L8) return 8*1; + if (format == D3DFMT_L16) return 8*2; + + return 0; + } + + static uint pixelSize(DXGI_FORMAT format) { + switch(format) { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + return 8*16; + + case DXGI_FORMAT_R32G32B32_TYPELESS: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R32G32B32_UINT: + case DXGI_FORMAT_R32G32B32_SINT: + return 8*12; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R32G32_UINT: + case DXGI_FORMAT_R32G32_SINT: + + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + return 8*8; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + + case DXGI_FORMAT_R11G11B10_FLOAT: + + case 
DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + case DXGI_FORMAT_R16G16_SNORM: + case DXGI_FORMAT_R16G16_SINT: + + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R32_UINT: + case DXGI_FORMAT_R32_SINT: + + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return 8*4; + + case DXGI_FORMAT_R8G8_TYPELESS: + case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R8G8_UINT: + case DXGI_FORMAT_R8G8_SNORM: + case DXGI_FORMAT_R8G8_SINT: + + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_UINT: + case DXGI_FORMAT_R16_SNORM: + case DXGI_FORMAT_R16_SINT: + return 8*2; + + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + return 8*1; + + case DXGI_FORMAT_R1_UNORM: + return 1; + + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + return 8*4; + + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + return 8*4; + + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_B5G5R5A1_UNORM: + return 8*2; + + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + return 8*4; + + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_TYPELESS: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + return 8*4; + } + + return 0; + } + +} // namespace + +namespace nv +{ + static Stream & operator<< (Stream & s, 
DDSPixelFormat & pf) + { + nvStaticCheck(sizeof(DDSPixelFormat) == 32); + s << pf.size; + s << pf.flags; + s << pf.fourcc; + s << pf.bitcount; + s.serialize(&pf.rmask, sizeof(pf.rmask)); + s.serialize(&pf.gmask, sizeof(pf.gmask)); + s.serialize(&pf.bmask, sizeof(pf.bmask)); + s.serialize(&pf.amask, sizeof(pf.amask)); + // s << pf.rmask; + // s << pf.gmask; + // s << pf.bmask; + // s << pf.amask; + return s; + } + + static Stream & operator<< (Stream & s, DDSCaps & caps) + { + nvStaticCheck(sizeof(DDSCaps) == 16); + s << caps.caps1; + s << caps.caps2; + s << caps.caps3; + s << caps.caps4; + return s; + } + + static Stream & operator<< (Stream & s, DDSHeader10 & header) + { + nvStaticCheck(sizeof(DDSHeader10) == 20); + s << header.dxgiFormat; + s << header.resourceDimension; + s << header.miscFlag; + s << header.arraySize; + s << header.reserved; + return s; + } + + Stream & operator<< (Stream & s, DDSHeader & header) + { + nvStaticCheck(sizeof(DDSHeader) == 148); + s << header.fourcc; + s << header.size; + s << header.flags; + s << header.height; + s << header.width; + s << header.pitch; + s << header.depth; + s << header.mipmapcount; + for (int i = 0; i < 11; i++) { + s << header.reserved[i]; + } + s << header.pf; + s << header.caps; + s << header.notused; + + if (header.hasDX10Header()) + { + s << header.header10; + } + + return s; + } + +} // nv namespace + +namespace +{ + struct FormatDescriptor + { + uint format; + uint bitcount; + uint rmask; + uint gmask; + uint bmask; + uint amask; + }; + + static const FormatDescriptor s_d3d9Formats[] = + { + { D3DFMT_R8G8B8, 24, 0xFF0000, 0xFF00, 0xFF, 0 }, + { D3DFMT_A8R8G8B8, 32, 0xFF0000, 0xFF00, 0xFF, 0xFF000000 }, // DXGI_FORMAT_B8G8R8A8_UNORM + { D3DFMT_X8R8G8B8, 32, 0xFF0000, 0xFF00, 0xFF, 0 }, // DXGI_FORMAT_B8G8R8X8_UNORM + { D3DFMT_R5G6B5, 16, 0xF800, 0x7E0, 0x1F, 0 }, // DXGI_FORMAT_B5G6R5_UNORM + { D3DFMT_X1R5G5B5, 16, 0x7C00, 0x3E0, 0x1F, 0 }, + { D3DFMT_A1R5G5B5, 16, 0x7C00, 0x3E0, 0x1F, 0x8000 }, // 
DXGI_FORMAT_B5G5R5A1_UNORM + { D3DFMT_A4R4G4B4, 16, 0xF00, 0xF0, 0xF, 0xF000 }, + { D3DFMT_R3G3B2, 8, 0xE0, 0x1C, 0x3, 0 }, + { D3DFMT_A8, 8, 0, 0, 0, 0xFF }, // DXGI_FORMAT_A8_UNORM + { D3DFMT_A8R3G3B2, 16, 0xE0, 0x1C, 0x3, 0xFF00 }, + { D3DFMT_X4R4G4B4, 16, 0xF00, 0xF0, 0xF, 0 }, + { D3DFMT_A2B10G10R10, 32, 0x3FF, 0xFFC00, 0x3FF00000, 0xC0000000 }, // DXGI_FORMAT_R10G10B10A2 + { D3DFMT_A8B8G8R8, 32, 0xFF, 0xFF00, 0xFF0000, 0xFF000000 }, // DXGI_FORMAT_R8G8B8A8_UNORM + { D3DFMT_X8B8G8R8, 32, 0xFF, 0xFF00, 0xFF0000, 0 }, + { D3DFMT_G16R16, 32, 0xFFFF, 0xFFFF0000, 0, 0 }, // DXGI_FORMAT_R16G16_UNORM + { D3DFMT_A2R10G10B10, 32, 0x3FF00000, 0xFFC00, 0x3FF, 0xC0000000 }, + { D3DFMT_A2B10G10R10, 32, 0x3FF, 0xFFC00, 0x3FF00000, 0xC0000000 }, + + // Single-channel formats: the mask column holds a bit mask covering the whole channel, not the channel's bit count. + { D3DFMT_L8, 8, 0xFF, 0, 0, 0 }, // DXGI_FORMAT_R8_UNORM + { D3DFMT_L16, 16, 0xFFFF, 0, 0, 0 }, // DXGI_FORMAT_R16_UNORM + }; + + static const uint s_d3d9FormatCount = NV_ARRAY_SIZE(s_d3d9Formats); + +} // namespace + +// Returns the format code of the first s_d3d9Formats entry whose bit count and four channel masks all equal the arguments, or 0 when no entry matches. +uint nv::findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) +{ + for (uint i = 0; i < s_d3d9FormatCount; i++) + { + if (s_d3d9Formats[i].bitcount == bitcount && + s_d3d9Formats[i].rmask == rmask && + s_d3d9Formats[i].gmask == gmask && + s_d3d9Formats[i].bmask == bmask && + s_d3d9Formats[i].amask == amask) + { + return s_d3d9Formats[i].format; + } + } + + return 0; +} + + +DDSHeader::DDSHeader() +{ + this->fourcc = FOURCC_DDS; + this->size = 124; + this->flags = (DDSD_CAPS|DDSD_PIXELFORMAT); + this->height = 0; + this->width = 0; + this->pitch = 0; + this->depth = 0; + this->mipmapcount = 0; + memset(this->reserved, 0, sizeof(this->reserved)); + + // Store version information on the reserved header attributes. 
+ this->reserved[9] = FOURCC_NVTT; + this->reserved[10] = (2 << 16) | (1 << 8) | (0); // major.minor.revision + + this->pf.size = 32; + this->pf.flags = 0; + this->pf.fourcc = 0; + this->pf.bitcount = 0; + this->pf.rmask = 0; + this->pf.gmask = 0; + this->pf.bmask = 0; + this->pf.amask = 0; + this->caps.caps1 = DDSCAPS_TEXTURE; + this->caps.caps2 = 0; + this->caps.caps3 = 0; + this->caps.caps4 = 0; + this->notused = 0; + + this->header10.dxgiFormat = DXGI_FORMAT_UNKNOWN; + this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_UNKNOWN; + this->header10.miscFlag = 0; + this->header10.arraySize = 0; + this->header10.reserved = 0; +} + +void DDSHeader::setWidth(uint w) +{ + this->flags |= DDSD_WIDTH; + this->width = w; +} + +void DDSHeader::setHeight(uint h) +{ + this->flags |= DDSD_HEIGHT; + this->height = h; +} + +void DDSHeader::setDepth(uint d) +{ + this->flags |= DDSD_DEPTH; + this->depth = d; +} + +void DDSHeader::setMipmapCount(uint count) +{ + if (count == 0 || count == 1) + { + this->flags &= ~DDSD_MIPMAPCOUNT; + this->mipmapcount = 1; + + if (this->caps.caps2 == 0) { + this->caps.caps1 = DDSCAPS_TEXTURE; + } + else { + this->caps.caps1 = DDSCAPS_TEXTURE | DDSCAPS_COMPLEX; + } + } + else + { + this->flags |= DDSD_MIPMAPCOUNT; + this->mipmapcount = count; + + this->caps.caps1 |= DDSCAPS_COMPLEX | DDSCAPS_MIPMAP; + } +} + +void DDSHeader::setTexture2D() +{ + this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; + this->header10.arraySize = 1; +} + +void DDSHeader::setTexture3D() +{ + this->caps.caps2 = DDSCAPS2_VOLUME; + + this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE3D; + this->header10.arraySize = 1; +} + +void DDSHeader::setTextureCube() +{ + this->caps.caps1 |= DDSCAPS_COMPLEX; + this->caps.caps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALL_FACES; + + this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; + this->header10.arraySize = 6; +} + +void DDSHeader::setLinearSize(uint size) +{ + this->flags 
&= ~DDSD_PITCH; + this->flags |= DDSD_LINEARSIZE; + this->pitch = size; +} + +void DDSHeader::setPitch(uint pitch) +{ + this->flags &= ~DDSD_LINEARSIZE; + this->flags |= DDSD_PITCH; + this->pitch = pitch; +} + +void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3) +{ + // set fourcc pixel format. + this->pf.flags = DDPF_FOURCC; + this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3); + + this->pf.bitcount = 0; + this->pf.rmask = 0; + this->pf.gmask = 0; + this->pf.bmask = 0; + this->pf.amask = 0; +} + +void DDSHeader::setFormatCode(uint32 code) +{ + // set fourcc pixel format. + this->pf.flags = DDPF_FOURCC; + this->pf.fourcc = code; + + this->pf.bitcount = 0; + this->pf.rmask = 0; + this->pf.gmask = 0; + this->pf.bmask = 0; + this->pf.amask = 0; +} + +void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3) +{ + this->pf.bitcount = MAKEFOURCC(c0, c1, c2, c3); +} + + +void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) +{ + // Make sure the masks are correct. + nvCheck((rmask & gmask) == 0); + nvCheck((rmask & bmask) == 0); + nvCheck((rmask & amask) == 0); + nvCheck((gmask & bmask) == 0); + nvCheck((gmask & amask) == 0); + nvCheck((bmask & amask) == 0); + + if (rmask != 0 || gmask != 0 || bmask != 0) + { + if (gmask == 0 && bmask == 0) + { + this->pf.flags = DDPF_LUMINANCE; + } + else + { + this->pf.flags = DDPF_RGB; + } + + if (amask != 0) { + this->pf.flags |= DDPF_ALPHAPIXELS; + } + } + else if (amask != 0) + { + this->pf.flags |= DDPF_ALPHA; + } + + if (bitcount == 0) + { + // Compute bit count from the masks. 
+ uint total = rmask | gmask | bmask | amask; + while(total != 0) { + bitcount++; + total >>= 1; + } + } + + // D3DX functions do not like this: + this->pf.fourcc = 0; //findD3D9Format(bitcount, rmask, gmask, bmask, amask); + /*if (this->pf.fourcc) { + this->pf.flags |= DDPF_FOURCC; + }*/ + + nvCheck(bitcount > 0 && bitcount <= 32); + this->pf.bitcount = bitcount; + this->pf.rmask = rmask; + this->pf.gmask = gmask; + this->pf.bmask = bmask; + this->pf.amask = amask; +} + +void DDSHeader::setDX10Format(uint format) +{ + this->pf.flags = DDPF_FOURCC; + this->pf.fourcc = FOURCC_DX10; + this->header10.dxgiFormat = format; +} + +void DDSHeader::setNormalFlag(bool b) +{ + if (b) this->pf.flags |= DDPF_NORMAL; + else this->pf.flags &= ~DDPF_NORMAL; +} + +void DDSHeader::setSrgbFlag(bool b) +{ + if (b) this->pf.flags |= DDPF_SRGB; + else this->pf.flags &= ~DDPF_SRGB; +} + +void DDSHeader::setHasAlphaFlag(bool b) +{ + if (b) this->pf.flags |= DDPF_ALPHAPIXELS; + else this->pf.flags &= ~DDPF_ALPHAPIXELS; +} + +void DDSHeader::setUserVersion(int version) +{ + this->reserved[7] = FOURCC_UVER; + this->reserved[8] = version; +} + +void DDSHeader::swapBytes() +{ + this->fourcc = POSH_LittleU32(this->fourcc); + this->size = POSH_LittleU32(this->size); + this->flags = POSH_LittleU32(this->flags); + this->height = POSH_LittleU32(this->height); + this->width = POSH_LittleU32(this->width); + this->pitch = POSH_LittleU32(this->pitch); + this->depth = POSH_LittleU32(this->depth); + this->mipmapcount = POSH_LittleU32(this->mipmapcount); + + for(int i = 0; i < 11; i++) { + this->reserved[i] = POSH_LittleU32(this->reserved[i]); + } + + this->pf.size = POSH_LittleU32(this->pf.size); + this->pf.flags = POSH_LittleU32(this->pf.flags); + this->pf.fourcc = POSH_LittleU32(this->pf.fourcc); + this->pf.bitcount = POSH_LittleU32(this->pf.bitcount); + this->pf.rmask = POSH_LittleU32(this->pf.rmask); + this->pf.gmask = POSH_LittleU32(this->pf.gmask); + this->pf.bmask = POSH_LittleU32(this->pf.bmask); 
+ this->pf.amask = POSH_LittleU32(this->pf.amask); + this->caps.caps1 = POSH_LittleU32(this->caps.caps1); + this->caps.caps2 = POSH_LittleU32(this->caps.caps2); + this->caps.caps3 = POSH_LittleU32(this->caps.caps3); + this->caps.caps4 = POSH_LittleU32(this->caps.caps4); + this->notused = POSH_LittleU32(this->notused); + + this->header10.dxgiFormat = POSH_LittleU32(this->header10.dxgiFormat); + this->header10.resourceDimension = POSH_LittleU32(this->header10.resourceDimension); + this->header10.miscFlag = POSH_LittleU32(this->header10.miscFlag); + this->header10.arraySize = POSH_LittleU32(this->header10.arraySize); + this->header10.reserved = POSH_LittleU32(this->header10.reserved); +} + +bool DDSHeader::hasDX10Header() const +{ + //if (pf.flags & DDPF_FOURCC) { + return this->pf.fourcc == FOURCC_DX10; + //} + //return false; +} + +uint DDSHeader::signature() const +{ + return this->reserved[9]; +} + +uint DDSHeader::toolVersion() const +{ + return this->reserved[10]; +} + +uint DDSHeader::userVersion() const +{ + if (this->reserved[7] == FOURCC_UVER) { + return this->reserved[8]; + } + return 0; +} + +bool DDSHeader::isNormalMap() const +{ + return (pf.flags & DDPF_NORMAL) != 0; +} + +bool DDSHeader::isSrgb() const +{ + return (pf.flags & DDPF_SRGB) != 0; +} + +bool DDSHeader::hasAlpha() const +{ + return (pf.flags & DDPF_ALPHAPIXELS) != 0; +} + +uint DDSHeader::d3d9Format() const +{ + if (pf.flags & DDPF_FOURCC) { + return pf.fourcc; + } + else { + return findD3D9Format(pf.bitcount, pf.rmask, pf.gmask, pf.bmask, pf.amask); + } +} + +// Returns the size of one pixel in bits: looked up from the DXGI format when a DX10 header is present, looked up from the D3DFORMAT code stored in pf.fourcc when DDPF_FOURCC is set, otherwise the explicit pf.bitcount. +uint DDSHeader::pixelSize() const +{ + if (hasDX10Header()) { + return ::pixelSize((DXGI_FORMAT)header10.dxgiFormat); + } + else { + // DDPF_FOURCC is a pixel-format bit, so it must be tested on pf.flags (as d3d9Format() does); the header's own 'flags' field carries the DDSD_* bits. + if (pf.flags & DDPF_FOURCC) { + return ::pixelSize((D3DFORMAT)pf.fourcc); + } + else { + nvDebugCheck((pf.flags & DDPF_RGB) || (pf.flags & DDPF_LUMINANCE)); + return pf.bitcount; + } + } +} + +uint DDSHeader::blockSize() const +{ + switch(pf.fourcc) + { + case FOURCC_DXT1: + case FOURCC_ATI1: + return 8; + 
case FOURCC_DXT2: + case FOURCC_DXT3: + case FOURCC_DXT4: + case FOURCC_DXT5: + case FOURCC_RXGB: + case FOURCC_ATI2: + return 16; + case FOURCC_DX10: + switch(header10.dxgiFormat) + { + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + return 8; + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_SF16: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + return 16; + }; + }; + + // Not a block image. + return 0; +} + +bool DDSHeader::isBlockFormat() const +{ + return blockSize() != 0; +} + + + + + +DirectDrawSurface::DirectDrawSurface() : stream(NULL) +{ +} + +DirectDrawSurface::DirectDrawSurface(const char * name) : stream(NULL) +{ + load(name); +} + +DirectDrawSurface::DirectDrawSurface(Stream * s) : stream(NULL) +{ + load(s); +} + +DirectDrawSurface::~DirectDrawSurface() +{ + delete stream; +} + +bool DirectDrawSurface::load(const char * filename) +{ + return load(new StdInputStream(filename)); +} + +bool DirectDrawSurface::load(Stream * stream) +{ + delete this->stream; + this->stream = stream; + + if (!stream->isError()) + { + (*stream) << header; + return true; + } + + return false; +} + +bool DirectDrawSurface::isValid() const +{ + if (stream == NULL || stream->isError()) + { + return false; + } + + if (header.fourcc != FOURCC_DDS || header.size != 124) + { + return false; + } + + const uint required = (DDSD_WIDTH|DDSD_HEIGHT/*|DDSD_CAPS|DDSD_PIXELFORMAT*/); + if( (header.flags & required) != required ) { + return false; + } + + if (header.pf.size != 32) 
{ + return false; + } + + if( !(header.caps.caps1 & DDSCAPS_TEXTURE) ) { + return false; + } + + return true; +} + +bool DirectDrawSurface::isSupported() const +{ + nvDebugCheck(isValid()); + + if (header.hasDX10Header()) + { + if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) + { + return true; + } + + return false; + } + else + { + if (header.pf.flags & DDPF_FOURCC) + { + if (header.pf.fourcc != FOURCC_DXT1 && + header.pf.fourcc != FOURCC_DXT2 && + header.pf.fourcc != FOURCC_DXT3 && + header.pf.fourcc != FOURCC_DXT4 && + header.pf.fourcc != FOURCC_DXT5 && + header.pf.fourcc != FOURCC_RXGB && + header.pf.fourcc != FOURCC_ATI1 && + header.pf.fourcc != FOURCC_ATI2) + { + // Unknown fourcc code. + return false; + } + } + else if ((header.pf.flags & DDPF_RGB) || (header.pf.flags & DDPF_LUMINANCE)) + { + // All RGB and luminance formats are supported now. + } + else + { + return false; + } + + if (isTextureCube()) { + if (header.width != header.height) return false; + + if ((header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES) + { + // Cubemaps must contain all faces. 
+ return false; + } + } + } + + return true; +} + +bool DirectDrawSurface::hasAlpha() const +{ + if (header.hasDX10Header()) + { +#pragma NV_MESSAGE("TODO: Update hasAlpha to handle all DX10 formats.") + return + header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM; + } + else + { + if (header.pf.flags & DDPF_RGB) + { + return header.pf.amask != 0; + } + else if (header.pf.flags & DDPF_FOURCC) + { + if (header.pf.fourcc == FOURCC_RXGB || + header.pf.fourcc == FOURCC_ATI1 || + header.pf.fourcc == FOURCC_ATI2 || + header.pf.flags & DDPF_NORMAL) + { + return false; + } + else + { + // @@ Here we could check the ALPHA_PIXELS flag, but nobody sets it. (except us?) + return true; + } + } + + return false; + } +} + +uint DirectDrawSurface::mipmapCount() const +{ + nvDebugCheck(isValid()); + if (header.flags & DDSD_MIPMAPCOUNT) return header.mipmapcount; + else return 1; +} + + +uint DirectDrawSurface::width() const +{ + nvDebugCheck(isValid()); + if (header.flags & DDSD_WIDTH) return header.width; + else return 1; +} + +uint DirectDrawSurface::height() const +{ + nvDebugCheck(isValid()); + if (header.flags & DDSD_HEIGHT) return header.height; + else return 1; +} + +uint DirectDrawSurface::depth() const +{ + nvDebugCheck(isValid()); + if (header.flags & DDSD_DEPTH) return header.depth; + else return 1; +} + +bool DirectDrawSurface::isTexture1D() const +{ + nvDebugCheck(isValid()); + if (header.hasDX10Header()) + { + return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE1D; + } + return false; +} + +bool DirectDrawSurface::isTexture2D() const +{ + nvDebugCheck(isValid()); + if (header.hasDX10Header()) + { + return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE2D; + } + else + { + return !isTexture3D() && !isTextureCube(); + } +} + +bool DirectDrawSurface::isTexture3D() const +{ + nvDebugCheck(isValid()); + if 
(header.hasDX10Header()) + { + return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE3D; + } + else + { + return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0; + } +} + +bool DirectDrawSurface::isTextureCube() const +{ + nvDebugCheck(isValid()); + return (header.caps.caps2 & DDSCAPS2_CUBEMAP) != 0; +} + +void DirectDrawSurface::setNormalFlag(bool b) +{ + nvDebugCheck(isValid()); + header.setNormalFlag(b); +} + +void DirectDrawSurface::setHasAlphaFlag(bool b) +{ + nvDebugCheck(isValid()); + header.setHasAlphaFlag(b); +} + +void DirectDrawSurface::setUserVersion(int version) +{ + nvDebugCheck(isValid()); + header.setUserVersion(version); +} + +void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap) +{ + nvDebugCheck(isValid()); + + stream->seek(offset(face, mipmap)); + + uint w = width(); + uint h = height(); + uint d = depth(); + + // Compute width and height. + for (uint m = 0; m < mipmap; m++) + { + w = max(1U, w / 2); + h = max(1U, h / 2); + d = max(1U, d / 2); + } + + img->allocate(w, h, d); + + if (hasAlpha()) + { + img->setFormat(Image::Format_ARGB); + } + else + { + img->setFormat(Image::Format_RGB); + } + + if (header.hasDX10Header()) + { + // So far only block formats supported. 
+ readBlockImage(img); + } + else + { + if (header.pf.flags & DDPF_RGB) + { + readLinearImage(img); + } + else if (header.pf.flags & DDPF_FOURCC) + { + readBlockImage(img); + } + } +} + +/*void * DirectDrawSurface::readData(uint * sizePtr) +{ + uint header_size = 128; // sizeof(DDSHeader); + + if (header.hasDX10Header()) + { + header_size += 20; // sizeof(DDSHeader10); + } + + stream->seek(header_size); + + int size = stream->size() - header_size; + *sizePtr = size; + + void * data = new unsigned char [size]; + + size = stream->serialize(data, size); + nvDebugCheck(size == *sizePtr); + + return data; +}*/ + +/*uint DirectDrawSurface::surfaceSize(uint mipmap) const +{ + uint w = header.width(); + uint h = header.height(); + uint d = header.depth(); + for (int m = 0; m < mipmap; m++) { + w = (w + 1) / 2; + h = (h + 1) / 2; + d = (d + 1) / 2; + } + + bool isBlockFormat; + uint blockOrPixelSize; + + if (header.hasDX10Header()) { + blockOrPixelSize = blockSize(header10.dxgiFormat); + isBlockFormat = (blockOrPixelSize != 0); + if (isBlockFormat) { + blockOrPixelSize = pixelSize(header10.dxgiFormat); + } + } + else { + header.pf.flags + } + + if (isBlockFormat) { + w = (w + 3) / 4; + h = (h + 3) / 4; + d = (d + 3) / 4; // @@ Is it necessary to align the depths? 
+ } + + uint blockOrPixelCount = w * h * d; + + return blockCount = blockOrPixelSize; +}*/ + +bool DirectDrawSurface::readSurface(uint face, uint mipmap, void * data, uint size) +{ + if (size != surfaceSize(mipmap)) return false; + + stream->seek(offset(face, mipmap)); + if (stream->isError()) return false; + + return stream->serialize(data, size) == size; +} + + +void DirectDrawSurface::readLinearImage(Image * img) +{ + nvDebugCheck(stream != NULL); + nvDebugCheck(img != NULL); + + const uint w = img->width(); + const uint h = img->height(); + + uint rshift, rsize; + PixelFormat::maskShiftAndSize(header.pf.rmask, &rshift, &rsize); + + uint gshift, gsize; + PixelFormat::maskShiftAndSize(header.pf.gmask, &gshift, &gsize); + + uint bshift, bsize; + PixelFormat::maskShiftAndSize(header.pf.bmask, &bshift, &bsize); + + uint ashift, asize; + PixelFormat::maskShiftAndSize(header.pf.amask, &ashift, &asize); + + uint byteCount = (header.pf.bitcount + 7) / 8; + +#pragma NV_MESSAGE("TODO: Support floating point linear images and other FOURCC codes.") + + // Read linear RGB images. + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + uint c = 0; + stream->serialize(&c, byteCount); + + Color32 pixel(0, 0, 0, 0xFF); + pixel.r = PixelFormat::convert((c & header.pf.rmask) >> rshift, rsize, 8); + pixel.g = PixelFormat::convert((c & header.pf.gmask) >> gshift, gsize, 8); + pixel.b = PixelFormat::convert((c & header.pf.bmask) >> bshift, bsize, 8); + pixel.a = PixelFormat::convert((c & header.pf.amask) >> ashift, asize, 8); + + img->pixel(x, y) = pixel; + } + } +} + +void DirectDrawSurface::readBlockImage(Image * img) +{ + nvDebugCheck(stream != NULL); + nvDebugCheck(img != NULL); + + const uint w = img->width(); + const uint h = img->height(); + + const uint bw = (w + 3) / 4; + const uint bh = (h + 3) / 4; + + for (uint by = 0; by < bh; by++) + { + for (uint bx = 0; bx < bw; bx++) + { + ColorBlock block; + + // Read color block. 
+ readBlock(&block); + + // Write color block. + for (uint y = 0; y < min(4U, h-4*by); y++) + { + for (uint x = 0; x < min(4U, w-4*bx); x++) + { + img->pixel(4*bx+x, 4*by+y) = block.color(x, y); + } + } + } + } +} + +static Color32 buildNormal(uint8 x, uint8 y) +{ + float nx = 2 * (x / 255.0f) - 1; + float ny = 2 * (y / 255.0f) - 1; + float nz = 0.0f; + if (1 - nx*nx - ny*ny > 0) nz = sqrtf(1 - nx*nx - ny*ny); + uint8 z = clamp(int(255.0f * (nz + 1) / 2.0f), 0, 255); + + return Color32(x, y, z); +} + + +void DirectDrawSurface::readBlock(ColorBlock * rgba) +{ + nvDebugCheck(stream != NULL); + nvDebugCheck(rgba != NULL); + + uint fourcc = header.pf.fourcc; + + // Map DX10 block formats to fourcc codes. + if (header.hasDX10Header()) + { + if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM) fourcc = FOURCC_DXT1; + if (header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM) fourcc = FOURCC_DXT3; + if (header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM) fourcc = FOURCC_DXT5; + if (header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM) fourcc = FOURCC_ATI1; + if (header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) fourcc = FOURCC_ATI2; + } + + + if (fourcc == FOURCC_DXT1) + { + BlockDXT1 block; + *stream << block; + block.decodeBlock(rgba); + } + else if (fourcc == FOURCC_DXT2 || fourcc == FOURCC_DXT3) + { + BlockDXT3 block; + *stream << block; + block.decodeBlock(rgba); + } + else if (fourcc == FOURCC_DXT4 || fourcc == FOURCC_DXT5 || fourcc == FOURCC_RXGB) + { + BlockDXT5 block; + *stream << block; + block.decodeBlock(rgba); + + if (fourcc == FOURCC_RXGB) + { + // Swap R & A. + for (int i = 0; i < 16; i++) + { + Color32 & c = rgba->color(i); + uint tmp = c.r; + c.r = c.a; + c.a = tmp; + } + } + } + else if (fourcc == FOURCC_ATI1) + { + BlockATI1 block; + *stream << block; + block.decodeBlock(rgba); + } + else if (fourcc == FOURCC_ATI2) + { + BlockATI2 block; + *stream << block; + block.decodeBlock(rgba); + } + + // If normal flag set, convert to normal. 
+ if (header.pf.flags & DDPF_NORMAL) + { + if (fourcc == FOURCC_ATI2) + { + for (int i = 0; i < 16; i++) + { + Color32 & c = rgba->color(i); + c = buildNormal(c.r, c.g); + } + } + else if (fourcc == FOURCC_DXT5) + { + for (int i = 0; i < 16; i++) + { + Color32 & c = rgba->color(i); + c = buildNormal(c.a, c.g); + } + } + } +} + + +static uint mipmapExtent(uint mipmap, uint x) +{ + for (uint m = 0; m < mipmap; m++) { + x = max(1U, x / 2); + } + return x; +} + +uint DirectDrawSurface::surfaceWidth(uint mipmap) const +{ + return mipmapExtent(mipmap, width()); +} + +uint DirectDrawSurface::surfaceHeight(uint mipmap) const +{ + return mipmapExtent(mipmap, height()); +} + +uint DirectDrawSurface::surfaceDepth(uint mipmap) const +{ + return mipmapExtent(mipmap, depth()); +} + +uint DirectDrawSurface::surfaceSize(uint mipmap) const +{ + uint w = surfaceWidth(mipmap); + uint h = surfaceHeight(mipmap); + uint d = surfaceDepth(mipmap); + + uint blockSize = header.blockSize(); + + if (blockSize == 0) { + uint bitCount = header.pixelSize(); + uint pitch = computeBytePitch(w, bitCount, 1); // Asuming 1 byte alignment, which is the same D3DX expects. + return pitch * h * d; + } + else { + w = (w + 3) / 4; + h = (h + 3) / 4; + d = d; // @@ How are 3D textures aligned? 
+ return blockSize * w * h * d; + } +} + +uint DirectDrawSurface::faceSize() const +{ + const uint count = mipmapCount(); + uint size = 0; + + for (uint m = 0; m < count; m++) + { + size += surfaceSize(m); + } + + return size; +} + +uint DirectDrawSurface::offset(const uint face, const uint mipmap) +{ + uint size = 128; // sizeof(DDSHeader); + + if (header.hasDX10Header()) + { + size += 20; // sizeof(DDSHeader10); + } + + if (face != 0) + { + size += face * faceSize(); + } + + for (uint m = 0; m < mipmap; m++) + { + size += surfaceSize(m); + } + + return size; +} + + +void DirectDrawSurface::printInfo() const +{ + printf("Flags: 0x%.8X\n", header.flags); + if (header.flags & DDSD_CAPS) printf("\tDDSD_CAPS\n"); + if (header.flags & DDSD_PIXELFORMAT) printf("\tDDSD_PIXELFORMAT\n"); + if (header.flags & DDSD_WIDTH) printf("\tDDSD_WIDTH\n"); + if (header.flags & DDSD_HEIGHT) printf("\tDDSD_HEIGHT\n"); + if (header.flags & DDSD_DEPTH) printf("\tDDSD_DEPTH\n"); + if (header.flags & DDSD_PITCH) printf("\tDDSD_PITCH\n"); + if (header.flags & DDSD_LINEARSIZE) printf("\tDDSD_LINEARSIZE\n"); + if (header.flags & DDSD_MIPMAPCOUNT) printf("\tDDSD_MIPMAPCOUNT\n"); + + printf("Height: %d\n", header.height); + printf("Width: %d\n", header.width); + printf("Depth: %d\n", header.depth); + if (header.flags & DDSD_PITCH) printf("Pitch: %d\n", header.pitch); + else if (header.flags & DDSD_LINEARSIZE) printf("Linear size: %d\n", header.pitch); + printf("Mipmap count: %d\n", header.mipmapcount); + + printf("Pixel Format:\n"); + printf("\tFlags: 0x%.8X\n", header.pf.flags); + if (header.pf.flags & DDPF_RGB) printf("\t\tDDPF_RGB\n"); + if (header.pf.flags & DDPF_LUMINANCE) printf("\t\tDDPF_LUMINANCE\n"); + if (header.pf.flags & DDPF_FOURCC) printf("\t\tDDPF_FOURCC\n"); + if (header.pf.flags & DDPF_ALPHAPIXELS) printf("\t\tDDPF_ALPHAPIXELS\n"); + if (header.pf.flags & DDPF_ALPHA) printf("\t\tDDPF_ALPHA\n"); + if (header.pf.flags & DDPF_PALETTEINDEXED1) printf("\t\tDDPF_PALETTEINDEXED1\n"); 
+ if (header.pf.flags & DDPF_PALETTEINDEXED2) printf("\t\tDDPF_PALETTEINDEXED2\n"); + if (header.pf.flags & DDPF_PALETTEINDEXED4) printf("\t\tDDPF_PALETTEINDEXED4\n"); + if (header.pf.flags & DDPF_PALETTEINDEXED8) printf("\t\tDDPF_PALETTEINDEXED8\n"); + if (header.pf.flags & DDPF_ALPHAPREMULT) printf("\t\tDDPF_ALPHAPREMULT\n"); + if (header.pf.flags & DDPF_NORMAL) printf("\t\tDDPF_NORMAL\n"); + + if (header.pf.fourcc != 0) { + // Display fourcc code even when DDPF_FOURCC flag not set. + printf("\tFourCC: '%c%c%c%c' (0x%.8X)\n", + ((header.pf.fourcc >> 0) & 0xFF), + ((header.pf.fourcc >> 8) & 0xFF), + ((header.pf.fourcc >> 16) & 0xFF), + ((header.pf.fourcc >> 24) & 0xFF), + header.pf.fourcc); + } + + if ((header.pf.flags & DDPF_FOURCC) && (header.pf.bitcount != 0)) + { + printf("\tSwizzle: '%c%c%c%c' (0x%.8X)\n", + (header.pf.bitcount >> 0) & 0xFF, + (header.pf.bitcount >> 8) & 0xFF, + (header.pf.bitcount >> 16) & 0xFF, + (header.pf.bitcount >> 24) & 0xFF, + header.pf.bitcount); + } + else + { + printf("\tBit count: %d\n", header.pf.bitcount); + } + + printf("\tRed mask: 0x%.8X\n", header.pf.rmask); + printf("\tGreen mask: 0x%.8X\n", header.pf.gmask); + printf("\tBlue mask: 0x%.8X\n", header.pf.bmask); + printf("\tAlpha mask: 0x%.8X\n", header.pf.amask); + + printf("Caps:\n"); + printf("\tCaps 1: 0x%.8X\n", header.caps.caps1); + if (header.caps.caps1 & DDSCAPS_COMPLEX) printf("\t\tDDSCAPS_COMPLEX\n"); + if (header.caps.caps1 & DDSCAPS_TEXTURE) printf("\t\tDDSCAPS_TEXTURE\n"); + if (header.caps.caps1 & DDSCAPS_MIPMAP) printf("\t\tDDSCAPS_MIPMAP\n"); + + printf("\tCaps 2: 0x%.8X\n", header.caps.caps2); + if (header.caps.caps2 & DDSCAPS2_VOLUME) printf("\t\tDDSCAPS2_VOLUME\n"); + else if (header.caps.caps2 & DDSCAPS2_CUBEMAP) + { + printf("\t\tDDSCAPS2_CUBEMAP\n"); + if ((header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) == DDSCAPS2_CUBEMAP_ALL_FACES) printf("\t\tDDSCAPS2_CUBEMAP_ALL_FACES\n"); + else { + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEX) 
printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEX\n"); + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEX) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEX\n"); + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEY) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEY\n"); + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEY) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEY\n"); + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEZ) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEZ\n"); + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEZ) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEZ\n"); + } + } + + printf("\tCaps 3: 0x%.8X\n", header.caps.caps3); + printf("\tCaps 4: 0x%.8X\n", header.caps.caps4); + + if (header.hasDX10Header()) + { + printf("DX10 Header:\n"); + printf("\tDXGI Format: %u (%s)\n", header.header10.dxgiFormat, getDxgiFormatString((DXGI_FORMAT)header.header10.dxgiFormat)); + printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((D3D10_RESOURCE_DIMENSION)header.header10.resourceDimension)); + printf("\tMisc flag: %u\n", header.header10.miscFlag); + printf("\tArray size: %u\n", header.header10.arraySize); + } + + if (header.reserved[9] == FOURCC_NVTT) + { + int major = (header.reserved[10] >> 16) & 0xFF; + int minor = (header.reserved[10] >> 8) & 0xFF; + int revision= header.reserved[10] & 0xFF; + + printf("Version:\n"); + printf("\tNVIDIA Texture Tools %d.%d.%d\n", major, minor, revision); + } + + if (header.reserved[7] == FOURCC_UVER) + { + printf("User Version: %d\n", header.reserved[8]); + } +} + diff --git a/src/nvimage/DirectDrawSurface.h b/src/nvimage/DirectDrawSurface.h index ed02b8a..5a8c62b 100644 --- a/src/nvimage/DirectDrawSurface.h +++ b/src/nvimage/DirectDrawSurface.h @@ -1,406 +1,406 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// 
restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#pragma once -#ifndef NV_IMAGE_DIRECTDRAWSURFACE_H -#define NV_IMAGE_DIRECTDRAWSURFACE_H - -#include "nvimage.h" - -#if !defined(MAKEFOURCC) -#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ - (uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \ - (uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 )) -#endif - -namespace nv -{ - class Image; - class Stream; - struct ColorBlock; - - extern const uint FOURCC_NVTT; - extern const uint FOURCC_DDS; - extern const uint FOURCC_DXT1; - extern const uint FOURCC_DXT2; - extern const uint FOURCC_DXT3; - extern const uint FOURCC_DXT4; - extern const uint FOURCC_DXT5; - extern const uint FOURCC_RXGB; - extern const uint FOURCC_ATI1; - extern const uint FOURCC_ATI2; - - enum DDPF - { - DDPF_ALPHAPIXELS = 0x00000001U, - DDPF_ALPHA = 0x00000002U, - DDPF_FOURCC = 0x00000004U, - DDPF_RGB = 0x00000040U, - DDPF_PALETTEINDEXED1 = 0x00000800U, - DDPF_PALETTEINDEXED2 = 0x00001000U, - DDPF_PALETTEINDEXED4 = 0x00000008U, - DDPF_PALETTEINDEXED8 = 0x00000020U, - DDPF_LUMINANCE = 0x00020000U, - DDPF_ALPHAPREMULT = 0x00008000U, - - // Custom NVTT flags. 
- DDPF_NORMAL = 0x80000000U, - DDPF_SRGB = 0x40000000U, - }; - - - enum D3DFORMAT - { - // 32 bit RGB formats. - D3DFMT_R8G8B8 = 20, - D3DFMT_A8R8G8B8 = 21, - D3DFMT_X8R8G8B8 = 22, - D3DFMT_R5G6B5 = 23, - D3DFMT_X1R5G5B5 = 24, - D3DFMT_A1R5G5B5 = 25, - D3DFMT_A4R4G4B4 = 26, - D3DFMT_R3G3B2 = 27, - D3DFMT_A8 = 28, - D3DFMT_A8R3G3B2 = 29, - D3DFMT_X4R4G4B4 = 30, - D3DFMT_A2B10G10R10 = 31, - D3DFMT_A8B8G8R8 = 32, - D3DFMT_X8B8G8R8 = 33, - D3DFMT_G16R16 = 34, - D3DFMT_A2R10G10B10 = 35, - - D3DFMT_A16B16G16R16 = 36, - - // Palette formats. - D3DFMT_A8P8 = 40, - D3DFMT_P8 = 41, - - // Luminance formats. - D3DFMT_L8 = 50, - D3DFMT_A8L8 = 51, - D3DFMT_A4L4 = 52, - D3DFMT_L16 = 81, - - // Floating point formats - D3DFMT_R16F = 111, - D3DFMT_G16R16F = 112, - D3DFMT_A16B16G16R16F = 113, - D3DFMT_R32F = 114, - D3DFMT_G32R32F = 115, - D3DFMT_A32B32G32R32F = 116, - }; - - - // D3D1x resource dimensions. - enum D3D10_RESOURCE_DIMENSION - { - D3D10_RESOURCE_DIMENSION_UNKNOWN = 0, - D3D10_RESOURCE_DIMENSION_BUFFER = 1, - D3D10_RESOURCE_DIMENSION_TEXTURE1D = 2, - D3D10_RESOURCE_DIMENSION_TEXTURE2D = 3, - D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4, - }; - - // DXGI formats. 
- enum DXGI_FORMAT - { - DXGI_FORMAT_UNKNOWN = 0, - - DXGI_FORMAT_R32G32B32A32_TYPELESS = 1, - DXGI_FORMAT_R32G32B32A32_FLOAT = 2, - DXGI_FORMAT_R32G32B32A32_UINT = 3, - DXGI_FORMAT_R32G32B32A32_SINT = 4, - - DXGI_FORMAT_R32G32B32_TYPELESS = 5, - DXGI_FORMAT_R32G32B32_FLOAT = 6, - DXGI_FORMAT_R32G32B32_UINT = 7, - DXGI_FORMAT_R32G32B32_SINT = 8, - - DXGI_FORMAT_R16G16B16A16_TYPELESS = 9, - DXGI_FORMAT_R16G16B16A16_FLOAT = 10, - DXGI_FORMAT_R16G16B16A16_UNORM = 11, - DXGI_FORMAT_R16G16B16A16_UINT = 12, - DXGI_FORMAT_R16G16B16A16_SNORM = 13, - DXGI_FORMAT_R16G16B16A16_SINT = 14, - - DXGI_FORMAT_R32G32_TYPELESS = 15, - DXGI_FORMAT_R32G32_FLOAT = 16, - DXGI_FORMAT_R32G32_UINT = 17, - DXGI_FORMAT_R32G32_SINT = 18, - - DXGI_FORMAT_R32G8X24_TYPELESS = 19, - DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20, - DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21, - DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22, - - DXGI_FORMAT_R10G10B10A2_TYPELESS = 23, - DXGI_FORMAT_R10G10B10A2_UNORM = 24, - DXGI_FORMAT_R10G10B10A2_UINT = 25, - - DXGI_FORMAT_R11G11B10_FLOAT = 26, - - DXGI_FORMAT_R8G8B8A8_TYPELESS = 27, - DXGI_FORMAT_R8G8B8A8_UNORM = 28, - DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29, - DXGI_FORMAT_R8G8B8A8_UINT = 30, - DXGI_FORMAT_R8G8B8A8_SNORM = 31, - DXGI_FORMAT_R8G8B8A8_SINT = 32, - - DXGI_FORMAT_R16G16_TYPELESS = 33, - DXGI_FORMAT_R16G16_FLOAT = 34, - DXGI_FORMAT_R16G16_UNORM = 35, - DXGI_FORMAT_R16G16_UINT = 36, - DXGI_FORMAT_R16G16_SNORM = 37, - DXGI_FORMAT_R16G16_SINT = 38, - - DXGI_FORMAT_R32_TYPELESS = 39, - DXGI_FORMAT_D32_FLOAT = 40, - DXGI_FORMAT_R32_FLOAT = 41, - DXGI_FORMAT_R32_UINT = 42, - DXGI_FORMAT_R32_SINT = 43, - - DXGI_FORMAT_R24G8_TYPELESS = 44, - DXGI_FORMAT_D24_UNORM_S8_UINT = 45, - DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46, - DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47, - - DXGI_FORMAT_R8G8_TYPELESS = 48, - DXGI_FORMAT_R8G8_UNORM = 49, - DXGI_FORMAT_R8G8_UINT = 50, - DXGI_FORMAT_R8G8_SNORM = 51, - DXGI_FORMAT_R8G8_SINT = 52, - - DXGI_FORMAT_R16_TYPELESS = 53, - DXGI_FORMAT_R16_FLOAT = 54, - 
DXGI_FORMAT_D16_UNORM = 55, - DXGI_FORMAT_R16_UNORM = 56, - DXGI_FORMAT_R16_UINT = 57, - DXGI_FORMAT_R16_SNORM = 58, - DXGI_FORMAT_R16_SINT = 59, - - DXGI_FORMAT_R8_TYPELESS = 60, - DXGI_FORMAT_R8_UNORM = 61, - DXGI_FORMAT_R8_UINT = 62, - DXGI_FORMAT_R8_SNORM = 63, - DXGI_FORMAT_R8_SINT = 64, - DXGI_FORMAT_A8_UNORM = 65, - - DXGI_FORMAT_R1_UNORM = 66, - - DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67, - - DXGI_FORMAT_R8G8_B8G8_UNORM = 68, - DXGI_FORMAT_G8R8_G8B8_UNORM = 69, - - DXGI_FORMAT_BC1_TYPELESS = 70, - DXGI_FORMAT_BC1_UNORM = 71, - DXGI_FORMAT_BC1_UNORM_SRGB = 72, - - DXGI_FORMAT_BC2_TYPELESS = 73, - DXGI_FORMAT_BC2_UNORM = 74, - DXGI_FORMAT_BC2_UNORM_SRGB = 75, - - DXGI_FORMAT_BC3_TYPELESS = 76, - DXGI_FORMAT_BC3_UNORM = 77, - DXGI_FORMAT_BC3_UNORM_SRGB = 78, - - DXGI_FORMAT_BC4_TYPELESS = 79, - DXGI_FORMAT_BC4_UNORM = 80, - DXGI_FORMAT_BC4_SNORM = 81, - - DXGI_FORMAT_BC5_TYPELESS = 82, - DXGI_FORMAT_BC5_UNORM = 83, - DXGI_FORMAT_BC5_SNORM = 84, - - DXGI_FORMAT_B5G6R5_UNORM = 85, - DXGI_FORMAT_B5G5R5A1_UNORM = 86, - DXGI_FORMAT_B8G8R8A8_UNORM = 87, - DXGI_FORMAT_B8G8R8X8_UNORM = 88, - - DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM = 89, - DXGI_FORMAT_B8G8R8A8_TYPELESS = 90, - DXGI_FORMAT_B8G8R8A8_UNORM_SRGB = 91, - DXGI_FORMAT_B8G8R8X8_TYPELESS = 92, - DXGI_FORMAT_B8G8R8X8_UNORM_SRGB = 93, - - DXGI_FORMAT_BC6H_TYPELESS = 94, - DXGI_FORMAT_BC6H_UF16 = 95, - DXGI_FORMAT_BC6H_SF16 = 96, - - DXGI_FORMAT_BC7_TYPELESS = 97, - DXGI_FORMAT_BC7_UNORM = 98, - DXGI_FORMAT_BC7_UNORM_SRGB = 99, - }; - - - - extern uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); - - struct NVIMAGE_CLASS DDSPixelFormat - { - uint size; - uint flags; - uint fourcc; - uint bitcount; - uint rmask; - uint gmask; - uint bmask; - uint amask; - }; - - struct NVIMAGE_CLASS DDSCaps - { - uint caps1; - uint caps2; - uint caps3; - uint caps4; - }; - - /// DDS file header for DX10. 
- struct NVIMAGE_CLASS DDSHeader10 - { - uint dxgiFormat; - uint resourceDimension; - uint miscFlag; - uint arraySize; - uint reserved; - }; - - /// DDS file header. - struct NVIMAGE_CLASS DDSHeader - { - uint fourcc; - uint size; - uint flags; - uint height; - uint width; - uint pitch; - uint depth; - uint mipmapcount; - uint reserved[11]; - DDSPixelFormat pf; - DDSCaps caps; - uint notused; - DDSHeader10 header10; - - - // Helper methods. - DDSHeader(); - - void setWidth(uint w); - void setHeight(uint h); - void setDepth(uint d); - void setMipmapCount(uint count); - void setTexture2D(); - void setTexture3D(); - void setTextureCube(); - void setLinearSize(uint size); - void setPitch(uint pitch); - void setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3); - void setFormatCode(uint code); - void setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3); - void setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); - void setDX10Format(uint format); - void setNormalFlag(bool b); - void setSrgbFlag(bool b); - void setHasAlphaFlag(bool b); - void setUserVersion(int version); - - void swapBytes(); - - bool hasDX10Header() const; - uint signature() const; - uint toolVersion() const; - uint userVersion() const; - bool isNormalMap() const; - bool isSrgb() const; - bool hasAlpha() const; - uint d3d9Format() const; - uint pixelSize() const; // In bits! - uint blockSize() const; // In bytes! - bool isBlockFormat() const; - }; - - NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); - - - /// DirectDraw Surface. 
(DDS) - class NVIMAGE_CLASS DirectDrawSurface - { - public: - DirectDrawSurface(); - DirectDrawSurface(const char * file); - DirectDrawSurface(Stream * stream); - ~DirectDrawSurface(); - - bool load(const char * filename); - bool load(Stream * stream); - - bool isValid() const; - bool isSupported() const; - - bool hasAlpha() const; - - uint mipmapCount() const; - uint width() const; - uint height() const; - uint depth() const; - bool isTexture1D() const; - bool isTexture2D() const; - bool isTexture3D() const; - bool isTextureCube() const; - - void setNormalFlag(bool b); - void setHasAlphaFlag(bool b); - void setUserVersion(int version); - - void mipmap(Image * img, uint f, uint m); - - uint surfaceWidth(uint mipmap) const; - uint surfaceHeight(uint mipmap) const; - uint surfaceDepth(uint mipmap) const; - uint surfaceSize(uint mipmap) const; - bool readSurface(uint face, uint mipmap, void * data, uint size); - - void printInfo() const; - - // Only initialized after loading. - DDSHeader header; - - private: - - uint faceSize() const; - uint offset(uint face, uint mipmap); - - void readLinearImage(Image * img); - void readBlockImage(Image * img); - void readBlock(ColorBlock * rgba); - - - private: - Stream * stream; - }; - -} // nv namespace - -#endif // NV_IMAGE_DIRECTDRAWSURFACE_H +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#pragma once +#ifndef NV_IMAGE_DIRECTDRAWSURFACE_H +#define NV_IMAGE_DIRECTDRAWSURFACE_H + +#include "nvimage.h" + +#if !defined(MAKEFOURCC) +#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ + (uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \ + (uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 )) +#endif + +namespace nv +{ + class Image; + class Stream; + struct ColorBlock; + + extern const uint FOURCC_NVTT; + extern const uint FOURCC_DDS; + extern const uint FOURCC_DXT1; + extern const uint FOURCC_DXT2; + extern const uint FOURCC_DXT3; + extern const uint FOURCC_DXT4; + extern const uint FOURCC_DXT5; + extern const uint FOURCC_RXGB; + extern const uint FOURCC_ATI1; + extern const uint FOURCC_ATI2; + + enum DDPF + { + DDPF_ALPHAPIXELS = 0x00000001U, + DDPF_ALPHA = 0x00000002U, + DDPF_FOURCC = 0x00000004U, + DDPF_RGB = 0x00000040U, + DDPF_PALETTEINDEXED1 = 0x00000800U, + DDPF_PALETTEINDEXED2 = 0x00001000U, + DDPF_PALETTEINDEXED4 = 0x00000008U, + DDPF_PALETTEINDEXED8 = 0x00000020U, + DDPF_LUMINANCE = 0x00020000U, + DDPF_ALPHAPREMULT = 0x00008000U, + + // Custom NVTT flags. + DDPF_NORMAL = 0x80000000U, + DDPF_SRGB = 0x40000000U, + }; + + + enum D3DFORMAT + { + // 32 bit RGB formats. 
+ D3DFMT_R8G8B8 = 20, + D3DFMT_A8R8G8B8 = 21, + D3DFMT_X8R8G8B8 = 22, + D3DFMT_R5G6B5 = 23, + D3DFMT_X1R5G5B5 = 24, + D3DFMT_A1R5G5B5 = 25, + D3DFMT_A4R4G4B4 = 26, + D3DFMT_R3G3B2 = 27, + D3DFMT_A8 = 28, + D3DFMT_A8R3G3B2 = 29, + D3DFMT_X4R4G4B4 = 30, + D3DFMT_A2B10G10R10 = 31, + D3DFMT_A8B8G8R8 = 32, + D3DFMT_X8B8G8R8 = 33, + D3DFMT_G16R16 = 34, + D3DFMT_A2R10G10B10 = 35, + + D3DFMT_A16B16G16R16 = 36, + + // Palette formats. + D3DFMT_A8P8 = 40, + D3DFMT_P8 = 41, + + // Luminance formats. + D3DFMT_L8 = 50, + D3DFMT_A8L8 = 51, + D3DFMT_A4L4 = 52, + D3DFMT_L16 = 81, + + // Floating point formats + D3DFMT_R16F = 111, + D3DFMT_G16R16F = 112, + D3DFMT_A16B16G16R16F = 113, + D3DFMT_R32F = 114, + D3DFMT_G32R32F = 115, + D3DFMT_A32B32G32R32F = 116, + }; + + + // D3D1x resource dimensions. + enum D3D10_RESOURCE_DIMENSION + { + D3D10_RESOURCE_DIMENSION_UNKNOWN = 0, + D3D10_RESOURCE_DIMENSION_BUFFER = 1, + D3D10_RESOURCE_DIMENSION_TEXTURE1D = 2, + D3D10_RESOURCE_DIMENSION_TEXTURE2D = 3, + D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4, + }; + + // DXGI formats. 
+ enum DXGI_FORMAT + { + DXGI_FORMAT_UNKNOWN = 0, + + DXGI_FORMAT_R32G32B32A32_TYPELESS = 1, + DXGI_FORMAT_R32G32B32A32_FLOAT = 2, + DXGI_FORMAT_R32G32B32A32_UINT = 3, + DXGI_FORMAT_R32G32B32A32_SINT = 4, + + DXGI_FORMAT_R32G32B32_TYPELESS = 5, + DXGI_FORMAT_R32G32B32_FLOAT = 6, + DXGI_FORMAT_R32G32B32_UINT = 7, + DXGI_FORMAT_R32G32B32_SINT = 8, + + DXGI_FORMAT_R16G16B16A16_TYPELESS = 9, + DXGI_FORMAT_R16G16B16A16_FLOAT = 10, + DXGI_FORMAT_R16G16B16A16_UNORM = 11, + DXGI_FORMAT_R16G16B16A16_UINT = 12, + DXGI_FORMAT_R16G16B16A16_SNORM = 13, + DXGI_FORMAT_R16G16B16A16_SINT = 14, + + DXGI_FORMAT_R32G32_TYPELESS = 15, + DXGI_FORMAT_R32G32_FLOAT = 16, + DXGI_FORMAT_R32G32_UINT = 17, + DXGI_FORMAT_R32G32_SINT = 18, + + DXGI_FORMAT_R32G8X24_TYPELESS = 19, + DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20, + DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21, + DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22, + + DXGI_FORMAT_R10G10B10A2_TYPELESS = 23, + DXGI_FORMAT_R10G10B10A2_UNORM = 24, + DXGI_FORMAT_R10G10B10A2_UINT = 25, + + DXGI_FORMAT_R11G11B10_FLOAT = 26, + + DXGI_FORMAT_R8G8B8A8_TYPELESS = 27, + DXGI_FORMAT_R8G8B8A8_UNORM = 28, + DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29, + DXGI_FORMAT_R8G8B8A8_UINT = 30, + DXGI_FORMAT_R8G8B8A8_SNORM = 31, + DXGI_FORMAT_R8G8B8A8_SINT = 32, + + DXGI_FORMAT_R16G16_TYPELESS = 33, + DXGI_FORMAT_R16G16_FLOAT = 34, + DXGI_FORMAT_R16G16_UNORM = 35, + DXGI_FORMAT_R16G16_UINT = 36, + DXGI_FORMAT_R16G16_SNORM = 37, + DXGI_FORMAT_R16G16_SINT = 38, + + DXGI_FORMAT_R32_TYPELESS = 39, + DXGI_FORMAT_D32_FLOAT = 40, + DXGI_FORMAT_R32_FLOAT = 41, + DXGI_FORMAT_R32_UINT = 42, + DXGI_FORMAT_R32_SINT = 43, + + DXGI_FORMAT_R24G8_TYPELESS = 44, + DXGI_FORMAT_D24_UNORM_S8_UINT = 45, + DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46, + DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47, + + DXGI_FORMAT_R8G8_TYPELESS = 48, + DXGI_FORMAT_R8G8_UNORM = 49, + DXGI_FORMAT_R8G8_UINT = 50, + DXGI_FORMAT_R8G8_SNORM = 51, + DXGI_FORMAT_R8G8_SINT = 52, + + DXGI_FORMAT_R16_TYPELESS = 53, + DXGI_FORMAT_R16_FLOAT = 54, + 
DXGI_FORMAT_D16_UNORM = 55, + DXGI_FORMAT_R16_UNORM = 56, + DXGI_FORMAT_R16_UINT = 57, + DXGI_FORMAT_R16_SNORM = 58, + DXGI_FORMAT_R16_SINT = 59, + + DXGI_FORMAT_R8_TYPELESS = 60, + DXGI_FORMAT_R8_UNORM = 61, + DXGI_FORMAT_R8_UINT = 62, + DXGI_FORMAT_R8_SNORM = 63, + DXGI_FORMAT_R8_SINT = 64, + DXGI_FORMAT_A8_UNORM = 65, + + DXGI_FORMAT_R1_UNORM = 66, + + DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67, + + DXGI_FORMAT_R8G8_B8G8_UNORM = 68, + DXGI_FORMAT_G8R8_G8B8_UNORM = 69, + + DXGI_FORMAT_BC1_TYPELESS = 70, + DXGI_FORMAT_BC1_UNORM = 71, + DXGI_FORMAT_BC1_UNORM_SRGB = 72, + + DXGI_FORMAT_BC2_TYPELESS = 73, + DXGI_FORMAT_BC2_UNORM = 74, + DXGI_FORMAT_BC2_UNORM_SRGB = 75, + + DXGI_FORMAT_BC3_TYPELESS = 76, + DXGI_FORMAT_BC3_UNORM = 77, + DXGI_FORMAT_BC3_UNORM_SRGB = 78, + + DXGI_FORMAT_BC4_TYPELESS = 79, + DXGI_FORMAT_BC4_UNORM = 80, + DXGI_FORMAT_BC4_SNORM = 81, + + DXGI_FORMAT_BC5_TYPELESS = 82, + DXGI_FORMAT_BC5_UNORM = 83, + DXGI_FORMAT_BC5_SNORM = 84, + + DXGI_FORMAT_B5G6R5_UNORM = 85, + DXGI_FORMAT_B5G5R5A1_UNORM = 86, + DXGI_FORMAT_B8G8R8A8_UNORM = 87, + DXGI_FORMAT_B8G8R8X8_UNORM = 88, + + DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM = 89, + DXGI_FORMAT_B8G8R8A8_TYPELESS = 90, + DXGI_FORMAT_B8G8R8A8_UNORM_SRGB = 91, + DXGI_FORMAT_B8G8R8X8_TYPELESS = 92, + DXGI_FORMAT_B8G8R8X8_UNORM_SRGB = 93, + + DXGI_FORMAT_BC6H_TYPELESS = 94, + DXGI_FORMAT_BC6H_UF16 = 95, + DXGI_FORMAT_BC6H_SF16 = 96, + + DXGI_FORMAT_BC7_TYPELESS = 97, + DXGI_FORMAT_BC7_UNORM = 98, + DXGI_FORMAT_BC7_UNORM_SRGB = 99, + }; + + + + extern uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); + + struct NVIMAGE_CLASS DDSPixelFormat + { + uint size; + uint flags; + uint fourcc; + uint bitcount; + uint rmask; + uint gmask; + uint bmask; + uint amask; + }; + + struct NVIMAGE_CLASS DDSCaps + { + uint caps1; + uint caps2; + uint caps3; + uint caps4; + }; + + /// DDS file header for DX10. 
+ struct NVIMAGE_CLASS DDSHeader10 + { + uint dxgiFormat; + uint resourceDimension; + uint miscFlag; + uint arraySize; + uint reserved; + }; + + /// DDS file header. + struct NVIMAGE_CLASS DDSHeader + { + uint fourcc; + uint size; + uint flags; + uint height; + uint width; + uint pitch; + uint depth; + uint mipmapcount; + uint reserved[11]; + DDSPixelFormat pf; + DDSCaps caps; + uint notused; + DDSHeader10 header10; + + + // Helper methods. + DDSHeader(); + + void setWidth(uint w); + void setHeight(uint h); + void setDepth(uint d); + void setMipmapCount(uint count); + void setTexture2D(); + void setTexture3D(); + void setTextureCube(); + void setLinearSize(uint size); + void setPitch(uint pitch); + void setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3); + void setFormatCode(uint code); + void setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3); + void setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); + void setDX10Format(uint format); + void setNormalFlag(bool b); + void setSrgbFlag(bool b); + void setHasAlphaFlag(bool b); + void setUserVersion(int version); + + void swapBytes(); + + bool hasDX10Header() const; + uint signature() const; + uint toolVersion() const; + uint userVersion() const; + bool isNormalMap() const; + bool isSrgb() const; + bool hasAlpha() const; + uint d3d9Format() const; + uint pixelSize() const; // In bits! + uint blockSize() const; // In bytes! + bool isBlockFormat() const; + }; + + NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); + + + /// DirectDraw Surface. 
(DDS) + class NVIMAGE_CLASS DirectDrawSurface + { + public: + DirectDrawSurface(); + DirectDrawSurface(const char * file); + DirectDrawSurface(Stream * stream); + ~DirectDrawSurface(); + + bool load(const char * filename); + bool load(Stream * stream); + + bool isValid() const; + bool isSupported() const; + + bool hasAlpha() const; + + uint mipmapCount() const; + uint width() const; + uint height() const; + uint depth() const; + bool isTexture1D() const; + bool isTexture2D() const; + bool isTexture3D() const; + bool isTextureCube() const; + + void setNormalFlag(bool b); + void setHasAlphaFlag(bool b); + void setUserVersion(int version); + + void mipmap(Image * img, uint f, uint m); + + uint surfaceWidth(uint mipmap) const; + uint surfaceHeight(uint mipmap) const; + uint surfaceDepth(uint mipmap) const; + uint surfaceSize(uint mipmap) const; + bool readSurface(uint face, uint mipmap, void * data, uint size); + + void printInfo() const; + + // Only initialized after loading. + DDSHeader header; + + private: + + uint faceSize() const; + uint offset(uint face, uint mipmap); + + void readLinearImage(Image * img); + void readBlockImage(Image * img); + void readBlock(ColorBlock * rgba); + + + private: + Stream * stream; + }; + +} // nv namespace + +#endif // NV_IMAGE_DIRECTDRAWSURFACE_H diff --git a/src/nvimage/Filter.cpp b/src/nvimage/Filter.cpp index db06d80..a89e54b 100644 --- a/src/nvimage/Filter.cpp +++ b/src/nvimage/Filter.cpp @@ -1,627 +1,627 @@ -// This code is in the public domain -- castanyo@yahoo.es - -/** @file Filter.cpp - * @brief Image filters. - * - * Jonathan Blow articles: - * http://number-none.com/product/Mipmapping, Part 1/index.html - * http://number-none.com/product/Mipmapping, Part 2/index.html - * - * References from Thacher Ulrich: - * See _Graphics Gems III_ "General Filtered Image Rescaling", Dale A. Schumacher - * http://tog.acm.org/GraphicsGems/gemsiii/filter.c - * - * References from Paul Heckbert: - * A.V. Oppenheim, R.W. 
Schafer, Digital Signal Processing, Prentice-Hall, 1975 - * - * R.W. Hamming, Digital Filters, Prentice-Hall, Englewood Cliffs, NJ, 1983 - * - * W.K. Pratt, Digital Image Processing, John Wiley and Sons, 1978 - * - * H.S. Hou, H.C. Andrews, "Cubic Splines for Image Interpolation and - * Digital Filtering", IEEE Trans. Acoustics, Speech, and Signal Proc., - * vol. ASSP-26, no. 6, Dec. 1978, pp. 508-517 - * - * Paul Heckbert's zoom library. - * http://www.xmission.com/~legalize/zoom.html - * - * Reconstruction Filters in Computer Graphics - * http://www.mentallandscape.com/Papers_siggraph88.pdf - * - * More references: - * http://www.worldserver.com/turk/computergraphics/ResamplingFilters.pdf - * http://www.dspguide.com/ch16.htm - */ - -#include "Filter.h" - -#include "nvmath/Vector.h" // Vector4 -#include "nvcore/Utils.h" // swap - -#include // memset - -using namespace nv; - -namespace -{ - // Sinc function. - inline static float sincf(const float x) - { - if (fabs(x) < NV_EPSILON) { - //return 1.0; - return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f); - } - else { - return sin(x) / x; - } - } - - // Bessel function of the first kind from Jon Blow's article. - // http://mathworld.wolfram.com/BesselFunctionoftheFirstKind.html - // http://en.wikipedia.org/wiki/Bessel_function - inline static float bessel0(float x) - { - const float EPSILON_RATIO = 1e-6f; - float xh, sum, pow, ds; - int k; - - xh = 0.5f * x; - sum = 1.0f; - pow = 1.0f; - k = 0; - ds = 1.0; - while (ds > sum * EPSILON_RATIO) { - ++k; - pow = pow * (xh / k); - ds = pow * pow; - sum = sum + ds; - } - - return sum; - } - - /*// Alternative bessel function from Paul Heckbert. 
- static float _bessel0(float x) - { - const float EPSILON_RATIO = 1E-6; - float sum = 1.0f; - float y = x * x / 4.0f; - float t = y; - for(int i = 2; t > EPSILON_RATIO; i++) { - sum += t; - t *= y / float(i * i); - } - return sum; - }*/ - -} // namespace - - -Filter::Filter(float width) : m_width(width) -{ -} - -/*virtual*/ Filter::~Filter() -{ -} - -float Filter::sampleDelta(float x, float scale) const -{ - return evaluate((x + 0.5f)* scale); -} - -float Filter::sampleBox(float x, float scale, int samples) const -{ - double sum = 0; - float isamples = 1.0f / float(samples); - - for(int s = 0; s < samples; s++) - { - float p = (x + (float(s) + 0.5f) * isamples) * scale; - float value = evaluate(p); - - //printf("%f: %.8f (%X)\n", p, value, *(uint32 *)&value); - - sum += value; - } - - return float(sum * isamples); -} - -float Filter::sampleTriangle(float x, float scale, int samples) const -{ - double sum = 0; - float isamples = 1.0f / float(samples); - - for(int s = 0; s < samples; s++) - { - float offset = (2 * float(s) + 1.0f) * isamples; - float p = (x + offset - 0.5f) * scale; - float value = evaluate(p); - - float weight = offset; - if (weight > 1.0f) weight = 2.0f - weight; - - sum += value * weight; - } - - return float(2 * sum * isamples); -} - - - - - -BoxFilter::BoxFilter() : Filter(0.5f) {} -BoxFilter::BoxFilter(float width) : Filter(width) {} - -float BoxFilter::evaluate(float x) const -{ - if (fabs(x) <= m_width) return 1.0f; - else return 0.0f; -} - - -TriangleFilter::TriangleFilter() : Filter(1.0f) {} -TriangleFilter::TriangleFilter(float width) : Filter(width) {} - -float TriangleFilter::evaluate(float x) const -{ - x = fabs(x); - if( x < m_width ) return m_width - x; - return 0.0f; -} - - -QuadraticFilter::QuadraticFilter() : Filter(1.5f) {} - -float QuadraticFilter::evaluate(float x) const -{ - x = fabs(x); - if( x < 0.5f ) return 0.75f - x * x; - if( x < 1.5f ) { - float t = x - 1.5f; - return 0.5f * t * t; - } - return 0.0f; -} - - 
-CubicFilter::CubicFilter() : Filter(1.0f) {} - -float CubicFilter::evaluate(float x) const -{ - // f(t) = 2|t|^3 - 3|t|^2 + 1, -1 <= t <= 1 - x = fabs(x); - if( x < 1.0f ) return((2.0f * x - 3.0f) * x * x + 1.0f); - return 0.0f; -} - - -BSplineFilter::BSplineFilter() : Filter(2.0f) {} - -float BSplineFilter::evaluate(float x) const -{ - x = fabs(x); - if( x < 1.0f ) return (4.0f + x * x * (-6.0f + x * 3.0f)) / 6.0f; - if( x < 2.0f ) { - float t = 2.0f - x; - return t * t * t / 6.0f; - } - return 0.0f; -} - - -MitchellFilter::MitchellFilter() : Filter(2.0f) { setParameters(1.0f/3.0f, 1.0f/3.0f); } - -float MitchellFilter::evaluate(float x) const -{ - x = fabs(x); - if( x < 1.0f ) return p0 + x * x * (p2 + x * p3); - if( x < 2.0f ) return q0 + x * (q1 + x * (q2 + x * q3)); - return 0.0f; -} - -void MitchellFilter::setParameters(float b, float c) -{ - p0 = (6.0f - 2.0f * b) / 6.0f; - p2 = (-18.0f + 12.0f * b + 6.0f * c) / 6.0f; - p3 = (12.0f - 9.0f * b - 6.0f * c) / 6.0f; - q0 = (8.0f * b + 24.0f * c) / 6.0f; - q1 = (-12.0f * b - 48.0f * c) / 6.0f; - q2 = (6.0f * b + 30.0f * c) / 6.0f; - q3 = (-b - 6.0f * c) / 6.0f; -} - - -LanczosFilter::LanczosFilter() : Filter(3.0f) {} - -float LanczosFilter::evaluate(float x) const -{ - x = fabs(x); - if( x < 3.0f ) return sincf(PI * x) * sincf(PI * x / 3.0f); - return 0.0f; -} - - -SincFilter::SincFilter(float w) : Filter(w) {} - -float SincFilter::evaluate(float x) const -{ - return sincf(PI * x); -} - - -KaiserFilter::KaiserFilter(float w) : Filter(w) { setParameters(4.0f, 1.0f); } - -float KaiserFilter::evaluate(float x) const -{ - const float sinc_value = sincf(PI * x * stretch); - const float t = x / m_width; - if ((1 - t * t) >= 0) return sinc_value * bessel0(alpha * sqrtf(1 - t * t)) / bessel0(alpha); - else return 0; -} - -void KaiserFilter::setParameters(float alpha, float stretch) -{ - this->alpha = alpha; - this->stretch = stretch; -} - -GaussianFilter::GaussianFilter(float w) : Filter(w) { setParameters(1); } - 
-float GaussianFilter::evaluate(float x) const -{ - // variance = sigma^2 - return (1.0f / sqrtf(2 * PI * variance)) * expf(-x*x / (2 * variance)); -} - -void GaussianFilter::setParameters(float variance) -{ - this->variance = variance; -} - - - -Kernel1::Kernel1(const Filter & f, int iscale, int samples/*= 32*/) -{ - nvDebugCheck(iscale > 1); - nvDebugCheck(samples > 0); - - const float scale = 1.0f / iscale; - - m_width = f.width() * iscale; - m_windowSize = (int)ceilf(2 * m_width); - m_data = new float[m_windowSize]; - - const float offset = float(m_windowSize) / 2; - - float total = 0.0f; - for (int i = 0; i < m_windowSize; i++) - { - const float sample = f.sampleBox(i - offset, scale, samples); - m_data[i] = sample; - total += sample; - } - - const float inv = 1.0f / total; - for (int i = 0; i < m_windowSize; i++) - { - m_data[i] *= inv; - } -} - -Kernel1::~Kernel1() -{ - delete m_data; -} - -// Print the kernel for debugging purposes. -void Kernel1::debugPrint() -{ - for (int i = 0; i < m_windowSize; i++) { - nvDebug("%d: %f\n", i, m_data[i]); - } -} - - - -Kernel2::Kernel2(uint ws) : m_windowSize(ws) -{ - m_data = new float[m_windowSize * m_windowSize]; -} - +// This code is in the public domain -- castanyo@yahoo.es + +/** @file Filter.cpp + * @brief Image filters. + * + * Jonathan Blow articles: + * http://number-none.com/product/Mipmapping, Part 1/index.html + * http://number-none.com/product/Mipmapping, Part 2/index.html + * + * References from Thacher Ulrich: + * See _Graphics Gems III_ "General Filtered Image Rescaling", Dale A. Schumacher + * http://tog.acm.org/GraphicsGems/gemsiii/filter.c + * + * References from Paul Heckbert: + * A.V. Oppenheim, R.W. Schafer, Digital Signal Processing, Prentice-Hall, 1975 + * + * R.W. Hamming, Digital Filters, Prentice-Hall, Englewood Cliffs, NJ, 1983 + * + * W.K. Pratt, Digital Image Processing, John Wiley and Sons, 1978 + * + * H.S. Hou, H.C. 
Andrews, "Cubic Splines for Image Interpolation and + * Digital Filtering", IEEE Trans. Acoustics, Speech, and Signal Proc., + * vol. ASSP-26, no. 6, Dec. 1978, pp. 508-517 + * + * Paul Heckbert's zoom library. + * http://www.xmission.com/~legalize/zoom.html + * + * Reconstruction Filters in Computer Graphics + * http://www.mentallandscape.com/Papers_siggraph88.pdf + * + * More references: + * http://www.worldserver.com/turk/computergraphics/ResamplingFilters.pdf + * http://www.dspguide.com/ch16.htm + */ + +#include "Filter.h" + +#include "nvmath/Vector.h" // Vector4 +#include "nvcore/Utils.h" // swap + +#include // memset + +using namespace nv; + +namespace +{ + // Sinc function. + inline static float sincf(const float x) + { + if (fabs(x) < NV_EPSILON) { + //return 1.0; + return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f); + } + else { + return sin(x) / x; + } + } + + // Bessel function of the first kind from Jon Blow's article. + // http://mathworld.wolfram.com/BesselFunctionoftheFirstKind.html + // http://en.wikipedia.org/wiki/Bessel_function + inline static float bessel0(float x) + { + const float EPSILON_RATIO = 1e-6f; + float xh, sum, pow, ds; + int k; + + xh = 0.5f * x; + sum = 1.0f; + pow = 1.0f; + k = 0; + ds = 1.0; + while (ds > sum * EPSILON_RATIO) { + ++k; + pow = pow * (xh / k); + ds = pow * pow; + sum = sum + ds; + } + + return sum; + } + + /*// Alternative bessel function from Paul Heckbert. 
+ static float _bessel0(float x) + { + const float EPSILON_RATIO = 1E-6; + float sum = 1.0f; + float y = x * x / 4.0f; + float t = y; + for(int i = 2; t > EPSILON_RATIO; i++) { + sum += t; + t *= y / float(i * i); + } + return sum; + }*/ + +} // namespace + + +Filter::Filter(float width) : m_width(width) +{ +} + +/*virtual*/ Filter::~Filter() +{ +} + +float Filter::sampleDelta(float x, float scale) const +{ + return evaluate((x + 0.5f)* scale); +} + +float Filter::sampleBox(float x, float scale, int samples) const +{ + double sum = 0; + float isamples = 1.0f / float(samples); + + for(int s = 0; s < samples; s++) + { + float p = (x + (float(s) + 0.5f) * isamples) * scale; + float value = evaluate(p); + + //printf("%f: %.8f (%X)\n", p, value, *(uint32 *)&value); + + sum += value; + } + + return float(sum * isamples); +} + +float Filter::sampleTriangle(float x, float scale, int samples) const +{ + double sum = 0; + float isamples = 1.0f / float(samples); + + for(int s = 0; s < samples; s++) + { + float offset = (2 * float(s) + 1.0f) * isamples; + float p = (x + offset - 0.5f) * scale; + float value = evaluate(p); + + float weight = offset; + if (weight > 1.0f) weight = 2.0f - weight; + + sum += value * weight; + } + + return float(2 * sum * isamples); +} + + + + + +BoxFilter::BoxFilter() : Filter(0.5f) {} +BoxFilter::BoxFilter(float width) : Filter(width) {} + +float BoxFilter::evaluate(float x) const +{ + if (fabs(x) <= m_width) return 1.0f; + else return 0.0f; +} + + +TriangleFilter::TriangleFilter() : Filter(1.0f) {} +TriangleFilter::TriangleFilter(float width) : Filter(width) {} + +float TriangleFilter::evaluate(float x) const +{ + x = fabs(x); + if( x < m_width ) return m_width - x; + return 0.0f; +} + + +QuadraticFilter::QuadraticFilter() : Filter(1.5f) {} + +float QuadraticFilter::evaluate(float x) const +{ + x = fabs(x); + if( x < 0.5f ) return 0.75f - x * x; + if( x < 1.5f ) { + float t = x - 1.5f; + return 0.5f * t * t; + } + return 0.0f; +} + + 
+CubicFilter::CubicFilter() : Filter(1.0f) {} + +float CubicFilter::evaluate(float x) const +{ + // f(t) = 2|t|^3 - 3|t|^2 + 1, -1 <= t <= 1 + x = fabs(x); + if( x < 1.0f ) return((2.0f * x - 3.0f) * x * x + 1.0f); + return 0.0f; +} + + +BSplineFilter::BSplineFilter() : Filter(2.0f) {} + +float BSplineFilter::evaluate(float x) const +{ + x = fabs(x); + if( x < 1.0f ) return (4.0f + x * x * (-6.0f + x * 3.0f)) / 6.0f; + if( x < 2.0f ) { + float t = 2.0f - x; + return t * t * t / 6.0f; + } + return 0.0f; +} + + +MitchellFilter::MitchellFilter() : Filter(2.0f) { setParameters(1.0f/3.0f, 1.0f/3.0f); } + +float MitchellFilter::evaluate(float x) const +{ + x = fabs(x); + if( x < 1.0f ) return p0 + x * x * (p2 + x * p3); + if( x < 2.0f ) return q0 + x * (q1 + x * (q2 + x * q3)); + return 0.0f; +} + +void MitchellFilter::setParameters(float b, float c) +{ + p0 = (6.0f - 2.0f * b) / 6.0f; + p2 = (-18.0f + 12.0f * b + 6.0f * c) / 6.0f; + p3 = (12.0f - 9.0f * b - 6.0f * c) / 6.0f; + q0 = (8.0f * b + 24.0f * c) / 6.0f; + q1 = (-12.0f * b - 48.0f * c) / 6.0f; + q2 = (6.0f * b + 30.0f * c) / 6.0f; + q3 = (-b - 6.0f * c) / 6.0f; +} + + +LanczosFilter::LanczosFilter() : Filter(3.0f) {} + +float LanczosFilter::evaluate(float x) const +{ + x = fabs(x); + if( x < 3.0f ) return sincf(PI * x) * sincf(PI * x / 3.0f); + return 0.0f; +} + + +SincFilter::SincFilter(float w) : Filter(w) {} + +float SincFilter::evaluate(float x) const +{ + return sincf(PI * x); +} + + +KaiserFilter::KaiserFilter(float w) : Filter(w) { setParameters(4.0f, 1.0f); } + +float KaiserFilter::evaluate(float x) const +{ + const float sinc_value = sincf(PI * x * stretch); + const float t = x / m_width; + if ((1 - t * t) >= 0) return sinc_value * bessel0(alpha * sqrtf(1 - t * t)) / bessel0(alpha); + else return 0; +} + +void KaiserFilter::setParameters(float alpha, float stretch) +{ + this->alpha = alpha; + this->stretch = stretch; +} + +GaussianFilter::GaussianFilter(float w) : Filter(w) { setParameters(1); } + 
+float GaussianFilter::evaluate(float x) const +{ + // variance = sigma^2 + return (1.0f / sqrtf(2 * PI * variance)) * expf(-x*x / (2 * variance)); +} + +void GaussianFilter::setParameters(float variance) +{ + this->variance = variance; +} + + + +Kernel1::Kernel1(const Filter & f, int iscale, int samples/*= 32*/) +{ + nvDebugCheck(iscale > 1); + nvDebugCheck(samples > 0); + + const float scale = 1.0f / iscale; + + m_width = f.width() * iscale; + m_windowSize = (int)ceilf(2 * m_width); + m_data = new float[m_windowSize]; + + const float offset = float(m_windowSize) / 2; + + float total = 0.0f; + for (int i = 0; i < m_windowSize; i++) + { + const float sample = f.sampleBox(i - offset, scale, samples); + m_data[i] = sample; + total += sample; + } + + const float inv = 1.0f / total; + for (int i = 0; i < m_windowSize; i++) + { + m_data[i] *= inv; + } +} + +Kernel1::~Kernel1() +{ + delete m_data; +} + +// Print the kernel for debugging purposes. +void Kernel1::debugPrint() +{ + for (int i = 0; i < m_windowSize; i++) { + nvDebug("%d: %f\n", i, m_data[i]); + } +} + + + +Kernel2::Kernel2(uint ws) : m_windowSize(ws) +{ + m_data = new float[m_windowSize * m_windowSize]; +} + Kernel2::Kernel2(uint ws, const float * data) : m_windowSize(ws) { m_data = new float[m_windowSize * m_windowSize]; memcpy(m_data, data, sizeof(float) * m_windowSize * m_windowSize); -} - -Kernel2::Kernel2(const Kernel2 & k) : m_windowSize(k.m_windowSize) -{ - m_data = new float[m_windowSize * m_windowSize]; - for (uint i = 0; i < m_windowSize * m_windowSize; i++) { - m_data[i] = k.m_data[i]; - } -} - - -Kernel2::~Kernel2() -{ - delete m_data; -} - -// Normalize the filter. -void Kernel2::normalize() -{ - float total = 0.0f; - for(uint i = 0; i < m_windowSize*m_windowSize; i++) { - total += fabs(m_data[i]); - } - - float inv = 1.0f / total; - for(uint i = 0; i < m_windowSize*m_windowSize; i++) { - m_data[i] *= inv; - } -} - -// Transpose the kernel. 
-void Kernel2::transpose() -{ - for(uint i = 0; i < m_windowSize; i++) { - for(uint j = i+1; j < m_windowSize; j++) { - swap(m_data[i*m_windowSize + j], m_data[j*m_windowSize + i]); - } - } -} - -// Init laplacian filter, usually used for sharpening. -void Kernel2::initLaplacian() -{ - nvDebugCheck(m_windowSize == 3); - // m_data[0] = -1; m_data[1] = -1; m_data[2] = -1; - // m_data[3] = -1; m_data[4] = +8; m_data[5] = -1; - // m_data[6] = -1; m_data[7] = -1; m_data[8] = -1; - - m_data[0] = +0; m_data[1] = -1; m_data[2] = +0; - m_data[3] = -1; m_data[4] = +4; m_data[5] = -1; - m_data[6] = +0; m_data[7] = -1; m_data[8] = +0; - - // m_data[0] = +1; m_data[1] = -2; m_data[2] = +1; - // m_data[3] = -2; m_data[4] = +4; m_data[5] = -2; - // m_data[6] = +1; m_data[7] = -2; m_data[8] = +1; -} - - -// Init simple edge detection filter. -void Kernel2::initEdgeDetection() -{ - nvCheck(m_windowSize == 3); - m_data[0] = 0; m_data[1] = 0; m_data[2] = 0; - m_data[3] =-1; m_data[4] = 0; m_data[5] = 1; - m_data[6] = 0; m_data[7] = 0; m_data[8] = 0; -} - -// Init sobel filter. 
-void Kernel2::initSobel() -{ - if (m_windowSize == 3) - { - m_data[0] = -1; m_data[1] = 0; m_data[2] = 1; - m_data[3] = -2; m_data[4] = 0; m_data[5] = 2; - m_data[6] = -1; m_data[7] = 0; m_data[8] = 1; - } - else if (m_windowSize == 5) - { - float elements[] = { - -1, -2, 0, 2, 1, - -2, -3, 0, 3, 2, - -3, -4, 0, 4, 3, - -2, -3, 0, 3, 2, - -1, -2, 0, 2, 1 - }; - - for (int i = 0; i < 5*5; i++) { - m_data[i] = elements[i]; - } - } - else if (m_windowSize == 7) - { - float elements[] = { - -1, -2, -3, 0, 3, 2, 1, - -2, -3, -4, 0, 4, 3, 2, - -3, -4, -5, 0, 5, 4, 3, - -4, -5, -6, 0, 6, 5, 4, - -3, -4, -5, 0, 5, 4, 3, - -2, -3, -4, 0, 4, 3, 2, - -1, -2, -3, 0, 3, 2, 1 - }; - - for (int i = 0; i < 7*7; i++) { - m_data[i] = elements[i]; - } - } - else if (m_windowSize == 9) - { - float elements[] = { - -1, -2, -3, -4, 0, 4, 3, 2, 1, - -2, -3, -4, -5, 0, 5, 4, 3, 2, - -3, -4, -5, -6, 0, 6, 5, 4, 3, - -4, -5, -6, -7, 0, 7, 6, 5, 4, - -5, -6, -7, -8, 0, 8, 7, 6, 5, - -4, -5, -6, -7, 0, 7, 6, 5, 4, - -3, -4, -5, -6, 0, 6, 5, 4, 3, - -2, -3, -4, -5, 0, 5, 4, 3, 2, - -1, -2, -3, -4, 0, 4, 3, 2, 1 - }; - - for (int i = 0; i < 9*9; i++) { - m_data[i] = elements[i]; - } - } -} - -// Init prewitt filter. -void Kernel2::initPrewitt() -{ - if (m_windowSize == 3) - { - m_data[0] = -1; m_data[1] = 0; m_data[2] = -1; - m_data[3] = -1; m_data[4] = 0; m_data[5] = -1; - m_data[6] = -1; m_data[7] = 0; m_data[8] = -1; - } - else if (m_windowSize == 5) - { - // @@ Is this correct? - float elements[] = { - -2, -1, 0, 1, 2, - -2, -1, 0, 1, 2, - -2, -1, 0, 1, 2, - -2, -1, 0, 1, 2, - -2, -1, 0, 1, 2 - }; - - for (int i = 0; i < 5*5; i++) { - m_data[i] = elements[i]; - } - } -} - -// Init blended sobel filter. 
-void Kernel2::initBlendedSobel(const Vector4 & scale) -{ - nvCheck(m_windowSize == 9); - - { - const float elements[] = { - -1, -2, -3, -4, 0, 4, 3, 2, 1, - -2, -3, -4, -5, 0, 5, 4, 3, 2, - -3, -4, -5, -6, 0, 6, 5, 4, 3, - -4, -5, -6, -7, 0, 7, 6, 5, 4, - -5, -6, -7, -8, 0, 8, 7, 6, 5, - -4, -5, -6, -7, 0, 7, 6, 5, 4, - -3, -4, -5, -6, 0, 6, 5, 4, 3, - -2, -3, -4, -5, 0, 5, 4, 3, 2, - -1, -2, -3, -4, 0, 4, 3, 2, 1 - }; - - for (int i = 0; i < 9*9; i++) { - m_data[i] = elements[i] * scale.w; - } - } - { - const float elements[] = { - -1, -2, -3, 0, 3, 2, 1, - -2, -3, -4, 0, 4, 3, 2, - -3, -4, -5, 0, 5, 4, 3, - -4, -5, -6, 0, 6, 5, 4, - -3, -4, -5, 0, 5, 4, 3, - -2, -3, -4, 0, 4, 3, 2, - -1, -2, -3, 0, 3, 2, 1, - }; - - for (int i = 0; i < 7; i++) { - for (int e = 0; e < 7; e++) { - m_data[(i + 1) * 9 + e + 1] += elements[i * 7 + e] * scale.z; - } - } - } - { - const float elements[] = { - -1, -2, 0, 2, 1, - -2, -3, 0, 3, 2, - -3, -4, 0, 4, 3, - -2, -3, 0, 3, 2, - -1, -2, 0, 2, 1 - }; - - for (int i = 0; i < 5; i++) { - for (int e = 0; e < 5; e++) { - m_data[(i + 2) * 9 + e + 2] += elements[i * 5 + e] * scale.y; - } - } - } - { - const float elements[] = { - -1, 0, 1, - -2, 0, 2, - -1, 0, 1, - }; - - for (int i = 0; i < 3; i++) { - for (int e = 0; e < 3; e++) { - m_data[(i + 3) * 9 + e + 3] += elements[i * 3 + e] * scale.x; - } - } - } -} - - -PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples/*= 32*/) -{ - nvDebugCheck(samples > 0); - - float scale = float(dstLength) / float(srcLength); - const float iscale = 1.0f / scale; - - if (scale > 1) { - // Upsampling. 
- samples = 1; - scale = 1; - } - - m_length = dstLength; - m_width = f.width() * iscale; - m_windowSize = (int)ceilf(m_width * 2) + 1; - - m_data = new float[m_windowSize * m_length]; - memset(m_data, 0, sizeof(float) * m_windowSize * m_length); - - for (uint i = 0; i < m_length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - m_width); - const int right = (int)ceilf(center + m_width); - nvDebugCheck(right - left <= m_windowSize); - - float total = 0.0f; - for (int j = 0; j < m_windowSize; j++) - { - const float sample = f.sampleBox(left + j - center, scale, samples); - - //printf("%f %X\n", sample, *(uint32 *)&sample); - - m_data[i * m_windowSize + j] = sample; - total += sample; - } - - // normalize weights. - for (int j = 0; j < m_windowSize; j++) - { - m_data[i * m_windowSize + j] /= total; - } - } -} - -PolyphaseKernel::~PolyphaseKernel() -{ - delete [] m_data; -} - - -// Print the kernel for debugging purposes. -void PolyphaseKernel::debugPrint() const -{ - for (uint i = 0; i < m_length; i++) - { - nvDebug("%d: ", i); - for (int j = 0; j < m_windowSize; j++) - { - nvDebug(" %6.4f", m_data[i * m_windowSize + j]); - } - nvDebug("\n"); - } -} - +} + +Kernel2::Kernel2(const Kernel2 & k) : m_windowSize(k.m_windowSize) +{ + m_data = new float[m_windowSize * m_windowSize]; + for (uint i = 0; i < m_windowSize * m_windowSize; i++) { + m_data[i] = k.m_data[i]; + } +} + + +Kernel2::~Kernel2() +{ + delete m_data; +} + +// Normalize the filter. +void Kernel2::normalize() +{ + float total = 0.0f; + for(uint i = 0; i < m_windowSize*m_windowSize; i++) { + total += fabs(m_data[i]); + } + + float inv = 1.0f / total; + for(uint i = 0; i < m_windowSize*m_windowSize; i++) { + m_data[i] *= inv; + } +} + +// Transpose the kernel. 
+void Kernel2::transpose() +{ + for(uint i = 0; i < m_windowSize; i++) { + for(uint j = i+1; j < m_windowSize; j++) { + swap(m_data[i*m_windowSize + j], m_data[j*m_windowSize + i]); + } + } +} + +// Init laplacian filter, usually used for sharpening. +void Kernel2::initLaplacian() +{ + nvDebugCheck(m_windowSize == 3); + // m_data[0] = -1; m_data[1] = -1; m_data[2] = -1; + // m_data[3] = -1; m_data[4] = +8; m_data[5] = -1; + // m_data[6] = -1; m_data[7] = -1; m_data[8] = -1; + + m_data[0] = +0; m_data[1] = -1; m_data[2] = +0; + m_data[3] = -1; m_data[4] = +4; m_data[5] = -1; + m_data[6] = +0; m_data[7] = -1; m_data[8] = +0; + + // m_data[0] = +1; m_data[1] = -2; m_data[2] = +1; + // m_data[3] = -2; m_data[4] = +4; m_data[5] = -2; + // m_data[6] = +1; m_data[7] = -2; m_data[8] = +1; +} + + +// Init simple edge detection filter. +void Kernel2::initEdgeDetection() +{ + nvCheck(m_windowSize == 3); + m_data[0] = 0; m_data[1] = 0; m_data[2] = 0; + m_data[3] =-1; m_data[4] = 0; m_data[5] = 1; + m_data[6] = 0; m_data[7] = 0; m_data[8] = 0; +} + +// Init sobel filter. 
+void Kernel2::initSobel() +{ + if (m_windowSize == 3) + { + m_data[0] = -1; m_data[1] = 0; m_data[2] = 1; + m_data[3] = -2; m_data[4] = 0; m_data[5] = 2; + m_data[6] = -1; m_data[7] = 0; m_data[8] = 1; + } + else if (m_windowSize == 5) + { + float elements[] = { + -1, -2, 0, 2, 1, + -2, -3, 0, 3, 2, + -3, -4, 0, 4, 3, + -2, -3, 0, 3, 2, + -1, -2, 0, 2, 1 + }; + + for (int i = 0; i < 5*5; i++) { + m_data[i] = elements[i]; + } + } + else if (m_windowSize == 7) + { + float elements[] = { + -1, -2, -3, 0, 3, 2, 1, + -2, -3, -4, 0, 4, 3, 2, + -3, -4, -5, 0, 5, 4, 3, + -4, -5, -6, 0, 6, 5, 4, + -3, -4, -5, 0, 5, 4, 3, + -2, -3, -4, 0, 4, 3, 2, + -1, -2, -3, 0, 3, 2, 1 + }; + + for (int i = 0; i < 7*7; i++) { + m_data[i] = elements[i]; + } + } + else if (m_windowSize == 9) + { + float elements[] = { + -1, -2, -3, -4, 0, 4, 3, 2, 1, + -2, -3, -4, -5, 0, 5, 4, 3, 2, + -3, -4, -5, -6, 0, 6, 5, 4, 3, + -4, -5, -6, -7, 0, 7, 6, 5, 4, + -5, -6, -7, -8, 0, 8, 7, 6, 5, + -4, -5, -6, -7, 0, 7, 6, 5, 4, + -3, -4, -5, -6, 0, 6, 5, 4, 3, + -2, -3, -4, -5, 0, 5, 4, 3, 2, + -1, -2, -3, -4, 0, 4, 3, 2, 1 + }; + + for (int i = 0; i < 9*9; i++) { + m_data[i] = elements[i]; + } + } +} + +// Init prewitt filter. +void Kernel2::initPrewitt() +{ + if (m_windowSize == 3) + { + m_data[0] = -1; m_data[1] = 0; m_data[2] = -1; + m_data[3] = -1; m_data[4] = 0; m_data[5] = -1; + m_data[6] = -1; m_data[7] = 0; m_data[8] = -1; + } + else if (m_windowSize == 5) + { + // @@ Is this correct? + float elements[] = { + -2, -1, 0, 1, 2, + -2, -1, 0, 1, 2, + -2, -1, 0, 1, 2, + -2, -1, 0, 1, 2, + -2, -1, 0, 1, 2 + }; + + for (int i = 0; i < 5*5; i++) { + m_data[i] = elements[i]; + } + } +} + +// Init blended sobel filter. 
+void Kernel2::initBlendedSobel(const Vector4 & scale) +{ + nvCheck(m_windowSize == 9); + + { + const float elements[] = { + -1, -2, -3, -4, 0, 4, 3, 2, 1, + -2, -3, -4, -5, 0, 5, 4, 3, 2, + -3, -4, -5, -6, 0, 6, 5, 4, 3, + -4, -5, -6, -7, 0, 7, 6, 5, 4, + -5, -6, -7, -8, 0, 8, 7, 6, 5, + -4, -5, -6, -7, 0, 7, 6, 5, 4, + -3, -4, -5, -6, 0, 6, 5, 4, 3, + -2, -3, -4, -5, 0, 5, 4, 3, 2, + -1, -2, -3, -4, 0, 4, 3, 2, 1 + }; + + for (int i = 0; i < 9*9; i++) { + m_data[i] = elements[i] * scale.w; + } + } + { + const float elements[] = { + -1, -2, -3, 0, 3, 2, 1, + -2, -3, -4, 0, 4, 3, 2, + -3, -4, -5, 0, 5, 4, 3, + -4, -5, -6, 0, 6, 5, 4, + -3, -4, -5, 0, 5, 4, 3, + -2, -3, -4, 0, 4, 3, 2, + -1, -2, -3, 0, 3, 2, 1, + }; + + for (int i = 0; i < 7; i++) { + for (int e = 0; e < 7; e++) { + m_data[(i + 1) * 9 + e + 1] += elements[i * 7 + e] * scale.z; + } + } + } + { + const float elements[] = { + -1, -2, 0, 2, 1, + -2, -3, 0, 3, 2, + -3, -4, 0, 4, 3, + -2, -3, 0, 3, 2, + -1, -2, 0, 2, 1 + }; + + for (int i = 0; i < 5; i++) { + for (int e = 0; e < 5; e++) { + m_data[(i + 2) * 9 + e + 2] += elements[i * 5 + e] * scale.y; + } + } + } + { + const float elements[] = { + -1, 0, 1, + -2, 0, 2, + -1, 0, 1, + }; + + for (int i = 0; i < 3; i++) { + for (int e = 0; e < 3; e++) { + m_data[(i + 3) * 9 + e + 3] += elements[i * 3 + e] * scale.x; + } + } + } +} + + +PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples/*= 32*/) +{ + nvDebugCheck(samples > 0); + + float scale = float(dstLength) / float(srcLength); + const float iscale = 1.0f / scale; + + if (scale > 1) { + // Upsampling. 
+ samples = 1; + scale = 1; + } + + m_length = dstLength; + m_width = f.width() * iscale; + m_windowSize = (int)ceilf(m_width * 2) + 1; + + m_data = new float[m_windowSize * m_length]; + memset(m_data, 0, sizeof(float) * m_windowSize * m_length); + + for (uint i = 0; i < m_length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - m_width); + const int right = (int)ceilf(center + m_width); + nvDebugCheck(right - left <= m_windowSize); + + float total = 0.0f; + for (int j = 0; j < m_windowSize; j++) + { + const float sample = f.sampleBox(left + j - center, scale, samples); + + //printf("%f %X\n", sample, *(uint32 *)&sample); + + m_data[i * m_windowSize + j] = sample; + total += sample; + } + + // normalize weights. + for (int j = 0; j < m_windowSize; j++) + { + m_data[i * m_windowSize + j] /= total; + } + } +} + +PolyphaseKernel::~PolyphaseKernel() +{ + delete [] m_data; +} + + +// Print the kernel for debugging purposes. +void PolyphaseKernel::debugPrint() const +{ + for (uint i = 0; i < m_length; i++) + { + nvDebug("%d: ", i); + for (int j = 0; j < m_windowSize; j++) + { + nvDebug(" %6.4f", m_data[i * m_windowSize + j]); + } + nvDebug("\n"); + } +} + diff --git a/src/nvimage/Filter.h b/src/nvimage/Filter.h index 6ced86b..ab814f1 100644 --- a/src/nvimage/Filter.h +++ b/src/nvimage/Filter.h @@ -1,234 +1,234 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#pragma once -#ifndef NV_IMAGE_FILTER_H -#define NV_IMAGE_FILTER_H - -#include "nvimage.h" -#include "nvcore/Debug.h" - -namespace nv -{ - class Vector4; - - /// Base filter class. 
- class NVIMAGE_CLASS Filter - { - public: - Filter(float width); - virtual ~Filter(); - - float width() const { return m_width; } - float sampleDelta(float x, float scale) const; - float sampleBox(float x, float scale, int samples) const; - float sampleTriangle(float x, float scale, int samples) const; - - virtual float evaluate(float x) const = 0; - - protected: - const float m_width; - }; - - // Box filter. - class NVIMAGE_CLASS BoxFilter : public Filter - { - public: - BoxFilter(); - BoxFilter(float width); - virtual float evaluate(float x) const; - }; - - // Triangle (bilinear/tent) filter. - class NVIMAGE_CLASS TriangleFilter : public Filter - { - public: - TriangleFilter(); - TriangleFilter(float width); - virtual float evaluate(float x) const; - }; - - // Quadratic (bell) filter. - class NVIMAGE_CLASS QuadraticFilter : public Filter - { - public: - QuadraticFilter(); - virtual float evaluate(float x) const; - }; - - // Cubic filter from Thatcher Ulrich. - class NVIMAGE_CLASS CubicFilter : public Filter - { - public: - CubicFilter(); - virtual float evaluate(float x) const; - }; - - // Cubic b-spline filter from Paul Heckbert. - class NVIMAGE_CLASS BSplineFilter : public Filter - { - public: - BSplineFilter(); - virtual float evaluate(float x) const; - }; - - /// Mitchell & Netravali's two-param cubic - /// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88 - class NVIMAGE_CLASS MitchellFilter : public Filter - { - public: - MitchellFilter(); - virtual float evaluate(float x) const; - - void setParameters(float b, float c); - - private: - float p0, p2, p3; - float q0, q1, q2, q3; - }; - - // Lanczos3 filter. - class NVIMAGE_CLASS LanczosFilter : public Filter - { - public: - LanczosFilter(); - virtual float evaluate(float x) const; - }; - - // Sinc filter. - class NVIMAGE_CLASS SincFilter : public Filter - { - public: - SincFilter(float w); - virtual float evaluate(float x) const; - }; - - // Kaiser filter. 
- class NVIMAGE_CLASS KaiserFilter : public Filter - { - public: - KaiserFilter(float w); - virtual float evaluate(float x) const; - - void setParameters(float a, float stretch); - - private: - float alpha; - float stretch; - }; - - // Gaussian filter. - class GaussianFilter : public Filter - { - public: - GaussianFilter(float w); - virtual float evaluate(float x) const; - - void setParameters(float variance); - - private: - float variance; - }; - - - - /// A 1D kernel. Used to precompute filter weights. - class NVIMAGE_CLASS Kernel1 - { - NV_FORBID_COPY(Kernel1); - public: - Kernel1(const Filter & f, int iscale, int samples = 32); - ~Kernel1(); - - float valueAt(uint x) const { - nvDebugCheck(x < (uint)m_windowSize); - return m_data[x]; - } - - int windowSize() const { - return m_windowSize; - } - - float width() const { - return m_width; - } - - void debugPrint(); - - private: - int m_windowSize; - float m_width; - float * m_data; - }; - - - /// A 2D kernel. - class NVIMAGE_CLASS Kernel2 - { - public: - Kernel2(uint width); - Kernel2(uint width, const float * data); - Kernel2(const Kernel2 & k); - ~Kernel2(); - - void normalize(); - void transpose(); - - float valueAt(uint x, uint y) const { - return m_data[y * m_windowSize + x]; - } - - uint windowSize() const { - return m_windowSize; - } - - void initLaplacian(); - void initEdgeDetection(); - void initSobel(); - void initPrewitt(); - - void initBlendedSobel(const Vector4 & scale); - - private: - const uint m_windowSize; - float * m_data; - }; - - - /// A 1D polyphase kernel - class NVIMAGE_CLASS PolyphaseKernel - { - NV_FORBID_COPY(PolyphaseKernel); - public: - PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32); - ~PolyphaseKernel(); - - int windowSize() const { - return m_windowSize; - } - - uint length() const { - return m_length; - } - - float width() const { - return m_width; - } - - float valueAt(uint column, uint x) const { - nvDebugCheck(column < m_length); - 
nvDebugCheck(x < (uint)m_windowSize); - return m_data[column * m_windowSize + x]; - } - - void debugPrint() const; - - private: - int m_windowSize; - uint m_length; - float m_width; - float * m_data; - }; - -} // nv namespace - -#endif // NV_IMAGE_FILTER_H +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_IMAGE_FILTER_H +#define NV_IMAGE_FILTER_H + +#include "nvimage.h" +#include "nvcore/Debug.h" + +namespace nv +{ + class Vector4; + + /// Base filter class. + class NVIMAGE_CLASS Filter + { + public: + Filter(float width); + virtual ~Filter(); + + float width() const { return m_width; } + float sampleDelta(float x, float scale) const; + float sampleBox(float x, float scale, int samples) const; + float sampleTriangle(float x, float scale, int samples) const; + + virtual float evaluate(float x) const = 0; + + protected: + const float m_width; + }; + + // Box filter. + class NVIMAGE_CLASS BoxFilter : public Filter + { + public: + BoxFilter(); + BoxFilter(float width); + virtual float evaluate(float x) const; + }; + + // Triangle (bilinear/tent) filter. + class NVIMAGE_CLASS TriangleFilter : public Filter + { + public: + TriangleFilter(); + TriangleFilter(float width); + virtual float evaluate(float x) const; + }; + + // Quadratic (bell) filter. + class NVIMAGE_CLASS QuadraticFilter : public Filter + { + public: + QuadraticFilter(); + virtual float evaluate(float x) const; + }; + + // Cubic filter from Thatcher Ulrich. + class NVIMAGE_CLASS CubicFilter : public Filter + { + public: + CubicFilter(); + virtual float evaluate(float x) const; + }; + + // Cubic b-spline filter from Paul Heckbert. 
+ class NVIMAGE_CLASS BSplineFilter : public Filter + { + public: + BSplineFilter(); + virtual float evaluate(float x) const; + }; + + /// Mitchell & Netravali's two-param cubic + /// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88 + class NVIMAGE_CLASS MitchellFilter : public Filter + { + public: + MitchellFilter(); + virtual float evaluate(float x) const; + + void setParameters(float b, float c); + + private: + float p0, p2, p3; + float q0, q1, q2, q3; + }; + + // Lanczos3 filter. + class NVIMAGE_CLASS LanczosFilter : public Filter + { + public: + LanczosFilter(); + virtual float evaluate(float x) const; + }; + + // Sinc filter. + class NVIMAGE_CLASS SincFilter : public Filter + { + public: + SincFilter(float w); + virtual float evaluate(float x) const; + }; + + // Kaiser filter. + class NVIMAGE_CLASS KaiserFilter : public Filter + { + public: + KaiserFilter(float w); + virtual float evaluate(float x) const; + + void setParameters(float a, float stretch); + + private: + float alpha; + float stretch; + }; + + // Gaussian filter. + class GaussianFilter : public Filter + { + public: + GaussianFilter(float w); + virtual float evaluate(float x) const; + + void setParameters(float variance); + + private: + float variance; + }; + + + + /// A 1D kernel. Used to precompute filter weights. + class NVIMAGE_CLASS Kernel1 + { + NV_FORBID_COPY(Kernel1); + public: + Kernel1(const Filter & f, int iscale, int samples = 32); + ~Kernel1(); + + float valueAt(uint x) const { + nvDebugCheck(x < (uint)m_windowSize); + return m_data[x]; + } + + int windowSize() const { + return m_windowSize; + } + + float width() const { + return m_width; + } + + void debugPrint(); + + private: + int m_windowSize; + float m_width; + float * m_data; + }; + + + /// A 2D kernel. 
+ class NVIMAGE_CLASS Kernel2 + { + public: + Kernel2(uint width); + Kernel2(uint width, const float * data); + Kernel2(const Kernel2 & k); + ~Kernel2(); + + void normalize(); + void transpose(); + + float valueAt(uint x, uint y) const { + return m_data[y * m_windowSize + x]; + } + + uint windowSize() const { + return m_windowSize; + } + + void initLaplacian(); + void initEdgeDetection(); + void initSobel(); + void initPrewitt(); + + void initBlendedSobel(const Vector4 & scale); + + private: + const uint m_windowSize; + float * m_data; + }; + + + /// A 1D polyphase kernel + class NVIMAGE_CLASS PolyphaseKernel + { + NV_FORBID_COPY(PolyphaseKernel); + public: + PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32); + ~PolyphaseKernel(); + + int windowSize() const { + return m_windowSize; + } + + uint length() const { + return m_length; + } + + float width() const { + return m_width; + } + + float valueAt(uint column, uint x) const { + nvDebugCheck(column < m_length); + nvDebugCheck(x < (uint)m_windowSize); + return m_data[column * m_windowSize + x]; + } + + void debugPrint() const; + + private: + int m_windowSize; + uint m_length; + float m_width; + float * m_data; + }; + +} // nv namespace + +#endif // NV_IMAGE_FILTER_H diff --git a/src/nvimage/FloatImage.h b/src/nvimage/FloatImage.h index d618be0..39085c8 100644 --- a/src/nvimage/FloatImage.h +++ b/src/nvimage/FloatImage.h @@ -235,7 +235,7 @@ namespace nv nvDebugCheck(x < m_width); nvDebugCheck(y < m_height); nvDebugCheck(z < m_depth); - return m_mem[((c * m_depth + z) * m_height + y) * m_width + x]; + return m_mem[c * m_pixelCount + index(x, y, z)]; } /// Get pixel component. @@ -246,7 +246,7 @@ namespace nv nvDebugCheck(x < m_width); nvDebugCheck(y < m_height); nvDebugCheck(z < m_depth); - return m_mem[((c * m_depth + z) * m_height + y) * m_width + x]; + return m_mem[c * m_pixelCount + index(x, y, z)]; } /// Get pixel component. 
@@ -255,7 +255,7 @@ namespace nv nvDebugCheck(m_mem != NULL); nvDebugCheck(c < m_componentCount); nvDebugCheck(idx < m_pixelCount); - return m_mem[c * m_height * m_width + idx]; + return m_mem[c * m_pixelCount + idx]; } /// Get pixel component. @@ -264,7 +264,7 @@ namespace nv nvDebugCheck(m_mem != NULL); nvDebugCheck(c < m_componentCount); nvDebugCheck(idx < m_pixelCount); - return m_mem[c * m_height * m_width + idx]; + return m_mem[c * m_pixelCount + idx]; } /// Get pixel component. @@ -288,7 +288,9 @@ namespace nv nvDebugCheck(x < m_width); nvDebugCheck(y < m_height); nvDebugCheck(z < m_depth); - return (z * m_height + y) * m_width + x; + uint idx = (z * m_height + y) * m_width + x; + nvDebugCheck(idx < m_pixelCount); + return idx; } diff --git a/src/nvimage/Image.cpp b/src/nvimage/Image.cpp index 006c324..495110f 100644 --- a/src/nvimage/Image.cpp +++ b/src/nvimage/Image.cpp @@ -1,160 +1,160 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include "Image.h" -#include "ImageIO.h" - -#include "nvmath/Color.h" - -#include "nvcore/Debug.h" -#include "nvcore/Ptr.h" -#include "nvcore/Utils.h" // swap - - -using namespace nv; - -Image::Image() : m_width(0), m_height(0), m_format(Format_RGB), m_data(NULL) -{ -} - -Image::Image(const Image & img) : m_data(NULL) -{ - allocate(img.m_width, img.m_height, img.m_depth); - m_format = img.m_format; - memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height * m_depth); -} - -Image::~Image() -{ - free(); -} - -const Image & Image::operator=(const Image & img) -{ - allocate(img.m_width, img.m_height, m_depth); - m_format = img.m_format; - memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height * m_depth); - return *this; -} - - -void Image::allocate(uint w, uint h, uint d) -{ - free(); - m_width = w; - m_height = h; - m_depth = d; - m_data = realloc(m_data, w * h * d); -} - -bool Image::load(const char * name) -{ - free(); - - AutoPtr img(ImageIO::load(name)); - if (img == NULL) { - return false; - 
} - - swap(m_width, img->m_width); - swap(m_height, img->m_height); - swap(m_depth, img->m_depth); - swap(m_format, img->m_format); - swap(m_data, img->m_data); - - return true; -} - -void Image::wrap(void * data, uint w, uint h, uint d) -{ - free(); - m_data = (Color32 *)data; - m_width = w; - m_height = h; - m_depth = d; -} - -void Image::unwrap() -{ - m_data = NULL; - m_width = 0; - m_height = 0; - m_depth = 0; -} - - -void Image::free() -{ - ::free(m_data); - m_data = NULL; -} - - -uint Image::width() const -{ - return m_width; -} - -uint Image::height() const -{ - return m_height; -} - -uint Image::depth() const -{ - return m_depth; -} - -const Color32 * Image::scanline(uint h) const -{ - nvDebugCheck(h < m_height); - return m_data + h * m_width; -} - -Color32 * Image::scanline(uint h) -{ - nvDebugCheck(h < m_height); - return m_data + h * m_width; -} - -const Color32 * Image::pixels() const -{ - return m_data; -} - -Color32 * Image::pixels() -{ - return m_data; -} - -const Color32 & Image::pixel(uint idx) const -{ - nvDebugCheck(idx < m_width * m_height * m_depth); - return m_data[idx]; -} - -Color32 & Image::pixel(uint idx) -{ - nvDebugCheck(idx < m_width * m_height * m_depth); - return m_data[idx]; -} - - -Image::Format Image::format() const -{ - return m_format; -} - -void Image::setFormat(Image::Format f) -{ - m_format = f; -} - -void Image::fill(Color32 c) -{ - const uint size = m_width * m_height * m_depth; - for (uint i = 0; i < size; ++i) - { - m_data[i] = c; - } -} - +// This code is in the public domain -- castanyo@yahoo.es + +#include "Image.h" +#include "ImageIO.h" + +#include "nvmath/Color.h" + +#include "nvcore/Debug.h" +#include "nvcore/Ptr.h" +#include "nvcore/Utils.h" // swap + + +using namespace nv; + +Image::Image() : m_width(0), m_height(0), m_format(Format_RGB), m_data(NULL) +{ +} + +Image::Image(const Image & img) : m_data(NULL) +{ + allocate(img.m_width, img.m_height, img.m_depth); + m_format = img.m_format; + memcpy(m_data, img.m_data, 
sizeof(Color32) * m_width * m_height * m_depth); +} + +Image::~Image() +{ + free(); +} + +const Image & Image::operator=(const Image & img) +{ + allocate(img.m_width, img.m_height, m_depth); + m_format = img.m_format; + memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height * m_depth); + return *this; +} + + +void Image::allocate(uint w, uint h, uint d) +{ + free(); + m_width = w; + m_height = h; + m_depth = d; + m_data = realloc(m_data, w * h * d); +} + +bool Image::load(const char * name) +{ + free(); + + AutoPtr img(ImageIO::load(name)); + if (img == NULL) { + return false; + } + + swap(m_width, img->m_width); + swap(m_height, img->m_height); + swap(m_depth, img->m_depth); + swap(m_format, img->m_format); + swap(m_data, img->m_data); + + return true; +} + +void Image::wrap(void * data, uint w, uint h, uint d) +{ + free(); + m_data = (Color32 *)data; + m_width = w; + m_height = h; + m_depth = d; +} + +void Image::unwrap() +{ + m_data = NULL; + m_width = 0; + m_height = 0; + m_depth = 0; +} + + +void Image::free() +{ + ::free(m_data); + m_data = NULL; +} + + +uint Image::width() const +{ + return m_width; +} + +uint Image::height() const +{ + return m_height; +} + +uint Image::depth() const +{ + return m_depth; +} + +const Color32 * Image::scanline(uint h) const +{ + nvDebugCheck(h < m_height); + return m_data + h * m_width; +} + +Color32 * Image::scanline(uint h) +{ + nvDebugCheck(h < m_height); + return m_data + h * m_width; +} + +const Color32 * Image::pixels() const +{ + return m_data; +} + +Color32 * Image::pixels() +{ + return m_data; +} + +const Color32 & Image::pixel(uint idx) const +{ + nvDebugCheck(idx < m_width * m_height * m_depth); + return m_data[idx]; +} + +Color32 & Image::pixel(uint idx) +{ + nvDebugCheck(idx < m_width * m_height * m_depth); + return m_data[idx]; +} + + +Image::Format Image::format() const +{ + return m_format; +} + +void Image::setFormat(Image::Format f) +{ + m_format = f; +} + +void Image::fill(Color32 c) +{ + const 
uint size = m_width * m_height * m_depth; + for (uint i = 0; i < size; ++i) + { + m_data[i] = c; + } +} + diff --git a/src/nvimage/Image.h b/src/nvimage/Image.h index 9161e57..729ccd4 100644 --- a/src/nvimage/Image.h +++ b/src/nvimage/Image.h @@ -1,86 +1,86 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#pragma once -#ifndef NV_IMAGE_IMAGE_H -#define NV_IMAGE_IMAGE_H - -#include "nvimage.h" -#include "nvcore/Debug.h" - -namespace nv -{ - class Color32; - - /// 32 bit RGBA image. - class NVIMAGE_CLASS Image - { - public: - - enum Format - { - Format_RGB, - Format_ARGB, - }; - - Image(); - Image(const Image & img); - ~Image(); - - const Image & operator=(const Image & img); - - - void allocate(uint w, uint h, uint d = 1); - bool load(const char * name); - - void wrap(void * data, uint w, uint h, uint d = 1); - void unwrap(); - - uint width() const; - uint height() const; - uint depth() const; - - const Color32 * scanline(uint h) const; - Color32 * scanline(uint h); - - const Color32 * pixels() const; - Color32 * pixels(); - - const Color32 & pixel(uint idx) const; - Color32 & pixel(uint idx); - - const Color32 & pixel(uint x, uint y) const; - Color32 & pixel(uint x, uint y); - - Format format() const; - void setFormat(Format f); - - void fill(Color32 c); - - private: - void free(); - - private: - uint m_width; - uint m_height; - uint m_depth; - Format m_format; - Color32 * m_data; - }; - - - inline const Color32 & Image::pixel(uint x, uint y) const - { - nvDebugCheck(x < m_width && y < m_height); - return pixel(y * m_width + x); - } - - inline Color32 & Image::pixel(uint x, uint y) - { - nvDebugCheck(x < m_width && y < m_height); - return pixel(y * m_width + x); - } - -} // nv namespace - - -#endif // NV_IMAGE_IMAGE_H +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_IMAGE_IMAGE_H +#define NV_IMAGE_IMAGE_H + +#include "nvimage.h" +#include "nvcore/Debug.h" + +namespace nv +{ + class Color32; + + /// 32 bit RGBA 
image. + class NVIMAGE_CLASS Image + { + public: + + enum Format + { + Format_RGB, + Format_ARGB, + }; + + Image(); + Image(const Image & img); + ~Image(); + + const Image & operator=(const Image & img); + + + void allocate(uint w, uint h, uint d = 1); + bool load(const char * name); + + void wrap(void * data, uint w, uint h, uint d = 1); + void unwrap(); + + uint width() const; + uint height() const; + uint depth() const; + + const Color32 * scanline(uint h) const; + Color32 * scanline(uint h); + + const Color32 * pixels() const; + Color32 * pixels(); + + const Color32 & pixel(uint idx) const; + Color32 & pixel(uint idx); + + const Color32 & pixel(uint x, uint y) const; + Color32 & pixel(uint x, uint y); + + Format format() const; + void setFormat(Format f); + + void fill(Color32 c); + + private: + void free(); + + private: + uint m_width; + uint m_height; + uint m_depth; + Format m_format; + Color32 * m_data; + }; + + + inline const Color32 & Image::pixel(uint x, uint y) const + { + nvDebugCheck(x < m_width && y < m_height); + return pixel(y * m_width + x); + } + + inline Color32 & Image::pixel(uint x, uint y) + { + nvDebugCheck(x < m_width && y < m_height); + return pixel(y * m_width + x); + } + +} // nv namespace + + +#endif // NV_IMAGE_IMAGE_H diff --git a/src/nvimage/ImageIO.cpp b/src/nvimage/ImageIO.cpp index f0d5b32..9a81c00 100644 --- a/src/nvimage/ImageIO.cpp +++ b/src/nvimage/ImageIO.cpp @@ -1,1943 +1,1943 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include "ImageIO.h" -#include "Image.h" -#include "FloatImage.h" -#include "TgaFile.h" -#include "PsdFile.h" -#include "DirectDrawSurface.h" -#include "PixelFormat.h" - -#include "nvmath/Color.h" -#include "nvmath/Half.h" - -#include "nvcore/Ptr.h" -#include "nvcore/Utils.h" -#include "nvcore/Array.h" -#include "nvcore/StrLib.h" -#include "nvcore/StdStream.h" -#include "nvcore/TextWriter.h" - -// Extern -#if defined(HAVE_FREEIMAGE) -# include -// If FreeImage available, do not use 
individual libraries, since that produces link conflicts in some platforms. -# undef HAVE_JPEG -# undef HAVE_PNG -# undef HAVE_TIFF -# undef HAVE_OPENEXR -#endif - -#if defined(HAVE_JPEG) -extern "C" { -# include -} -#endif - -#if defined(HAVE_PNG) -# include -#endif - -#if defined(HAVE_TIFF) -# define _TIFF_DATA_TYPEDEFS_ -# include -#endif - -#if defined(HAVE_OPENEXR) -# include -# include -# include -# include -# include -# include -#endif - -#if defined(HAVE_STBIMAGE) -# define STBI_NO_STDIO -# include -#endif - - -using namespace nv; - - - -struct Color555 { - uint16 b : 5; - uint16 g : 5; - uint16 r : 5; -}; - -// Load TGA image. -static Image * loadTGA(Stream & s) -{ - nvCheck(!s.isError()); - nvCheck(s.isLoading()); - - TgaHeader tga; - s << tga; - s.seek(TgaHeader::Size + tga.id_length); - - // Get header info. - bool rle = false; - bool pal = false; - bool rgb = false; - bool grey = false; - - switch( tga.image_type ) { - case TGA_TYPE_RLE_INDEXED: - rle = true; - // no break is intended! - case TGA_TYPE_INDEXED: - if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) { - nvDebug( "*** loadTGA: Error, only 24bit paletted images are supported.\n" ); - return NULL; - } - pal = true; - break; - - case TGA_TYPE_RLE_RGB: - rle = true; - // no break is intended! - case TGA_TYPE_RGB: - rgb = true; - break; - - case TGA_TYPE_RLE_GREY: - rle = true; - // no break is intended! - case TGA_TYPE_GREY: - grey = true; - break; - - default: - nvDebug( "*** loadTGA: Error, unsupported image type.\n" ); - return NULL; - } - - const uint pixel_size = (tga.pixel_size/8); - nvDebugCheck(pixel_size <= 4); - - const uint size = tga.width * tga.height * pixel_size; - - - // Read palette - uint8 palette[768]; - if( pal ) { - nvDebugCheck(tga.colormap_length <= 256); - s.serialize(palette, 3 * tga.colormap_length); - } - - // Decode image. - uint8 * mem = new uint8[size]; - if( rle ) { - // Decompress image in src. 
- uint8 * dst = mem; - int num = size; - - while (num > 0) { - // Get packet header - uint8 c; - s << c; - - uint count = (c & 0x7f) + 1; - num -= count * pixel_size; - - if (c & 0x80) { - // RLE pixels. - uint8 pixel[4]; // uint8 pixel[pixel_size]; - s.serialize( pixel, pixel_size ); - do { - memcpy(dst, pixel, pixel_size); - dst += pixel_size; - } while (--count); - } - else { - // Raw pixels. - count *= pixel_size; - //file->Read8(dst, count); - s.serialize(dst, count); - dst += count; - } - } - } - else { - s.serialize(mem, size); - } - - // Allocate image. - AutoPtr img(new Image()); - img->allocate(tga.width, tga.height); - - int lstep; - Color32 * dst; - if( tga.flags & TGA_ORIGIN_UPPER ) { - lstep = tga.width; - dst = img->pixels(); - } - else { - lstep = - tga.width; - dst = img->pixels() + (tga.height-1) * tga.width; - } - - // Write image. - uint8 * src = mem; - if( pal ) { - for( int y = 0; y < tga.height; y++ ) { - for( int x = 0; x < tga.width; x++ ) { - uint8 idx = *src++; - dst[x].setBGRA(palette[3*idx+0], palette[3*idx+1], palette[3*idx+2], 0xFF); - } - dst += lstep; - } - } - else if( grey ) { - img->setFormat(Image::Format_ARGB); - - for( int y = 0; y < tga.height; y++ ) { - for( int x = 0; x < tga.width; x++ ) { - dst[x].setBGRA(*src, *src, *src, *src); - src++; - } - dst += lstep; - } - } - else { - - if( tga.pixel_size == 16 ) { - for( int y = 0; y < tga.height; y++ ) { - for( int x = 0; x < tga.width; x++ ) { - Color555 c = *reinterpret_cast(src); - uint8 b = (c.b << 3) | (c.b >> 2); - uint8 g = (c.g << 3) | (c.g >> 2); - uint8 r = (c.r << 3) | (c.r >> 2); - dst[x].setBGRA(b, g, r, 0xFF); - src += 2; - } - dst += lstep; - } - } - else if( tga.pixel_size == 24 ) { - for( int y = 0; y < tga.height; y++ ) { - for( int x = 0; x < tga.width; x++ ) { - dst[x].setBGRA(src[0], src[1], src[2], 0xFF); - src += 3; - } - dst += lstep; - } - } - else if( tga.pixel_size == 32 ) { - img->setFormat(Image::Format_ARGB); - - for( int y = 0; y < tga.height; y++ 
) { - for( int x = 0; x < tga.width; x++ ) { - dst[x].setBGRA(src[0], src[1], src[2], src[3]); - src += 4; - } - dst += lstep; - } - } - } - - // free uncompressed data. - delete [] mem; - - return img.release(); -} - -// Save TGA image. -static bool saveTGA(Stream & s, const Image * img) -{ - nvCheck(!s.isError()); - nvCheck(img != NULL); - nvCheck(img->pixels() != NULL); - - TgaFile tga; - tga.head.id_length = 0; - tga.head.colormap_type = 0; - tga.head.image_type = TGA_TYPE_RGB; - - tga.head.colormap_index = 0; - tga.head.colormap_length = 0; - tga.head.colormap_size = 0; - - tga.head.x_origin = 0; - tga.head.y_origin = 0; - tga.head.width = img->width(); - tga.head.height = img->height(); - if(img->format() == Image::Format_ARGB) { - tga.head.pixel_size = 32; - tga.head.flags = TGA_ORIGIN_UPPER | TGA_HAS_ALPHA; - } - else { - tga.head.pixel_size = 24; - tga.head.flags = TGA_ORIGIN_UPPER; - } - - // @@ Serialize directly. - tga.allocate(); - - const uint n = img->width() * img->height(); - if(img->format() == Image::Format_ARGB) { - for(uint i = 0; i < n; i++) { - Color32 color = img->pixel(i); - tga.mem[4 * i + 0] = color.b; - tga.mem[4 * i + 1] = color.g; - tga.mem[4 * i + 2] = color.r; - tga.mem[4 * i + 3] = color.a; - } - } - else { - for(uint i = 0; i < n; i++) { - Color32 color = img->pixel(i); - tga.mem[3 * i + 0] = color.b; - tga.mem[3 * i + 1] = color.g; - tga.mem[3 * i + 2] = color.r; - } - } - - s << tga; - - tga.free(); - - return true; -} - -/*static Image * loadPPM(Stream & s) -{ - // @@ - return NULL; -}*/ - -// Save PPM image. 
-static bool savePPM(Stream & s, const Image * img) -{ - //if (img->depth() != 1) return false; - //if (img->format() == Image::Format_ARGB) return false; - - uint w = img->width(); - uint h = img->height(); - - TextWriter writer(&s); - writer.write("P6\n"); - writer.write("%d %d\n", w, h); - writer.write("255\n"); - for (uint i = 0; i < w * h; i++) { - Color32 c = img->pixel(i); - s << c.r << c.g << c.b; - } - - return true; -} - - -/*static FloatImage * loadFloatPFM(Stream & s) -{ - return NULL; -}*/ - -/*static bool saveFloatPFM(Stream & s, const FloatImage * img, uint base_channel, uint channel_count) -{ - return false; -}*/ - -// Load PSD image. -static Image * loadPSD(Stream & s) -{ - nvCheck(!s.isError()); - nvCheck(s.isLoading()); - - s.setByteOrder(Stream::BigEndian); - - PsdHeader header; - s << header; - - if (!header.isValid()) - { - printf("invalid header!\n"); - return NULL; - } - - if (!header.isSupported()) - { - printf("unsupported file!\n"); - return NULL; - } - - int tmp; - - // Skip mode data. - s << tmp; - s.seek(s.tell() + tmp); - - // Skip image resources. - s << tmp; - s.seek(s.tell() + tmp); - - // Skip the reserved data. - s << tmp; - s.seek(s.tell() + tmp); - - // Find out if the data is compressed. - // Known values: - // 0: no compression - // 1: RLE compressed - uint16 compression; - s << compression; - - if (compression > 1) { - // Unknown compression type. - return NULL; - } - - uint channel_num = header.channel_count; - - AutoPtr img(new Image()); - img->allocate(header.width, header.height); - - if (channel_num < 4) - { - // Clear the image. - img->fill(Color32(0, 0, 0, 0xFF)); - } - else - { - // Enable alpha. - img->setFormat(Image::Format_ARGB); - - // Ignore remaining channels. 
- channel_num = 4; - } - - - const uint pixel_count = header.height * header.width; - - static const uint components[4] = {2, 1, 0, 3}; - - if (compression) - { - s.seek(s.tell() + header.height * header.channel_count * sizeof(uint16)); - - // Read RLE data. - for (uint channel = 0; channel < channel_num; channel++) - { - uint8 * ptr = (uint8 *)img->pixels() + components[channel]; - - uint count = 0; - while( count < pixel_count ) - { - if (s.isAtEnd()) return NULL; - - uint8 c; - s << c; - - uint len = c; - if (len < 128) - { - // Copy next len+1 bytes literally. - len++; - count += len; - if (count > pixel_count) return NULL; - - while (len != 0) - { - s << *ptr; - ptr += 4; - len--; - } - } - else if (len > 128) - { - // Next -len+1 bytes in the dest are replicated from next source byte. - // (Interpret len as a negative 8-bit int.) - len ^= 0xFF; - len += 2; - count += len; - if (s.isAtEnd() || count > pixel_count) return NULL; - - uint8 val; - s << val; - while( len != 0 ) { - *ptr = val; - ptr += 4; - len--; - } - } - else if( len == 128 ) { - // No-op. - } - } - } - } - else - { - // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) - // where each channel consists of an 8-bit value for each pixel in the image. - - // Read the data by channel. - for (uint channel = 0; channel < channel_num; channel++) - { - uint8 * ptr = (uint8 *)img->pixels() + components[channel]; - - // Read the data. - uint count = pixel_count; - while (count != 0) - { - s << *ptr; - ptr += 4; - count--; - } - } - } - - return img.release(); -} - -static FloatImage * loadFloatDDS(Stream & s) -{ - nvCheck(s.isLoading()); - nvCheck(!s.isError()); - - DDSHeader header; - s << header; - - static const uint D3DFMT_A16B16G16R16F = 113; - - // @@ We only support RGBA16F for now. 
- if (header.pf.fourcc == D3DFMT_A16B16G16R16F) { - const int size = header.width * header.height; - uint16 * const data = new uint16[size * 4]; - - s.serialize(data, size * 4 * sizeof(uint16)); - - FloatImage * img = new FloatImage; - img->allocate(4, header.width, header.height); - - uint32 * r = (uint32 *)img->channel(0); - uint32 * g = (uint32 *)img->channel(1); - uint32 * b = (uint32 *)img->channel(2); - uint32 * a = (uint32 *)img->channel(3); - - uint16 * ptr = data; - for (int i = 0; i < size; i++) { - *r++ = half_to_float( *ptr++ ); - *g++ = half_to_float( *ptr++ ); - *b++ = half_to_float( *ptr++ ); - *a++ = half_to_float( *ptr++ ); - } - - delete [] data; - - return img; - } - - return NULL; -} - -static bool saveFloatDDS(Stream & s, const FloatImage * img, uint base_component, uint num_components) -{ - nvCheck(s.isSaving()); - nvCheck(!s.isError()); - - if (num_components != 4) return false; - - static const uint D3DFMT_A16B16G16R16F = 113; - - DDSHeader header; - header.setTexture2D(); - header.setWidth(img->width()); - header.setHeight(img->height()); - header.setFormatCode(D3DFMT_A16B16G16R16F); - // ... 
- - s << header; - - uint32 * r = (uint32 *)img->channel(base_component + 0); - uint32 * g = (uint32 *)img->channel(base_component + 1); - uint32 * b = (uint32 *)img->channel(base_component + 2); - uint32 * a = (uint32 *)img->channel(base_component + 3); - - const uint size = img->width() * img->height(); - for (uint i = 0; i < size; i++) { - uint16 R = half_from_float( *r++ ); - uint16 G = half_from_float( *g++ ); - uint16 B = half_from_float( *b++ ); - uint16 A = half_from_float( *a++ ); - - s.serialize(&R, sizeof(uint16)); - s.serialize(&G, sizeof(uint16)); - s.serialize(&B, sizeof(uint16)); - s.serialize(&A, sizeof(uint16)); - } - - return true; -} - - -#if defined(HAVE_PNG) - -static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length) -{ - nvDebugCheck(png_ptr != NULL); - - Stream * s = (Stream *)png_get_io_ptr(png_ptr); - s->serialize(data, (int)length); - - if (s->isError()) { - png_error(png_ptr, "Read Error"); - } -} - - -static Image * loadPNG(Stream & s) -{ - nvCheck(!s.isError()); - - // Set up a read buffer and check the library version - png_structp png_ptr; - png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - if (png_ptr == NULL) { - // nvDebug( "*** LoadPNG: Error allocating read buffer in file '%s'.\n", name ); - return false; - } - - // Allocate/initialize a memory block for the image information - png_infop info_ptr = png_create_info_struct(png_ptr); - if (info_ptr == NULL) { - png_destroy_read_struct(&png_ptr, NULL, NULL); - // nvDebug( "*** LoadPNG: Error allocating image information for '%s'.\n", name ); - return false; - } - - // Set up the error handling - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - // nvDebug( "*** LoadPNG: Error reading png file '%s'.\n", name ); - return false; - } - - // Set up the I/O functions. 
- png_set_read_fn(png_ptr, (void*)&s, user_read_data); - - - // Retrieve the image header information - png_uint_32 width, height; - int bit_depth, color_type, interlace_type; - png_read_info(png_ptr, info_ptr); - png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL); - - - if (color_type == PNG_COLOR_TYPE_PALETTE && bit_depth <= 8) { - // Convert indexed images to RGB. - png_set_expand(png_ptr); - } - else if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) { - // Convert grayscale to RGB. - png_set_expand(png_ptr); - } - else if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) { - // Expand images with transparency to full alpha channels - // so the data will be available as RGBA quartets. - png_set_expand(png_ptr); - } - else if (bit_depth < 8) { - // If we have < 8 scale it up to 8. - //png_set_expand(png_ptr); - png_set_packing(png_ptr); - } - - // Reduce bit depth. - if (bit_depth == 16) { - png_set_strip_16(png_ptr); - } - - // Represent gray as RGB - if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA) { - png_set_gray_to_rgb(png_ptr); - } - - // Convert to RGBA filling alpha with 0xFF. - if (!(color_type & PNG_COLOR_MASK_ALPHA)) { - png_set_filler(png_ptr, 0xFF, PNG_FILLER_AFTER); - } - - // @todo Choose gamma according to the platform? - double screen_gamma = 2.2; - int intent; - if (png_get_sRGB(png_ptr, info_ptr, &intent)) { - png_set_gamma(png_ptr, screen_gamma, 0.45455); - } - else { - double image_gamma; - if (png_get_gAMA(png_ptr, info_ptr, &image_gamma)) { - png_set_gamma(png_ptr, screen_gamma, image_gamma); - } - else { - png_set_gamma(png_ptr, screen_gamma, 0.45455); - } - } - - // Perform the selected transforms. - png_read_update_info(png_ptr, info_ptr); - - png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL); - - AutoPtr img(new Image()); - img->allocate(width, height); - - // Set internal format flags. 
- if(color_type & PNG_COLOR_MASK_COLOR) { - //img->flags |= PI_IF_HAS_COLOR; - } - if(color_type & PNG_COLOR_MASK_ALPHA) { - //img->flags |= PI_IF_HAS_ALPHA; - img->setFormat(Image::Format_ARGB); - } - - // Read the image - uint8 * pixels = (uint8 *)img->pixels(); - png_bytep * row_data = new png_bytep[sizeof(png_byte) * height]; - for (uint i = 0; i < height; i++) { - row_data[i] = &(pixels[width * 4 * i]); - } - - png_read_image(png_ptr, row_data); - delete [] row_data; - - // Finish things up - png_read_end(png_ptr, info_ptr); - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - - // RGBA to BGRA. - uint num = width * height; - for(uint i = 0; i < num; i++) - { - Color32 c = img->pixel(i); - img->pixel(i) = Color32(c.b, c.g, c.r, c.a); - } - - // Compute alpha channel if needed. - /*if( img->flags & PI_IU_BUMPMAP || img->flags & PI_IU_ALPHAMAP ) { - if( img->flags & PI_IF_HAS_COLOR && !(img->flags & PI_IF_HAS_ALPHA)) { - img->ComputeAlphaFromColor(); - } - }*/ - - return img.release(); -} - -static void user_write_data(png_structp png_ptr, png_bytep data, png_size_t length) -{ - nvDebugCheck(png_ptr != NULL); - - Stream * s = (Stream *)png_get_io_ptr(png_ptr); - s->serialize(data, (int)length); - - if (s->isError()) { - png_error(png_ptr, "Write Error"); - } -} - -static void user_write_flush(png_structp png_ptr) { } - -static bool savePNG(Stream & s, const Image * img, const char ** tags/*=NULL*/) -{ - nvCheck(!s.isError()); - nvCheck(img != NULL); - nvCheck(img->pixels() != NULL); - - // Set up a write buffer and check the library version - png_structp png_ptr; - png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - if (png_ptr == NULL) { - return false; - } - - // Allocate/initialize a memory block for the image information - png_infop info_ptr = png_create_info_struct(png_ptr); - if (info_ptr == NULL) { - png_destroy_write_struct(&png_ptr, NULL); - return false; - } - - // Set up the error handling - if 
(setjmp(png_jmpbuf(png_ptr))) { - png_destroy_write_struct(&png_ptr, &info_ptr); - return false; - } - - // Set up the I/O functions. - png_set_write_fn(png_ptr, (void*)&s, user_write_data, user_write_flush); - - // Set image header information - int color_type = PNG_COLOR_TYPE_RGBA; - switch(img->format()) - { - case Image::Format_RGB: color_type = PNG_COLOR_TYPE_RGB; break; - case Image::Format_ARGB: color_type = PNG_COLOR_TYPE_RGBA; break; - } - png_set_IHDR(png_ptr, info_ptr, img->width(), img->height(), - 8, color_type, PNG_INTERLACE_NONE, - PNG_COMPRESSION_TYPE_DEFAULT, - PNG_FILTER_TYPE_DEFAULT); - - // Set image data - png_bytep * row_data = new png_bytep[sizeof(png_byte) * img->height()]; - for (uint i = 0; i < img->height(); i++) { - row_data[i] = (png_byte*)img->scanline (i); - if (img->format() == Image::Format_RGB) row_data[i]--; // This is a bit of a hack, libpng expects images in ARGB format not BGRA, it supports BGR swapping, but not alpha swapping. - } - png_set_rows(png_ptr, info_ptr, row_data); - - png_text * text = NULL; - if (tags != NULL) - { - int count = 0; - while(tags[2 * count] != NULL) count++; - - text = new png_text[count]; - memset(text, 0, count * sizeof(png_text); - - for (int i = 0; i < count; i++) { - text[i].compression = PNG_TEXT_COMPRESSION_NONE; - text[i].key = tags[2 * i + 0]; - text[i].text = tags[2 * i + 1]; - } - - png_set_text(png_ptr, info_ptr, text, count); - } - - png_write_png(png_ptr, info_ptr, - // component order is BGR(A) - PNG_TRANSFORM_BGR | - // Strip alpha byte for RGB images - (img->format() == Image::Format_RGB ? 
PNG_TRANSFORM_STRIP_FILLER : 0) - , NULL); - - // Finish things up - png_destroy_write_struct(&png_ptr, &info_ptr); - - delete [] row_data; - delete [] text; - - return true; -} - -#endif // defined(HAVE_PNG) - -#if defined(HAVE_JPEG) - -static void init_source (j_decompress_ptr /*cinfo*/){ -} - -static boolean fill_input_buffer (j_decompress_ptr cinfo) { - struct jpeg_source_mgr * src = cinfo->src; - static JOCTET FakeEOI[] = { 0xFF, JPEG_EOI }; - - // Generate warning - nvDebug("jpeglib: Premature end of file\n"); - - // Insert a fake EOI marker - src->next_input_byte = FakeEOI; - src->bytes_in_buffer = 2; - - return TRUE; -} - -static void skip_input_data (j_decompress_ptr cinfo, long num_bytes) { - struct jpeg_source_mgr * src = cinfo->src; - - if(num_bytes >= (long)src->bytes_in_buffer) { - fill_input_buffer(cinfo); - return; - } - - src->bytes_in_buffer -= num_bytes; - src->next_input_byte += num_bytes; -} - -static void term_source (j_decompress_ptr /*cinfo*/){ - // no work necessary here -} - - -static Image * loadJPG(Stream & s) -{ - nvCheck(!s.isError()); - - // Read the entire file. 
- Array byte_array; - byte_array.resize(s.size()); - s.serialize(byte_array.buffer(), s.size()); - - jpeg_decompress_struct cinfo; - jpeg_error_mgr jerr; - - cinfo.err = jpeg_std_error(&jerr); - jpeg_create_decompress(&cinfo); - - cinfo.src = (struct jpeg_source_mgr *) (*cinfo.mem->alloc_small) - ((j_common_ptr) &cinfo, JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)); - cinfo.src->init_source = init_source; - cinfo.src->fill_input_buffer = fill_input_buffer; - cinfo.src->skip_input_data = skip_input_data; - cinfo.src->resync_to_restart = jpeg_resync_to_restart; // use default method - cinfo.src->term_source = term_source; - cinfo.src->bytes_in_buffer = byte_array.size(); - cinfo.src->next_input_byte = byte_array.buffer(); - - jpeg_read_header(&cinfo, TRUE); - jpeg_start_decompress(&cinfo); - - /* - cinfo.do_fancy_upsampling = FALSE; // fast decompression - cinfo.dct_method = JDCT_FLOAT; // Choose floating point DCT method. - */ - - uint8 * tmp_buffer = new uint8 [cinfo.output_width * cinfo.output_height * cinfo.num_components]; - uint8 * scanline = tmp_buffer; - - while( cinfo.output_scanline < cinfo.output_height ){ - int num_scanlines = jpeg_read_scanlines (&cinfo, &scanline, 1); - scanline += num_scanlines * cinfo.output_width * cinfo.num_components; - } - - jpeg_finish_decompress(&cinfo); - - AutoPtr img(new Image()); - img->allocate(cinfo.output_width, cinfo.output_height); - - Color32 * dst = img->pixels(); - const int size = img->height() * img->width(); - const uint8 * src = tmp_buffer; - - if( cinfo.num_components == 3 ) { - img->setFormat(Image::Format_RGB); - for( int i = 0; i < size; i++ ) { - *dst++ = Color32(src[0], src[1], src[2]); - src += 3; - } - } - else { - img->setFormat(Image::Format_ARGB); - for( int i = 0; i < size; i++ ) { - *dst++ = Color32(*src, *src, *src, *src); - src++; - } - } - - delete [] tmp_buffer; - jpeg_destroy_decompress (&cinfo); - - return img.release(); -} - -#endif // defined(HAVE_JPEG) - -#if defined(HAVE_TIFF) - -/* 
-static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size) -{ - Stream * s = (Stream *)h; - nvDebugCheck(s != NULL); - - s->serialize(ptr, size); - - return size; -} - -static toff_t tiffSeekProc(thandle_t h, toff_t offset, int whence) -{ - Stream * s = (Stream *)h; - nvDebugCheck(s != NULL); - - if (!s->isSeekable()) - { - return (toff_t)-1; - } - - if (whence == SEEK_SET) - { - s->seek(offset); - } - else if (whence == SEEK_CUR) - { - s->seek(s->tell() + offset); - } - else if (whence == SEEK_END) - { - s->seek(s->size() + offset); - } - - return s->tell(); -} - -static int tiffCloseProc(thandle_t) -{ - return 0; -} - -static toff_t tiffSizeProc(thandle_t h) -{ - Stream * s = (Stream *)h; - nvDebugCheck(s != NULL); - return s->size(); -} - -static int tiffMapFileProc(thandle_t, tdata_t*, toff_t*) -{ - // @@ TODO, Implement these functions. - return -1; -} - -static void tiffUnmapFileProc(thandle_t, tdata_t, toff_t) -{ - // @@ TODO, Implement these functions. -} -*/ - -static FloatImage * loadFloatTIFF(const char * fileName, Stream & s) -{ - nvCheck(!s.isError()); - - TIFF * tif = TIFFOpen(fileName, "r"); - //TIFF * tif = TIFFClientOpen(fileName, "r", &s, tiffReadWriteProc, tiffReadWriteProc, tiffSeekProc, tiffCloseProc, tiffSizeProc, tiffMapFileProc, tiffUnmapFileProc); - - if (!tif) - { - nvDebug("Can't open '%s' for reading\n", fileName); - return NULL; - } - - ::uint16 spp, bpp, format; - ::uint32 width, height; - TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height); - TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &width); - TIFFGetField(tif, TIFFTAG_BITSPERSAMPLE, &bpp); - TIFFGetField(tif, TIFFTAG_SAMPLESPERPIXEL, &spp); - TIFFGetField(tif, TIFFTAG_SAMPLEFORMAT, &format); - - if (bpp != 8 && bpp != 16 && bpp != 32) { - nvDebug("Can't load '%s', only 1 sample per pixel supported\n", fileName); - TIFFClose(tif); - return NULL; - } - - AutoPtr fimage(new FloatImage()); - fimage->allocate(spp, width, height); - - int linesize = TIFFScanlineSize(tif); - tdata_t 
buf = malloc(linesize); - - for (uint y = 0; y < height; y++) - { - TIFFReadScanline(tif, buf, y, 0); - - for (uint c=0; cscanline(y, c); - - for(uint x = 0; x < width; x++) - { - if (bpp == 8) - { - dst[x] = float(((::uint8 *)buf)[x*spp+c]) / float(0xFF); - } - else if (bpp == 16) - { - dst[x] = float(((::uint16 *)buf)[x*spp+c]) / float(0xFFFF); - } - else if (bpp == 32) - { - if (format==SAMPLEFORMAT_IEEEFP) - { - dst[x] = float(((float *)buf)[x*spp+c]); - } - else - { - dst[x] = float(((::uint32 *)buf)[x*spp+c] >> 8) / float(0xFFFFFF); - } - } - } - } - } - - free(buf); - - TIFFClose(tif); - - return fimage.release(); -} - -static bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) -{ - nvCheck(fileName != NULL); - nvCheck(fimage != NULL); - nvCheck(base_component + num_components <= fimage->componentCount()); - - const int iW = fimage->width(); - const int iH = fimage->height(); - const int iC = num_components; - - TIFF * image = TIFFOpen(fileName, "w"); - - // Open the TIFF file - if (image == NULL) - { - nvDebug("Could not open '%s' for writing\n", fileName); - return false; - } - - TIFFSetField(image, TIFFTAG_IMAGEWIDTH, iW); - TIFFSetField(image, TIFFTAG_IMAGELENGTH, iH); - TIFFSetField(image, TIFFTAG_SAMPLESPERPIXEL, iC); - TIFFSetField(image, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP); - TIFFSetField(image, TIFFTAG_BITSPERSAMPLE, 32); - - uint32 rowsperstrip = TIFFDefaultStripSize(image, (uint32)-1); - - TIFFSetField(image, TIFFTAG_ROWSPERSTRIP, rowsperstrip); - TIFFSetField(image, TIFFTAG_COMPRESSION, COMPRESSION_PACKBITS); - if (num_components == 3) - { - // Set this so that it can be visualized with pfstools. 
- TIFFSetField(image, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB); - } - TIFFSetField(image, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT); - TIFFSetField(image, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG); - - float * scanline = new float[iW * iC]; - for (int y = 0; y < iH; y++) - { - for (int c = 0; c < iC; c++) - { - const float * src = fimage->scanline(y, base_component + c); - for (int x = 0; x < iW; x++) scanline[x * iC + c] = src[x]; - } - if (TIFFWriteScanline(image, scanline, y, 0)==-1) - { - nvDebug("Error writing scanline %d\n", y); - return false; - } - } - delete [] scanline; - - // Close the file - TIFFClose(image); - return true; -} - -#endif // defined(HAVE_TIFF) - -#if defined(HAVE_OPENEXR) - -namespace -{ - class ExrStream : public Imf::IStream - { - public: - ExrStream(const char * name, Stream & s) : Imf::IStream(name), m_stream(s) - { - nvDebugCheck(s.isLoading()); - } - - virtual bool read(char c[], int n) - { - m_stream.serialize(c, n); - - if (m_stream.isError()) - { - throw Iex::InputExc("I/O error."); - } - - return m_stream.isAtEnd(); - } - - virtual Imf::Int64 tellg() - { - return m_stream.tell(); - } - - virtual void seekg(Imf::Int64 pos) - { - nvDebugCheck(pos >= 0 && pos < UINT_MAX); - m_stream.seek((uint)pos); - } - - virtual void clear() - { - m_stream.clearError(); - } - - private: - Stream & m_stream; - }; - - static int channelIndexFromName(const char* name) - { - char c = tolower(name[0]); - switch (c) - { - default: - case 'r': - return 0; - case 'g': - return 1; - case 'b': - return 2; - case 'a': - return 3; - } - } - -} // namespace - -static FloatImage * loadFloatEXR(const char * fileName, Stream & s) -{ - nvCheck(s.isLoading()); - nvCheck(!s.isError()); - - ExrStream stream(fileName, s); - Imf::InputFile inputFile(stream); - - Imath::Box2i box = inputFile.header().dataWindow(); - - int width = box.max.x - box.min.y + 1; - int height = box.max.x - box.min.y + 1; - - const Imf::ChannelList & channels = inputFile.header().channels(); - 
- // Count channels. - uint channelCount= 0; - for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it) - { - channelCount++; - } - - // Allocate FloatImage. - AutoPtr fimage(new FloatImage()); - fimage->allocate(channelCount, width, height); - - // Describe image's layout with a framebuffer. - Imf::FrameBuffer frameBuffer; - uint i = 0; - for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it, ++i) - { - int channelIndex = channelIndexFromName(it.name()); - frameBuffer.insert(it.name(), Imf::Slice(Imf::FLOAT, (char *)fimage->channel(channelIndex), sizeof(float), sizeof(float) * width)); - } - - // Read it. - inputFile.setFrameBuffer (frameBuffer); - inputFile.readPixels (box.min.y, box.max.y); - - return fimage.release(); -} - -static bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) -{ - nvCheck(fileName != NULL); - nvCheck(fimage != NULL); - nvCheck(base_component + num_components <= fimage->componentCount()); - nvCheck(num_components > 0 && num_components <= 4); - - const int w = fimage->width(); - const int h = fimage->height(); - - const char * channelNames[] = {"R", "G", "B", "A"}; - - Imf::Header header (w, h); - - for (uint c = 0; c < num_components; c++) - { - header.channels().insert(channelNames[c], Imf::Channel(Imf::FLOAT)); - } - - Imf::OutputFile file(fileName, header); - Imf::FrameBuffer frameBuffer; - - for (uint c = 0; c < num_components; c++) - { - char * channel = (char *) fimage->channel(base_component + c); - frameBuffer.insert(channelNames[c], Imf::Slice(Imf::FLOAT, channel, sizeof(float), sizeof(float) * w)); - } - - file.setFrameBuffer(frameBuffer); - file.writePixels(h); - - return true; -} - -#endif // defined(HAVE_OPENEXR) - - -#if defined(HAVE_FREEIMAGE) - -static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle) -{ - Stream * s = (Stream *) handle; - s->serialize(buffer, 
size * count); - return count; -} - -static unsigned DLL_CALLCONV WriteProc(void *buffer, unsigned size, unsigned count, fi_handle handle) -{ - Stream * s = (Stream *) handle; - s->serialize(buffer, size * count); - return count; -} - -static int DLL_CALLCONV SeekProc(fi_handle handle, long offset, int origin) -{ - Stream * s = (Stream *) handle; - - switch(origin) { - case SEEK_SET : - s->seek(offset); - break; - case SEEK_END : - s->seek(s->size() + offset); - break; - case SEEK_CUR : - s->seek(s->tell() + offset); - break; - default : - return 1; - } - - return 0; -} - -static long DLL_CALLCONV TellProc(fi_handle handle) -{ - Stream * s = (Stream *) handle; - return s->tell(); -} - - -Image * nv::ImageIO::loadFreeImage(FREE_IMAGE_FORMAT fif, Stream & s) -{ - nvCheck(!s.isError()); - - FreeImageIO io; - io.read_proc = ReadProc; - io.write_proc = NULL; - io.seek_proc = SeekProc; - io.tell_proc = TellProc; - - FIBITMAP * bitmap = FreeImage_LoadFromHandle(fif, &io, (fi_handle)&s, 0); - - if (bitmap == NULL) - { - return NULL; - } - - const int w = FreeImage_GetWidth(bitmap); - const int h = FreeImage_GetHeight(bitmap); - - if (FreeImage_GetImageType(bitmap) != FIT_BITMAP) - { - // @@ Use tone mapping? - FIBITMAP * tmp = FreeImage_ConvertToType(bitmap, FIT_BITMAP, true); - FreeImage_Unload(bitmap); - bitmap = tmp; - } - - nvDebugCheck(FreeImage_GetImageType(bitmap) == FIT_BITMAP); - if (FreeImage_GetBPP(bitmap) != 32) - { - FIBITMAP * tmp = FreeImage_ConvertTo32Bits(bitmap); - FreeImage_Unload(bitmap); - bitmap = tmp; - } - - - Image * image = new Image(); - image->allocate(w, h, 1); // freeimage can only load 2d images: - - // Copy the image over to our internal format, FreeImage has the scanlines bottom to top though. 
- for (int y=0; y < h; y++) - { - const void * src = FreeImage_GetScanLine(bitmap, h - y - 1); - void * dst = image->scanline(y); - - memcpy(dst, src, 4 * w); - } - - FreeImage_Unload(bitmap); - - return image; -} - -FloatImage * nv::ImageIO::loadFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s) -{ - nvCheck(!s.isError()); - - FreeImageIO io; - io.read_proc = ReadProc; - io.write_proc = NULL; - io.seek_proc = SeekProc; - io.tell_proc = TellProc; - - FIBITMAP * bitmap = FreeImage_LoadFromHandle(fif, &io, (fi_handle)&s, 0); - - if (bitmap == NULL) - { - return NULL; - } - - const int w = FreeImage_GetWidth(bitmap); - const int h = FreeImage_GetHeight(bitmap); - - FREE_IMAGE_TYPE fit = FreeImage_GetImageType(bitmap); - - FloatImage * floatImage = new FloatImage(); - - switch (fit) - { - case FIT_BITMAP: - floatImage->allocate(4, w, h); - { - FIBITMAP * tmp = FreeImage_ConvertTo32Bits(bitmap); - - uint bitcount = FreeImage_GetBPP(bitmap); - uint byteCount = bitcount / 8; - - for (int y=0; y < h; y++) - { - const Color32 * src = (const Color32 *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - - float * r = floatImage->scanline(y, 0); - float * g = floatImage->scanline(y, 1); - float * b = floatImage->scanline(y, 2); - float * a = floatImage->scanline(y, 3); - - for (int x=0; x < w; x++) - { - r[x] = float(src[x].r) / 255.0f; - g[x] = float(src[x].g) / 255.0f; - b[x] = float(src[x].b) / 255.0f; - a[x] = float(src[x].a) / 255.0f; - } - - src += byteCount; - } - - FreeImage_Unload(tmp); - } - break; - case FIT_FLOAT: - floatImage->allocate(1, w, h); - - for (int y=0; y < h; y++) - { - const float * src = (const float *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - float * dst = floatImage->scanline(y, 0); - - for (int x=0; x < w; x++) - { - dst[x] = src[x]; - } - } - break; - case FIT_UINT16: - floatImage->allocate(1, w, h); - - for (int y=0; y < h; y++) - { - const uint16 * src = (const uint16 *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - float * dst = 
floatImage->scanline(y, 0); - - for (int x=0; x < w; x++) - { - dst[x] = float(src[x]) / 65535; - } - } - break; - case FIT_COMPLEX: - floatImage->allocate(2, w, h); - - for (int y=0; y < h; y++) - { - const FICOMPLEX * src = (const FICOMPLEX *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - - float * dst_real = floatImage->scanline(y, 0); - float * dst_imag = floatImage->scanline(y, 1); - - for (int x=0; x < w; x++) - { - dst_real[x] = (float)src[x].r; - dst_imag[x] = (float)src[x].i; - } - } - break; - case FIT_RGBF: - floatImage->allocate(3, w, h); - - for (int y=0; y < h; y++) - { - const FIRGBF * src = (const FIRGBF *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - - float * dst_red = floatImage->scanline(y, 0); - float * dst_green = floatImage->scanline(y, 1); - float * dst_blue = floatImage->scanline(y, 2); - - for (int x=0; x < w; x++) - { - dst_red[x] = src[x].red; - dst_green[x] = src[x].green; - dst_blue[x] = src[x].blue; - } - } - break; - case FIT_RGBAF: - floatImage->allocate(4, w, h); - - for (int y=0; y < h; y++) - { - const FIRGBAF * src = (const FIRGBAF *)FreeImage_GetScanLine(bitmap, h - y - 1 ); - - float * dst_red = floatImage->scanline(y, 0); - float * dst_green = floatImage->scanline(y, 1); - float * dst_blue = floatImage->scanline(y, 2); - float * dst_alpha = floatImage->scanline(y, 3); - - for (int x=0; x < w; x++) - { - dst_red[x] = src[x].red; - dst_green[x] = src[x].green; - dst_blue[x] = src[x].blue; - dst_alpha[x] = src[x].alpha; - } - } - break; - default: - delete floatImage; - floatImage = NULL; - } - - FreeImage_Unload(bitmap); - - return floatImage; -} - -bool nv::ImageIO::saveFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const Image * img, const char ** tags) -{ - nvCheck(!s.isError()); - - FreeImageIO io; - io.read_proc = NULL; - io.write_proc = WriteProc; - io.seek_proc = SeekProc; - io.tell_proc = TellProc; - - const uint w = img->width(); - const uint h = img->height(); - - FIBITMAP * bitmap = FreeImage_Allocate(w, h, 32); - - for 
(uint i = 0; i < h; i++) - { - uint8 * scanline = FreeImage_GetScanLine(bitmap, i); - memcpy(scanline, img->scanline(h - i - 1), w * sizeof(Color32)); - } - - if (tags != NULL) - { - #pragma NV_MESSAGE("TODO: Save image metadata") - //FreeImage_SetMetadata( - } - - bool result = FreeImage_SaveToHandle(fif, bitmap, &io, (fi_handle)&s, 0) != 0; - - FreeImage_Unload(bitmap); - - return result; -} - -bool nv::ImageIO::saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const FloatImage * img, uint baseComponent, uint componentCount) -{ - nvCheck(!s.isError()); - - FreeImageIO io; - io.read_proc = NULL; - io.write_proc = WriteProc; - io.seek_proc = SeekProc; - io.tell_proc = TellProc; - - const uint w = img->width(); - const uint h = img->height(); - - FREE_IMAGE_TYPE type; - if (componentCount == 1) - { - type = FIT_FLOAT; - } - else if (componentCount == 3) - { - type = FIT_RGBF; - } - else if (componentCount == 4) - { - type = FIT_RGBAF; - } - else { - return false; - } - - - FIBITMAP * bitmap = FreeImage_AllocateT(type, w, h); - - for (uint y = 0; y < h; y++) - { - float * scanline = (float *)FreeImage_GetScanLine(bitmap, y); - - for (uint x = 0; x < w; x++) - { - for (uint c = 0; c < componentCount; c++) - { - scanline[x * componentCount + c] = img->pixel(x, y, baseComponent + c); - } - } - } - - bool result = FreeImage_SaveToHandle(fif, bitmap, &io, (fi_handle)&s, 0) != 0; - - FreeImage_Unload(bitmap); - - return result; -} - -#endif // defined(HAVE_FREEIMAGE) - - -#if defined(HAVE_STBIMAGE) - -static Image * loadSTB(Stream & s) -{ - // @@ Assumes stream cursor is at the beginning and that image occupies the whole stream. - const int size = s.size(); - uint8 * buffer = new uint8[size]; - - s.serialize(buffer, size); - - int w, h, n; - uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4); - - delete buffer; - - if (data != NULL) { - Image * img = new Image; - img->allocate(w, h); - img->setFormat(n == 4 ? 
Image::Format_ARGB : Image::Format_RGB); - - for (int y = 0; y < h; ++y) - { - nv::Color32* dest = img->scanline(y); - uint8* src = data + y * w * 4; - - for (int x = 0; x < w; ++x) - { - dest[x].r = src[x * 4 + 0]; - dest[x].g = src[x * 4 + 1]; - dest[x].b = src[x * 4 + 2]; - dest[x].a = src[x * 4 + 3]; - } - } - - free(data); - - return img; - } - - return NULL; -} - -static FloatImage * loadFloatSTB(Stream & s) -{ - // @@ Assumes stream cursor is at the beginning and that image occupies the whole stream. - const int size = s.size(); - uint8 * buffer = new uint8[size]; - - s.serialize(buffer, size); - - int w, h, n; - float * data = stbi_loadf_from_memory(buffer, size, &w, &h, &n, 0); - - delete buffer; - - // Copy to image. - if (data != NULL) { - FloatImage * img = new FloatImage; - img->allocate(n, w, h); - - const int count = w * h; - - for (int c = 0; c < n; c++) { - float * dst = img->channel(c); - - for (int i = 0; i < count; i++) { - dst[i] = data[i*n + c]; - } - } - return img; - } - - return NULL; -} - -#endif // defined(HAVE_STBIMAGE) - - - - - -Image * nv::ImageIO::load(const char * fileName) -{ - nvDebugCheck(fileName != NULL); - - StdInputStream stream(fileName); - - if (stream.isError()) { - return NULL; - } - - return ImageIO::load(fileName, stream); -} - -Image * nv::ImageIO::load(const char * fileName, Stream & s) -{ - nvDebugCheck(fileName != NULL); - nvDebugCheck(s.isLoading()); - - const char * extension = Path::extension(fileName); - - if (strCaseCmp(extension, ".tga") == 0) { - return loadTGA(s); - } - - if (strCaseCmp(extension, ".psd") == 0) { - return loadPSD(s); - } - - /*if (strCaseCmp(extension, ".ppm") == 0) { - return loadPPM(s); - }*/ - -#if defined(HAVE_JPEG) - if (strCaseCmp(extension, ".jpg") == 0 || strCaseCmp(extension, ".jpeg") == 0) { - return loadJPG(s); - } -#endif - -#if defined(HAVE_PNG) - if (strCaseCmp(extension, ".png") == 0) { - return loadPNG(s); - } -#endif - -#if defined(HAVE_FREEIMAGE) - FREE_IMAGE_FORMAT fif = 
FreeImage_GetFIFFromFilename(fileName); - if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { - return loadFreeImage(fif, s); - } -#endif - -#if defined(HAVE_STBIMAGE) - return loadSTB(s); -#endif - - return NULL; -} - -bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/) -{ - nvDebugCheck(fileName != NULL); - nvDebugCheck(s.isSaving()); - nvDebugCheck(img != NULL); - - const char * extension = Path::extension(fileName); - - if (strCaseCmp(extension, ".tga") == 0) { - return saveTGA(s, img); - } - - if (strCaseCmp(extension, ".ppm") == 0) { - return savePPM(s, img); - } - -#if defined(HAVE_PNG) - if (strCaseCmp(extension, ".png") == 0) { - return savePNG(s, img, tags); - } -#endif - -#if defined(HAVE_FREEIMAGE) - FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); - if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { - return saveFreeImage(fif, s, img, tags); - } -#endif - - return false; -} - -bool nv::ImageIO::save(const char * fileName, const Image * img, const char ** tags/*=NULL*/) -{ - nvDebugCheck(fileName != NULL); - nvDebugCheck(img != NULL); - - StdOutputStream stream(fileName); - if (stream.isError()) - { - return false; - } - - return ImageIO::save(fileName, stream, img, tags); -} - -FloatImage * nv::ImageIO::loadFloat(const char * fileName) -{ - nvDebugCheck(fileName != NULL); - - StdInputStream stream(fileName); - - if (stream.isError()) { - return NULL; - } - - return loadFloat(fileName, stream); -} - -FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s) -{ - nvDebugCheck(fileName != NULL); - - const char * extension = Path::extension(fileName); - - /*if (strCaseCmp(extension, ".pfm") == 0) { - return loadFloatPFM(s); - }*/ - -#if defined(HAVE_TIFF) - #pragma NV_MESSAGE("TODO: Load TIFF from stream.") - if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) { - return loadFloatTIFF(fileName, s); - } -#endif - -#if 
defined(HAVE_OPENEXR) - #pragma NV_MESSAGE("TODO: Load EXR from stream.") - if (strCaseCmp(extension, ".exr") == 0) { - return loadFloatEXR(fileName, s); - } -#endif - -#if defined(HAVE_FREEIMAGE) - FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); - if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { - return loadFloatFreeImage(fif, s); - } -#endif - - if (strCaseCmp(extension, ".dds") == 0) { - const uint spos = s.tell(); // Save stream position. - FloatImage * floatImage = loadFloatDDS(s); - if (floatImage != NULL) return floatImage; - else s.seek(spos); - } - - // Try to load as an RGBA8 image and convert to float. - AutoPtr img(load(fileName, s)); - if (img != NULL) { - return new FloatImage(img.ptr()); - } - - return NULL; -} - -bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage * fimage, uint baseComponent, uint componentCount) -{ - if (componentCount == 0) { - componentCount = fimage->componentCount() - baseComponent; - } - if (baseComponent + componentCount < fimage->componentCount()) { - return false; - } - - const char * extension = Path::extension(fileName); - - if (strCaseCmp(extension, ".dds") == 0) { - return saveFloatDDS(s, fimage, baseComponent, componentCount); - } - - /*if (strCaseCmp(extension, ".pfm") == 0) { - return saveFloatPFM(s, fimage, baseComponent, componentCount); - }*/ - -#if defined(HAVE_FREEIMAGE) - FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); - if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { - return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount); - } -#endif - - // If everything else fails, save as LDR. 
- if (componentCount <= 4) - { - AutoPtr image(fimage->createImage(baseComponent, componentCount)); - nvCheck(image != NULL); - - if (componentCount == 1) - { - Color32 * c = image->pixels(); - const uint count = image->width() * image->height(); - for (uint i = 0; i < count; i++) - { - c[i].b = c[i].g = c[i].r; - } - } - - if (componentCount == 4) - { - image->setFormat(Image::Format_ARGB); - } - - return ImageIO::save(fileName, s, image.ptr()); - } - - return false; -} - -bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount) -{ - if (componentCount == 0) { - componentCount = fimage->componentCount() - baseComponent; - } - if (baseComponent + componentCount < fimage->componentCount()) { - return false; - } - - const char * extension = Path::extension(fileName); - -#if defined(HAVE_OPENEXR) - if (strCaseCmp(extension, ".exr") == 0) { - return saveFloatEXR(fileName, fimage, baseComponent, componentCount); - } -#endif - -#if defined(HAVE_TIFF) - if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) { - return saveFloatTIFF(fileName, fimage, baseComponent, componentCount); - } -#endif - - StdOutputStream stream(fileName); - - if (stream.isError()) { - return false; - } - - return saveFloat(fileName, stream, fimage, baseComponent, componentCount); -} +// This code is in the public domain -- castanyo@yahoo.es + +#include "ImageIO.h" +#include "Image.h" +#include "FloatImage.h" +#include "TgaFile.h" +#include "PsdFile.h" +#include "DirectDrawSurface.h" +#include "PixelFormat.h" + +#include "nvmath/Color.h" +#include "nvmath/Half.h" + +#include "nvcore/Ptr.h" +#include "nvcore/Utils.h" +#include "nvcore/Array.h" +#include "nvcore/StrLib.h" +#include "nvcore/StdStream.h" +#include "nvcore/TextWriter.h" + +// Extern +#if defined(HAVE_FREEIMAGE) +# include +// If FreeImage available, do not use individual libraries, since that produces link conflicts in some platforms. 
+# undef HAVE_JPEG +# undef HAVE_PNG +# undef HAVE_TIFF +# undef HAVE_OPENEXR +#endif + +#if defined(HAVE_JPEG) +extern "C" { +# include +} +#endif + +#if defined(HAVE_PNG) +# include +#endif + +#if defined(HAVE_TIFF) +# define _TIFF_DATA_TYPEDEFS_ +# include +#endif + +#if defined(HAVE_OPENEXR) +# include +# include +# include +# include +# include +# include +#endif + +#if defined(HAVE_STBIMAGE) +# define STBI_NO_STDIO +# include +#endif + + +using namespace nv; + + + +struct Color555 { + uint16 b : 5; + uint16 g : 5; + uint16 r : 5; +}; + +// Load TGA image. +static Image * loadTGA(Stream & s) +{ + nvCheck(!s.isError()); + nvCheck(s.isLoading()); + + TgaHeader tga; + s << tga; + s.seek(TgaHeader::Size + tga.id_length); + + // Get header info. + bool rle = false; + bool pal = false; + bool rgb = false; + bool grey = false; + + switch( tga.image_type ) { + case TGA_TYPE_RLE_INDEXED: + rle = true; + // no break is intended! + case TGA_TYPE_INDEXED: + if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) { + nvDebug( "*** loadTGA: Error, only 24bit paletted images are supported.\n" ); + return NULL; + } + pal = true; + break; + + case TGA_TYPE_RLE_RGB: + rle = true; + // no break is intended! + case TGA_TYPE_RGB: + rgb = true; + break; + + case TGA_TYPE_RLE_GREY: + rle = true; + // no break is intended! + case TGA_TYPE_GREY: + grey = true; + break; + + default: + nvDebug( "*** loadTGA: Error, unsupported image type.\n" ); + return NULL; + } + + const uint pixel_size = (tga.pixel_size/8); + nvDebugCheck(pixel_size <= 4); + + const uint size = tga.width * tga.height * pixel_size; + + + // Read palette + uint8 palette[768]; + if( pal ) { + nvDebugCheck(tga.colormap_length <= 256); + s.serialize(palette, 3 * tga.colormap_length); + } + + // Decode image. + uint8 * mem = new uint8[size]; + if( rle ) { + // Decompress image in src. 
+ uint8 * dst = mem; + int num = size; + + while (num > 0) { + // Get packet header + uint8 c; + s << c; + + uint count = (c & 0x7f) + 1; + num -= count * pixel_size; + + if (c & 0x80) { + // RLE pixels. + uint8 pixel[4]; // uint8 pixel[pixel_size]; + s.serialize( pixel, pixel_size ); + do { + memcpy(dst, pixel, pixel_size); + dst += pixel_size; + } while (--count); + } + else { + // Raw pixels. + count *= pixel_size; + //file->Read8(dst, count); + s.serialize(dst, count); + dst += count; + } + } + } + else { + s.serialize(mem, size); + } + + // Allocate image. + AutoPtr img(new Image()); + img->allocate(tga.width, tga.height); + + int lstep; + Color32 * dst; + if( tga.flags & TGA_ORIGIN_UPPER ) { + lstep = tga.width; + dst = img->pixels(); + } + else { + lstep = - tga.width; + dst = img->pixels() + (tga.height-1) * tga.width; + } + + // Write image. + uint8 * src = mem; + if( pal ) { + for( int y = 0; y < tga.height; y++ ) { + for( int x = 0; x < tga.width; x++ ) { + uint8 idx = *src++; + dst[x].setBGRA(palette[3*idx+0], palette[3*idx+1], palette[3*idx+2], 0xFF); + } + dst += lstep; + } + } + else if( grey ) { + img->setFormat(Image::Format_ARGB); + + for( int y = 0; y < tga.height; y++ ) { + for( int x = 0; x < tga.width; x++ ) { + dst[x].setBGRA(*src, *src, *src, *src); + src++; + } + dst += lstep; + } + } + else { + + if( tga.pixel_size == 16 ) { + for( int y = 0; y < tga.height; y++ ) { + for( int x = 0; x < tga.width; x++ ) { + Color555 c = *reinterpret_cast(src); + uint8 b = (c.b << 3) | (c.b >> 2); + uint8 g = (c.g << 3) | (c.g >> 2); + uint8 r = (c.r << 3) | (c.r >> 2); + dst[x].setBGRA(b, g, r, 0xFF); + src += 2; + } + dst += lstep; + } + } + else if( tga.pixel_size == 24 ) { + for( int y = 0; y < tga.height; y++ ) { + for( int x = 0; x < tga.width; x++ ) { + dst[x].setBGRA(src[0], src[1], src[2], 0xFF); + src += 3; + } + dst += lstep; + } + } + else if( tga.pixel_size == 32 ) { + img->setFormat(Image::Format_ARGB); + + for( int y = 0; y < tga.height; y++ 
) { + for( int x = 0; x < tga.width; x++ ) { + dst[x].setBGRA(src[0], src[1], src[2], src[3]); + src += 4; + } + dst += lstep; + } + } + } + + // free uncompressed data. + delete [] mem; + + return img.release(); +} + +// Save TGA image. +static bool saveTGA(Stream & s, const Image * img) +{ + nvCheck(!s.isError()); + nvCheck(img != NULL); + nvCheck(img->pixels() != NULL); + + TgaFile tga; + tga.head.id_length = 0; + tga.head.colormap_type = 0; + tga.head.image_type = TGA_TYPE_RGB; + + tga.head.colormap_index = 0; + tga.head.colormap_length = 0; + tga.head.colormap_size = 0; + + tga.head.x_origin = 0; + tga.head.y_origin = 0; + tga.head.width = img->width(); + tga.head.height = img->height(); + if(img->format() == Image::Format_ARGB) { + tga.head.pixel_size = 32; + tga.head.flags = TGA_ORIGIN_UPPER | TGA_HAS_ALPHA; + } + else { + tga.head.pixel_size = 24; + tga.head.flags = TGA_ORIGIN_UPPER; + } + + // @@ Serialize directly. + tga.allocate(); + + const uint n = img->width() * img->height(); + if(img->format() == Image::Format_ARGB) { + for(uint i = 0; i < n; i++) { + Color32 color = img->pixel(i); + tga.mem[4 * i + 0] = color.b; + tga.mem[4 * i + 1] = color.g; + tga.mem[4 * i + 2] = color.r; + tga.mem[4 * i + 3] = color.a; + } + } + else { + for(uint i = 0; i < n; i++) { + Color32 color = img->pixel(i); + tga.mem[3 * i + 0] = color.b; + tga.mem[3 * i + 1] = color.g; + tga.mem[3 * i + 2] = color.r; + } + } + + s << tga; + + tga.free(); + + return true; +} + +/*static Image * loadPPM(Stream & s) +{ + // @@ + return NULL; +}*/ + +// Save PPM image. 
+static bool savePPM(Stream & s, const Image * img) +{ + //if (img->depth() != 1) return false; + //if (img->format() == Image::Format_ARGB) return false; + + uint w = img->width(); + uint h = img->height(); + + TextWriter writer(&s); + writer.write("P6\n"); + writer.write("%d %d\n", w, h); + writer.write("255\n"); + for (uint i = 0; i < w * h; i++) { + Color32 c = img->pixel(i); + s << c.r << c.g << c.b; + } + + return true; +} + + +/*static FloatImage * loadFloatPFM(Stream & s) +{ + return NULL; +}*/ + +/*static bool saveFloatPFM(Stream & s, const FloatImage * img, uint base_channel, uint channel_count) +{ + return false; +}*/ + +// Load PSD image. +static Image * loadPSD(Stream & s) +{ + nvCheck(!s.isError()); + nvCheck(s.isLoading()); + + s.setByteOrder(Stream::BigEndian); + + PsdHeader header; + s << header; + + if (!header.isValid()) + { + printf("invalid header!\n"); + return NULL; + } + + if (!header.isSupported()) + { + printf("unsupported file!\n"); + return NULL; + } + + int tmp; + + // Skip mode data. + s << tmp; + s.seek(s.tell() + tmp); + + // Skip image resources. + s << tmp; + s.seek(s.tell() + tmp); + + // Skip the reserved data. + s << tmp; + s.seek(s.tell() + tmp); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + uint16 compression; + s << compression; + + if (compression > 1) { + // Unknown compression type. + return NULL; + } + + uint channel_num = header.channel_count; + + AutoPtr img(new Image()); + img->allocate(header.width, header.height); + + if (channel_num < 4) + { + // Clear the image. + img->fill(Color32(0, 0, 0, 0xFF)); + } + else + { + // Enable alpha. + img->setFormat(Image::Format_ARGB); + + // Ignore remaining channels. 
+ channel_num = 4; + } + + + const uint pixel_count = header.height * header.width; + + static const uint components[4] = {2, 1, 0, 3}; + + if (compression) + { + s.seek(s.tell() + header.height * header.channel_count * sizeof(uint16)); + + // Read RLE data. + for (uint channel = 0; channel < channel_num; channel++) + { + uint8 * ptr = (uint8 *)img->pixels() + components[channel]; + + uint count = 0; + while( count < pixel_count ) + { + if (s.isAtEnd()) return NULL; + + uint8 c; + s << c; + + uint len = c; + if (len < 128) + { + // Copy next len+1 bytes literally. + len++; + count += len; + if (count > pixel_count) return NULL; + + while (len != 0) + { + s << *ptr; + ptr += 4; + len--; + } + } + else if (len > 128) + { + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len ^= 0xFF; + len += 2; + count += len; + if (s.isAtEnd() || count > pixel_count) return NULL; + + uint8 val; + s << val; + while( len != 0 ) { + *ptr = val; + ptr += 4; + len--; + } + } + else if( len == 128 ) { + // No-op. + } + } + } + } + else + { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit value for each pixel in the image. + + // Read the data by channel. + for (uint channel = 0; channel < channel_num; channel++) + { + uint8 * ptr = (uint8 *)img->pixels() + components[channel]; + + // Read the data. + uint count = pixel_count; + while (count != 0) + { + s << *ptr; + ptr += 4; + count--; + } + } + } + + return img.release(); +} + +static FloatImage * loadFloatDDS(Stream & s) +{ + nvCheck(s.isLoading()); + nvCheck(!s.isError()); + + DDSHeader header; + s << header; + + static const uint D3DFMT_A16B16G16R16F = 113; + + // @@ We only support RGBA16F for now. 
+ if (header.pf.fourcc == D3DFMT_A16B16G16R16F) { + const int size = header.width * header.height; + uint16 * const data = new uint16[size * 4]; + + s.serialize(data, size * 4 * sizeof(uint16)); + + FloatImage * img = new FloatImage; + img->allocate(4, header.width, header.height); + + uint32 * r = (uint32 *)img->channel(0); + uint32 * g = (uint32 *)img->channel(1); + uint32 * b = (uint32 *)img->channel(2); + uint32 * a = (uint32 *)img->channel(3); + + uint16 * ptr = data; + for (int i = 0; i < size; i++) { + *r++ = half_to_float( *ptr++ ); + *g++ = half_to_float( *ptr++ ); + *b++ = half_to_float( *ptr++ ); + *a++ = half_to_float( *ptr++ ); + } + + delete [] data; + + return img; + } + + return NULL; +} + +static bool saveFloatDDS(Stream & s, const FloatImage * img, uint base_component, uint num_components) +{ + nvCheck(s.isSaving()); + nvCheck(!s.isError()); + + if (num_components != 4) return false; + + static const uint D3DFMT_A16B16G16R16F = 113; + + DDSHeader header; + header.setTexture2D(); + header.setWidth(img->width()); + header.setHeight(img->height()); + header.setFormatCode(D3DFMT_A16B16G16R16F); + // ... 
+ + s << header; + + uint32 * r = (uint32 *)img->channel(base_component + 0); + uint32 * g = (uint32 *)img->channel(base_component + 1); + uint32 * b = (uint32 *)img->channel(base_component + 2); + uint32 * a = (uint32 *)img->channel(base_component + 3); + + const uint size = img->width() * img->height(); + for (uint i = 0; i < size; i++) { + uint16 R = half_from_float( *r++ ); + uint16 G = half_from_float( *g++ ); + uint16 B = half_from_float( *b++ ); + uint16 A = half_from_float( *a++ ); + + s.serialize(&R, sizeof(uint16)); + s.serialize(&G, sizeof(uint16)); + s.serialize(&B, sizeof(uint16)); + s.serialize(&A, sizeof(uint16)); + } + + return true; +} + + +#if defined(HAVE_PNG) + +static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length) +{ + nvDebugCheck(png_ptr != NULL); + + Stream * s = (Stream *)png_get_io_ptr(png_ptr); + s->serialize(data, (int)length); + + if (s->isError()) { + png_error(png_ptr, "Read Error"); + } +} + + +static Image * loadPNG(Stream & s) +{ + nvCheck(!s.isError()); + + // Set up a read buffer and check the library version + png_structp png_ptr; + png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + if (png_ptr == NULL) { + // nvDebug( "*** LoadPNG: Error allocating read buffer in file '%s'.\n", name ); + return false; + } + + // Allocate/initialize a memory block for the image information + png_infop info_ptr = png_create_info_struct(png_ptr); + if (info_ptr == NULL) { + png_destroy_read_struct(&png_ptr, NULL, NULL); + // nvDebug( "*** LoadPNG: Error allocating image information for '%s'.\n", name ); + return false; + } + + // Set up the error handling + if (setjmp(png_jmpbuf(png_ptr))) { + png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + // nvDebug( "*** LoadPNG: Error reading png file '%s'.\n", name ); + return false; + } + + // Set up the I/O functions. 
+ png_set_read_fn(png_ptr, (void*)&s, user_read_data); + + + // Retrieve the image header information + png_uint_32 width, height; + int bit_depth, color_type, interlace_type; + png_read_info(png_ptr, info_ptr); + png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL); + + + if (color_type == PNG_COLOR_TYPE_PALETTE && bit_depth <= 8) { + // Convert indexed images to RGB. + png_set_expand(png_ptr); + } + else if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) { + // Convert grayscale to RGB. + png_set_expand(png_ptr); + } + else if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) { + // Expand images with transparency to full alpha channels + // so the data will be available as RGBA quartets. + png_set_expand(png_ptr); + } + else if (bit_depth < 8) { + // If we have < 8 scale it up to 8. + //png_set_expand(png_ptr); + png_set_packing(png_ptr); + } + + // Reduce bit depth. + if (bit_depth == 16) { + png_set_strip_16(png_ptr); + } + + // Represent gray as RGB + if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA) { + png_set_gray_to_rgb(png_ptr); + } + + // Convert to RGBA filling alpha with 0xFF. + if (!(color_type & PNG_COLOR_MASK_ALPHA)) { + png_set_filler(png_ptr, 0xFF, PNG_FILLER_AFTER); + } + + // @todo Choose gamma according to the platform? + double screen_gamma = 2.2; + int intent; + if (png_get_sRGB(png_ptr, info_ptr, &intent)) { + png_set_gamma(png_ptr, screen_gamma, 0.45455); + } + else { + double image_gamma; + if (png_get_gAMA(png_ptr, info_ptr, &image_gamma)) { + png_set_gamma(png_ptr, screen_gamma, image_gamma); + } + else { + png_set_gamma(png_ptr, screen_gamma, 0.45455); + } + } + + // Perform the selected transforms. + png_read_update_info(png_ptr, info_ptr); + + png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL); + + AutoPtr img(new Image()); + img->allocate(width, height); + + // Set internal format flags. 
+ if(color_type & PNG_COLOR_MASK_COLOR) { + //img->flags |= PI_IF_HAS_COLOR; + } + if(color_type & PNG_COLOR_MASK_ALPHA) { + //img->flags |= PI_IF_HAS_ALPHA; + img->setFormat(Image::Format_ARGB); + } + + // Read the image + uint8 * pixels = (uint8 *)img->pixels(); + png_bytep * row_data = new png_bytep[sizeof(png_byte) * height]; + for (uint i = 0; i < height; i++) { + row_data[i] = &(pixels[width * 4 * i]); + } + + png_read_image(png_ptr, row_data); + delete [] row_data; + + // Finish things up + png_read_end(png_ptr, info_ptr); + png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + + // RGBA to BGRA. + uint num = width * height; + for(uint i = 0; i < num; i++) + { + Color32 c = img->pixel(i); + img->pixel(i) = Color32(c.b, c.g, c.r, c.a); + } + + // Compute alpha channel if needed. + /*if( img->flags & PI_IU_BUMPMAP || img->flags & PI_IU_ALPHAMAP ) { + if( img->flags & PI_IF_HAS_COLOR && !(img->flags & PI_IF_HAS_ALPHA)) { + img->ComputeAlphaFromColor(); + } + }*/ + + return img.release(); +} + +static void user_write_data(png_structp png_ptr, png_bytep data, png_size_t length) +{ + nvDebugCheck(png_ptr != NULL); + + Stream * s = (Stream *)png_get_io_ptr(png_ptr); + s->serialize(data, (int)length); + + if (s->isError()) { + png_error(png_ptr, "Write Error"); + } +} + +static void user_write_flush(png_structp png_ptr) { } + +static bool savePNG(Stream & s, const Image * img, const char ** tags/*=NULL*/) +{ + nvCheck(!s.isError()); + nvCheck(img != NULL); + nvCheck(img->pixels() != NULL); + + // Set up a write buffer and check the library version + png_structp png_ptr; + png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + if (png_ptr == NULL) { + return false; + } + + // Allocate/initialize a memory block for the image information + png_infop info_ptr = png_create_info_struct(png_ptr); + if (info_ptr == NULL) { + png_destroy_write_struct(&png_ptr, NULL); + return false; + } + + // Set up the error handling + if 
(setjmp(png_jmpbuf(png_ptr))) { + png_destroy_write_struct(&png_ptr, &info_ptr); + return false; + } + + // Set up the I/O functions. + png_set_write_fn(png_ptr, (void*)&s, user_write_data, user_write_flush); + + // Set image header information + int color_type = PNG_COLOR_TYPE_RGBA; + switch(img->format()) + { + case Image::Format_RGB: color_type = PNG_COLOR_TYPE_RGB; break; + case Image::Format_ARGB: color_type = PNG_COLOR_TYPE_RGBA; break; + } + png_set_IHDR(png_ptr, info_ptr, img->width(), img->height(), + 8, color_type, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, + PNG_FILTER_TYPE_DEFAULT); + + // Set image data + png_bytep * row_data = new png_bytep[sizeof(png_byte) * img->height()]; + for (uint i = 0; i < img->height(); i++) { + row_data[i] = (png_byte*)img->scanline (i); + if (img->format() == Image::Format_RGB) row_data[i]--; // This is a bit of a hack, libpng expects images in ARGB format not BGRA, it supports BGR swapping, but not alpha swapping. + } + png_set_rows(png_ptr, info_ptr, row_data); + + png_text * text = NULL; + if (tags != NULL) + { + int count = 0; + while(tags[2 * count] != NULL) count++; + + text = new png_text[count]; + memset(text, 0, count * sizeof(png_text); + + for (int i = 0; i < count; i++) { + text[i].compression = PNG_TEXT_COMPRESSION_NONE; + text[i].key = tags[2 * i + 0]; + text[i].text = tags[2 * i + 1]; + } + + png_set_text(png_ptr, info_ptr, text, count); + } + + png_write_png(png_ptr, info_ptr, + // component order is BGR(A) + PNG_TRANSFORM_BGR | + // Strip alpha byte for RGB images + (img->format() == Image::Format_RGB ? 
PNG_TRANSFORM_STRIP_FILLER : 0) + , NULL); + + // Finish things up + png_destroy_write_struct(&png_ptr, &info_ptr); + + delete [] row_data; + delete [] text; + + return true; +} + +#endif // defined(HAVE_PNG) + +#if defined(HAVE_JPEG) + +static void init_source (j_decompress_ptr /*cinfo*/){ +} + +static boolean fill_input_buffer (j_decompress_ptr cinfo) { + struct jpeg_source_mgr * src = cinfo->src; + static JOCTET FakeEOI[] = { 0xFF, JPEG_EOI }; + + // Generate warning + nvDebug("jpeglib: Premature end of file\n"); + + // Insert a fake EOI marker + src->next_input_byte = FakeEOI; + src->bytes_in_buffer = 2; + + return TRUE; +} + +static void skip_input_data (j_decompress_ptr cinfo, long num_bytes) { + struct jpeg_source_mgr * src = cinfo->src; + + if(num_bytes >= (long)src->bytes_in_buffer) { + fill_input_buffer(cinfo); + return; + } + + src->bytes_in_buffer -= num_bytes; + src->next_input_byte += num_bytes; +} + +static void term_source (j_decompress_ptr /*cinfo*/){ + // no work necessary here +} + + +static Image * loadJPG(Stream & s) +{ + nvCheck(!s.isError()); + + // Read the entire file. 
+ Array byte_array; + byte_array.resize(s.size()); + s.serialize(byte_array.buffer(), s.size()); + + jpeg_decompress_struct cinfo; + jpeg_error_mgr jerr; + + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_decompress(&cinfo); + + cinfo.src = (struct jpeg_source_mgr *) (*cinfo.mem->alloc_small) + ((j_common_ptr) &cinfo, JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)); + cinfo.src->init_source = init_source; + cinfo.src->fill_input_buffer = fill_input_buffer; + cinfo.src->skip_input_data = skip_input_data; + cinfo.src->resync_to_restart = jpeg_resync_to_restart; // use default method + cinfo.src->term_source = term_source; + cinfo.src->bytes_in_buffer = byte_array.size(); + cinfo.src->next_input_byte = byte_array.buffer(); + + jpeg_read_header(&cinfo, TRUE); + jpeg_start_decompress(&cinfo); + + /* + cinfo.do_fancy_upsampling = FALSE; // fast decompression + cinfo.dct_method = JDCT_FLOAT; // Choose floating point DCT method. + */ + + uint8 * tmp_buffer = new uint8 [cinfo.output_width * cinfo.output_height * cinfo.num_components]; + uint8 * scanline = tmp_buffer; + + while( cinfo.output_scanline < cinfo.output_height ){ + int num_scanlines = jpeg_read_scanlines (&cinfo, &scanline, 1); + scanline += num_scanlines * cinfo.output_width * cinfo.num_components; + } + + jpeg_finish_decompress(&cinfo); + + AutoPtr img(new Image()); + img->allocate(cinfo.output_width, cinfo.output_height); + + Color32 * dst = img->pixels(); + const int size = img->height() * img->width(); + const uint8 * src = tmp_buffer; + + if( cinfo.num_components == 3 ) { + img->setFormat(Image::Format_RGB); + for( int i = 0; i < size; i++ ) { + *dst++ = Color32(src[0], src[1], src[2]); + src += 3; + } + } + else { + img->setFormat(Image::Format_ARGB); + for( int i = 0; i < size; i++ ) { + *dst++ = Color32(*src, *src, *src, *src); + src++; + } + } + + delete [] tmp_buffer; + jpeg_destroy_decompress (&cinfo); + + return img.release(); +} + +#endif // defined(HAVE_JPEG) + +#if defined(HAVE_TIFF) + +/* 
+static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size) +{ + Stream * s = (Stream *)h; + nvDebugCheck(s != NULL); + + s->serialize(ptr, size); + + return size; +} + +static toff_t tiffSeekProc(thandle_t h, toff_t offset, int whence) +{ + Stream * s = (Stream *)h; + nvDebugCheck(s != NULL); + + if (!s->isSeekable()) + { + return (toff_t)-1; + } + + if (whence == SEEK_SET) + { + s->seek(offset); + } + else if (whence == SEEK_CUR) + { + s->seek(s->tell() + offset); + } + else if (whence == SEEK_END) + { + s->seek(s->size() + offset); + } + + return s->tell(); +} + +static int tiffCloseProc(thandle_t) +{ + return 0; +} + +static toff_t tiffSizeProc(thandle_t h) +{ + Stream * s = (Stream *)h; + nvDebugCheck(s != NULL); + return s->size(); +} + +static int tiffMapFileProc(thandle_t, tdata_t*, toff_t*) +{ + // @@ TODO, Implement these functions. + return -1; +} + +static void tiffUnmapFileProc(thandle_t, tdata_t, toff_t) +{ + // @@ TODO, Implement these functions. +} +*/ + +static FloatImage * loadFloatTIFF(const char * fileName, Stream & s) +{ + nvCheck(!s.isError()); + + TIFF * tif = TIFFOpen(fileName, "r"); + //TIFF * tif = TIFFClientOpen(fileName, "r", &s, tiffReadWriteProc, tiffReadWriteProc, tiffSeekProc, tiffCloseProc, tiffSizeProc, tiffMapFileProc, tiffUnmapFileProc); + + if (!tif) + { + nvDebug("Can't open '%s' for reading\n", fileName); + return NULL; + } + + ::uint16 spp, bpp, format; + ::uint32 width, height; + TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height); + TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &width); + TIFFGetField(tif, TIFFTAG_BITSPERSAMPLE, &bpp); + TIFFGetField(tif, TIFFTAG_SAMPLESPERPIXEL, &spp); + TIFFGetField(tif, TIFFTAG_SAMPLEFORMAT, &format); + + if (bpp != 8 && bpp != 16 && bpp != 32) { + nvDebug("Can't load '%s', only 1 sample per pixel supported\n", fileName); + TIFFClose(tif); + return NULL; + } + + AutoPtr fimage(new FloatImage()); + fimage->allocate(spp, width, height); + + int linesize = TIFFScanlineSize(tif); + tdata_t 
buf = malloc(linesize); + + for (uint y = 0; y < height; y++) + { + TIFFReadScanline(tif, buf, y, 0); + + for (uint c=0; cscanline(y, c); + + for(uint x = 0; x < width; x++) + { + if (bpp == 8) + { + dst[x] = float(((::uint8 *)buf)[x*spp+c]) / float(0xFF); + } + else if (bpp == 16) + { + dst[x] = float(((::uint16 *)buf)[x*spp+c]) / float(0xFFFF); + } + else if (bpp == 32) + { + if (format==SAMPLEFORMAT_IEEEFP) + { + dst[x] = float(((float *)buf)[x*spp+c]); + } + else + { + dst[x] = float(((::uint32 *)buf)[x*spp+c] >> 8) / float(0xFFFFFF); + } + } + } + } + } + + free(buf); + + TIFFClose(tif); + + return fimage.release(); +} + +static bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) +{ + nvCheck(fileName != NULL); + nvCheck(fimage != NULL); + nvCheck(base_component + num_components <= fimage->componentCount()); + + const int iW = fimage->width(); + const int iH = fimage->height(); + const int iC = num_components; + + TIFF * image = TIFFOpen(fileName, "w"); + + // Open the TIFF file + if (image == NULL) + { + nvDebug("Could not open '%s' for writing\n", fileName); + return false; + } + + TIFFSetField(image, TIFFTAG_IMAGEWIDTH, iW); + TIFFSetField(image, TIFFTAG_IMAGELENGTH, iH); + TIFFSetField(image, TIFFTAG_SAMPLESPERPIXEL, iC); + TIFFSetField(image, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP); + TIFFSetField(image, TIFFTAG_BITSPERSAMPLE, 32); + + uint32 rowsperstrip = TIFFDefaultStripSize(image, (uint32)-1); + + TIFFSetField(image, TIFFTAG_ROWSPERSTRIP, rowsperstrip); + TIFFSetField(image, TIFFTAG_COMPRESSION, COMPRESSION_PACKBITS); + if (num_components == 3) + { + // Set this so that it can be visualized with pfstools. 
+ TIFFSetField(image, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB); + } + TIFFSetField(image, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT); + TIFFSetField(image, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG); + + float * scanline = new float[iW * iC]; + for (int y = 0; y < iH; y++) + { + for (int c = 0; c < iC; c++) + { + const float * src = fimage->scanline(y, base_component + c); + for (int x = 0; x < iW; x++) scanline[x * iC + c] = src[x]; + } + if (TIFFWriteScanline(image, scanline, y, 0)==-1) + { + nvDebug("Error writing scanline %d\n", y); + return false; + } + } + delete [] scanline; + + // Close the file + TIFFClose(image); + return true; +} + +#endif // defined(HAVE_TIFF) + +#if defined(HAVE_OPENEXR) + +namespace +{ + class ExrStream : public Imf::IStream + { + public: + ExrStream(const char * name, Stream & s) : Imf::IStream(name), m_stream(s) + { + nvDebugCheck(s.isLoading()); + } + + virtual bool read(char c[], int n) + { + m_stream.serialize(c, n); + + if (m_stream.isError()) + { + throw Iex::InputExc("I/O error."); + } + + return m_stream.isAtEnd(); + } + + virtual Imf::Int64 tellg() + { + return m_stream.tell(); + } + + virtual void seekg(Imf::Int64 pos) + { + nvDebugCheck(pos >= 0 && pos < UINT_MAX); + m_stream.seek((uint)pos); + } + + virtual void clear() + { + m_stream.clearError(); + } + + private: + Stream & m_stream; + }; + + static int channelIndexFromName(const char* name) + { + char c = tolower(name[0]); + switch (c) + { + default: + case 'r': + return 0; + case 'g': + return 1; + case 'b': + return 2; + case 'a': + return 3; + } + } + +} // namespace + +static FloatImage * loadFloatEXR(const char * fileName, Stream & s) +{ + nvCheck(s.isLoading()); + nvCheck(!s.isError()); + + ExrStream stream(fileName, s); + Imf::InputFile inputFile(stream); + + Imath::Box2i box = inputFile.header().dataWindow(); + + int width = box.max.x - box.min.y + 1; + int height = box.max.x - box.min.y + 1; + + const Imf::ChannelList & channels = inputFile.header().channels(); + 
+ // Count channels. + uint channelCount= 0; + for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it) + { + channelCount++; + } + + // Allocate FloatImage. + AutoPtr fimage(new FloatImage()); + fimage->allocate(channelCount, width, height); + + // Describe image's layout with a framebuffer. + Imf::FrameBuffer frameBuffer; + uint i = 0; + for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it, ++i) + { + int channelIndex = channelIndexFromName(it.name()); + frameBuffer.insert(it.name(), Imf::Slice(Imf::FLOAT, (char *)fimage->channel(channelIndex), sizeof(float), sizeof(float) * width)); + } + + // Read it. + inputFile.setFrameBuffer (frameBuffer); + inputFile.readPixels (box.min.y, box.max.y); + + return fimage.release(); +} + +static bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) +{ + nvCheck(fileName != NULL); + nvCheck(fimage != NULL); + nvCheck(base_component + num_components <= fimage->componentCount()); + nvCheck(num_components > 0 && num_components <= 4); + + const int w = fimage->width(); + const int h = fimage->height(); + + const char * channelNames[] = {"R", "G", "B", "A"}; + + Imf::Header header (w, h); + + for (uint c = 0; c < num_components; c++) + { + header.channels().insert(channelNames[c], Imf::Channel(Imf::FLOAT)); + } + + Imf::OutputFile file(fileName, header); + Imf::FrameBuffer frameBuffer; + + for (uint c = 0; c < num_components; c++) + { + char * channel = (char *) fimage->channel(base_component + c); + frameBuffer.insert(channelNames[c], Imf::Slice(Imf::FLOAT, channel, sizeof(float), sizeof(float) * w)); + } + + file.setFrameBuffer(frameBuffer); + file.writePixels(h); + + return true; +} + +#endif // defined(HAVE_OPENEXR) + + +#if defined(HAVE_FREEIMAGE) + +static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle) +{ + Stream * s = (Stream *) handle; + s->serialize(buffer, 
size * count); + return count; +} + +static unsigned DLL_CALLCONV WriteProc(void *buffer, unsigned size, unsigned count, fi_handle handle) +{ + Stream * s = (Stream *) handle; + s->serialize(buffer, size * count); + return count; +} + +static int DLL_CALLCONV SeekProc(fi_handle handle, long offset, int origin) +{ + Stream * s = (Stream *) handle; + + switch(origin) { + case SEEK_SET : + s->seek(offset); + break; + case SEEK_END : + s->seek(s->size() + offset); + break; + case SEEK_CUR : + s->seek(s->tell() + offset); + break; + default : + return 1; + } + + return 0; +} + +static long DLL_CALLCONV TellProc(fi_handle handle) +{ + Stream * s = (Stream *) handle; + return s->tell(); +} + + +Image * nv::ImageIO::loadFreeImage(FREE_IMAGE_FORMAT fif, Stream & s) +{ + nvCheck(!s.isError()); + + FreeImageIO io; + io.read_proc = ReadProc; + io.write_proc = NULL; + io.seek_proc = SeekProc; + io.tell_proc = TellProc; + + FIBITMAP * bitmap = FreeImage_LoadFromHandle(fif, &io, (fi_handle)&s, 0); + + if (bitmap == NULL) + { + return NULL; + } + + const int w = FreeImage_GetWidth(bitmap); + const int h = FreeImage_GetHeight(bitmap); + + if (FreeImage_GetImageType(bitmap) != FIT_BITMAP) + { + // @@ Use tone mapping? + FIBITMAP * tmp = FreeImage_ConvertToType(bitmap, FIT_BITMAP, true); + FreeImage_Unload(bitmap); + bitmap = tmp; + } + + nvDebugCheck(FreeImage_GetImageType(bitmap) == FIT_BITMAP); + if (FreeImage_GetBPP(bitmap) != 32) + { + FIBITMAP * tmp = FreeImage_ConvertTo32Bits(bitmap); + FreeImage_Unload(bitmap); + bitmap = tmp; + } + + + Image * image = new Image(); + image->allocate(w, h, 1); // freeimage can only load 2d images: + + // Copy the image over to our internal format, FreeImage has the scanlines bottom to top though. 
+ for (int y=0; y < h; y++) + { + const void * src = FreeImage_GetScanLine(bitmap, h - y - 1); + void * dst = image->scanline(y); + + memcpy(dst, src, 4 * w); + } + + FreeImage_Unload(bitmap); + + return image; +} + +FloatImage * nv::ImageIO::loadFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s) +{ + nvCheck(!s.isError()); + + FreeImageIO io; + io.read_proc = ReadProc; + io.write_proc = NULL; + io.seek_proc = SeekProc; + io.tell_proc = TellProc; + + FIBITMAP * bitmap = FreeImage_LoadFromHandle(fif, &io, (fi_handle)&s, 0); + + if (bitmap == NULL) + { + return NULL; + } + + const int w = FreeImage_GetWidth(bitmap); + const int h = FreeImage_GetHeight(bitmap); + + FREE_IMAGE_TYPE fit = FreeImage_GetImageType(bitmap); + + FloatImage * floatImage = new FloatImage(); + + switch (fit) + { + case FIT_BITMAP: + floatImage->allocate(4, w, h); + { + FIBITMAP * tmp = FreeImage_ConvertTo32Bits(bitmap); + + uint bitcount = FreeImage_GetBPP(bitmap); + uint byteCount = bitcount / 8; + + for (int y=0; y < h; y++) + { + const Color32 * src = (const Color32 *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + + float * r = floatImage->scanline(y, 0); + float * g = floatImage->scanline(y, 1); + float * b = floatImage->scanline(y, 2); + float * a = floatImage->scanline(y, 3); + + for (int x=0; x < w; x++) + { + r[x] = float(src[x].r) / 255.0f; + g[x] = float(src[x].g) / 255.0f; + b[x] = float(src[x].b) / 255.0f; + a[x] = float(src[x].a) / 255.0f; + } + + src += byteCount; + } + + FreeImage_Unload(tmp); + } + break; + case FIT_FLOAT: + floatImage->allocate(1, w, h); + + for (int y=0; y < h; y++) + { + const float * src = (const float *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + float * dst = floatImage->scanline(y, 0); + + for (int x=0; x < w; x++) + { + dst[x] = src[x]; + } + } + break; + case FIT_UINT16: + floatImage->allocate(1, w, h); + + for (int y=0; y < h; y++) + { + const uint16 * src = (const uint16 *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + float * dst = 
floatImage->scanline(y, 0); + + for (int x=0; x < w; x++) + { + dst[x] = float(src[x]) / 65535; + } + } + break; + case FIT_COMPLEX: + floatImage->allocate(2, w, h); + + for (int y=0; y < h; y++) + { + const FICOMPLEX * src = (const FICOMPLEX *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + + float * dst_real = floatImage->scanline(y, 0); + float * dst_imag = floatImage->scanline(y, 1); + + for (int x=0; x < w; x++) + { + dst_real[x] = (float)src[x].r; + dst_imag[x] = (float)src[x].i; + } + } + break; + case FIT_RGBF: + floatImage->allocate(3, w, h); + + for (int y=0; y < h; y++) + { + const FIRGBF * src = (const FIRGBF *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + + float * dst_red = floatImage->scanline(y, 0); + float * dst_green = floatImage->scanline(y, 1); + float * dst_blue = floatImage->scanline(y, 2); + + for (int x=0; x < w; x++) + { + dst_red[x] = src[x].red; + dst_green[x] = src[x].green; + dst_blue[x] = src[x].blue; + } + } + break; + case FIT_RGBAF: + floatImage->allocate(4, w, h); + + for (int y=0; y < h; y++) + { + const FIRGBAF * src = (const FIRGBAF *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + + float * dst_red = floatImage->scanline(y, 0); + float * dst_green = floatImage->scanline(y, 1); + float * dst_blue = floatImage->scanline(y, 2); + float * dst_alpha = floatImage->scanline(y, 3); + + for (int x=0; x < w; x++) + { + dst_red[x] = src[x].red; + dst_green[x] = src[x].green; + dst_blue[x] = src[x].blue; + dst_alpha[x] = src[x].alpha; + } + } + break; + default: + delete floatImage; + floatImage = NULL; + } + + FreeImage_Unload(bitmap); + + return floatImage; +} + +bool nv::ImageIO::saveFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const Image * img, const char ** tags) +{ + nvCheck(!s.isError()); + + FreeImageIO io; + io.read_proc = NULL; + io.write_proc = WriteProc; + io.seek_proc = SeekProc; + io.tell_proc = TellProc; + + const uint w = img->width(); + const uint h = img->height(); + + FIBITMAP * bitmap = FreeImage_Allocate(w, h, 32); + + for 
(uint i = 0; i < h; i++) + { + uint8 * scanline = FreeImage_GetScanLine(bitmap, i); + memcpy(scanline, img->scanline(h - i - 1), w * sizeof(Color32)); + } + + if (tags != NULL) + { + #pragma NV_MESSAGE("TODO: Save image metadata") + //FreeImage_SetMetadata( + } + + bool result = FreeImage_SaveToHandle(fif, bitmap, &io, (fi_handle)&s, 0) != 0; + + FreeImage_Unload(bitmap); + + return result; +} + +bool nv::ImageIO::saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const FloatImage * img, uint baseComponent, uint componentCount) +{ + nvCheck(!s.isError()); + + FreeImageIO io; + io.read_proc = NULL; + io.write_proc = WriteProc; + io.seek_proc = SeekProc; + io.tell_proc = TellProc; + + const uint w = img->width(); + const uint h = img->height(); + + FREE_IMAGE_TYPE type; + if (componentCount == 1) + { + type = FIT_FLOAT; + } + else if (componentCount == 3) + { + type = FIT_RGBF; + } + else if (componentCount == 4) + { + type = FIT_RGBAF; + } + else { + return false; + } + + + FIBITMAP * bitmap = FreeImage_AllocateT(type, w, h); + + for (uint y = 0; y < h; y++) + { + float * scanline = (float *)FreeImage_GetScanLine(bitmap, y); + + for (uint x = 0; x < w; x++) + { + for (uint c = 0; c < componentCount; c++) + { + scanline[x * componentCount + c] = img->pixel(x, y, baseComponent + c); + } + } + } + + bool result = FreeImage_SaveToHandle(fif, bitmap, &io, (fi_handle)&s, 0) != 0; + + FreeImage_Unload(bitmap); + + return result; +} + +#endif // defined(HAVE_FREEIMAGE) + + +#if defined(HAVE_STBIMAGE) + +static Image * loadSTB(Stream & s) +{ + // @@ Assumes stream cursor is at the beginning and that image occupies the whole stream. + const int size = s.size(); + uint8 * buffer = new uint8[size]; + + s.serialize(buffer, size); + + int w, h, n; + uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4); + + delete buffer; + + if (data != NULL) { + Image * img = new Image; + img->allocate(w, h); + img->setFormat(n == 4 ? 
Image::Format_ARGB : Image::Format_RGB); + + for (int y = 0; y < h; ++y) + { + nv::Color32* dest = img->scanline(y); + uint8* src = data + y * w * 4; + + for (int x = 0; x < w; ++x) + { + dest[x].r = src[x * 4 + 0]; + dest[x].g = src[x * 4 + 1]; + dest[x].b = src[x * 4 + 2]; + dest[x].a = src[x * 4 + 3]; + } + } + + free(data); + + return img; + } + + return NULL; +} + +static FloatImage * loadFloatSTB(Stream & s) +{ + // @@ Assumes stream cursor is at the beginning and that image occupies the whole stream. + const int size = s.size(); + uint8 * buffer = new uint8[size]; + + s.serialize(buffer, size); + + int w, h, n; + float * data = stbi_loadf_from_memory(buffer, size, &w, &h, &n, 0); + + delete buffer; + + // Copy to image. + if (data != NULL) { + FloatImage * img = new FloatImage; + img->allocate(n, w, h); + + const int count = w * h; + + for (int c = 0; c < n; c++) { + float * dst = img->channel(c); + + for (int i = 0; i < count; i++) { + dst[i] = data[i*n + c]; + } + } + return img; + } + + return NULL; +} + +#endif // defined(HAVE_STBIMAGE) + + + + + +Image * nv::ImageIO::load(const char * fileName) +{ + nvDebugCheck(fileName != NULL); + + StdInputStream stream(fileName); + + if (stream.isError()) { + return NULL; + } + + return ImageIO::load(fileName, stream); +} + +Image * nv::ImageIO::load(const char * fileName, Stream & s) +{ + nvDebugCheck(fileName != NULL); + nvDebugCheck(s.isLoading()); + + const char * extension = Path::extension(fileName); + + if (strCaseCmp(extension, ".tga") == 0) { + return loadTGA(s); + } + + if (strCaseCmp(extension, ".psd") == 0) { + return loadPSD(s); + } + + /*if (strCaseCmp(extension, ".ppm") == 0) { + return loadPPM(s); + }*/ + +#if defined(HAVE_JPEG) + if (strCaseCmp(extension, ".jpg") == 0 || strCaseCmp(extension, ".jpeg") == 0) { + return loadJPG(s); + } +#endif + +#if defined(HAVE_PNG) + if (strCaseCmp(extension, ".png") == 0) { + return loadPNG(s); + } +#endif + +#if defined(HAVE_FREEIMAGE) + FREE_IMAGE_FORMAT fif = 
FreeImage_GetFIFFromFilename(fileName); + if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { + return loadFreeImage(fif, s); + } +#endif + +#if defined(HAVE_STBIMAGE) + return loadSTB(s); +#endif + + return NULL; +} + +bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/) +{ + nvDebugCheck(fileName != NULL); + nvDebugCheck(s.isSaving()); + nvDebugCheck(img != NULL); + + const char * extension = Path::extension(fileName); + + if (strCaseCmp(extension, ".tga") == 0) { + return saveTGA(s, img); + } + + if (strCaseCmp(extension, ".ppm") == 0) { + return savePPM(s, img); + } + +#if defined(HAVE_PNG) + if (strCaseCmp(extension, ".png") == 0) { + return savePNG(s, img, tags); + } +#endif + +#if defined(HAVE_FREEIMAGE) + FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); + if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { + return saveFreeImage(fif, s, img, tags); + } +#endif + + return false; +} + +bool nv::ImageIO::save(const char * fileName, const Image * img, const char ** tags/*=NULL*/) +{ + nvDebugCheck(fileName != NULL); + nvDebugCheck(img != NULL); + + StdOutputStream stream(fileName); + if (stream.isError()) + { + return false; + } + + return ImageIO::save(fileName, stream, img, tags); +} + +FloatImage * nv::ImageIO::loadFloat(const char * fileName) +{ + nvDebugCheck(fileName != NULL); + + StdInputStream stream(fileName); + + if (stream.isError()) { + return NULL; + } + + return loadFloat(fileName, stream); +} + +FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s) +{ + nvDebugCheck(fileName != NULL); + + const char * extension = Path::extension(fileName); + + /*if (strCaseCmp(extension, ".pfm") == 0) { + return loadFloatPFM(s); + }*/ + +#if defined(HAVE_TIFF) + #pragma NV_MESSAGE("TODO: Load TIFF from stream.") + if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) { + return loadFloatTIFF(fileName, s); + } +#endif + +#if 
defined(HAVE_OPENEXR) + #pragma NV_MESSAGE("TODO: Load EXR from stream.") + if (strCaseCmp(extension, ".exr") == 0) { + return loadFloatEXR(fileName, s); + } +#endif + +#if defined(HAVE_FREEIMAGE) + FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); + if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { + return loadFloatFreeImage(fif, s); + } +#endif + + if (strCaseCmp(extension, ".dds") == 0) { + const uint spos = s.tell(); // Save stream position. + FloatImage * floatImage = loadFloatDDS(s); + if (floatImage != NULL) return floatImage; + else s.seek(spos); + } + + // Try to load as an RGBA8 image and convert to float. + AutoPtr img(load(fileName, s)); + if (img != NULL) { + return new FloatImage(img.ptr()); + } + + return NULL; +} + +bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage * fimage, uint baseComponent, uint componentCount) +{ + if (componentCount == 0) { + componentCount = fimage->componentCount() - baseComponent; + } + if (baseComponent + componentCount < fimage->componentCount()) { + return false; + } + + const char * extension = Path::extension(fileName); + + if (strCaseCmp(extension, ".dds") == 0) { + return saveFloatDDS(s, fimage, baseComponent, componentCount); + } + + /*if (strCaseCmp(extension, ".pfm") == 0) { + return saveFloatPFM(s, fimage, baseComponent, componentCount); + }*/ + +#if defined(HAVE_FREEIMAGE) + FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); + if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { + return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount); + } +#endif + + // If everything else fails, save as LDR. 
+ if (componentCount <= 4) + { + AutoPtr image(fimage->createImage(baseComponent, componentCount)); + nvCheck(image != NULL); + + if (componentCount == 1) + { + Color32 * c = image->pixels(); + const uint count = image->width() * image->height(); + for (uint i = 0; i < count; i++) + { + c[i].b = c[i].g = c[i].r; + } + } + + if (componentCount == 4) + { + image->setFormat(Image::Format_ARGB); + } + + return ImageIO::save(fileName, s, image.ptr()); + } + + return false; +} + +bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount) +{ + if (componentCount == 0) { + componentCount = fimage->componentCount() - baseComponent; + } + if (baseComponent + componentCount < fimage->componentCount()) { + return false; + } + + const char * extension = Path::extension(fileName); + +#if defined(HAVE_OPENEXR) + if (strCaseCmp(extension, ".exr") == 0) { + return saveFloatEXR(fileName, fimage, baseComponent, componentCount); + } +#endif + +#if defined(HAVE_TIFF) + if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) { + return saveFloatTIFF(fileName, fimage, baseComponent, componentCount); + } +#endif + + StdOutputStream stream(fileName); + + if (stream.isError()) { + return false; + } + + return saveFloat(fileName, stream, fimage, baseComponent, componentCount); +} diff --git a/src/nvimage/ImageIO.h b/src/nvimage/ImageIO.h index ee3bfb0..25490ab 100644 --- a/src/nvimage/ImageIO.h +++ b/src/nvimage/ImageIO.h @@ -1,37 +1,37 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#pragma once -#ifndef NV_IMAGE_IMAGEIO_H -#define NV_IMAGE_IMAGEIO_H - -#include "nvimage.h" - -#include "nvcore/StrLib.h" - - -namespace nv -{ - class Image; - class FloatImage; - class Stream; - - namespace ImageIO - { - NVIMAGE_API Image * load(const char * fileName); - NVIMAGE_API Image * load(const char * fileName, Stream & s); - - NVIMAGE_API FloatImage * loadFloat(const char * fileName); - NVIMAGE_API 
FloatImage * loadFloat(const char * fileName, Stream & s); - - NVIMAGE_API bool save(const char * fileName, const Image * img, const char ** tags=NULL); // NULL terminated list. - NVIMAGE_API bool save(const char * fileName, Stream & s, const Image * img, const char ** tags=NULL); - - NVIMAGE_API bool saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount); - NVIMAGE_API bool saveFloat(const char * fileName, Stream & s, const FloatImage * fimage, uint baseComponent, uint componentCount); - - } // ImageIO namespace - -} // nv namespace - - -#endif // NV_IMAGE_IMAGEIO_H +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_IMAGE_IMAGEIO_H +#define NV_IMAGE_IMAGEIO_H + +#include "nvimage.h" + +#include "nvcore/StrLib.h" + + +namespace nv +{ + class Image; + class FloatImage; + class Stream; + + namespace ImageIO + { + NVIMAGE_API Image * load(const char * fileName); + NVIMAGE_API Image * load(const char * fileName, Stream & s); + + NVIMAGE_API FloatImage * loadFloat(const char * fileName); + NVIMAGE_API FloatImage * loadFloat(const char * fileName, Stream & s); + + NVIMAGE_API bool save(const char * fileName, const Image * img, const char ** tags=NULL); // NULL terminated list. 
+ NVIMAGE_API bool save(const char * fileName, Stream & s, const Image * img, const char ** tags=NULL); + + NVIMAGE_API bool saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount); + NVIMAGE_API bool saveFloat(const char * fileName, Stream & s, const FloatImage * fimage, uint baseComponent, uint componentCount); + + } // ImageIO namespace + +} // nv namespace + + +#endif // NV_IMAGE_IMAGEIO_H diff --git a/src/nvimage/NormalMap.cpp b/src/nvimage/NormalMap.cpp index e0b1092..404186d 100644 --- a/src/nvimage/NormalMap.cpp +++ b/src/nvimage/NormalMap.cpp @@ -27,6 +27,7 @@ #include "Image.h" #include "nvmath/Color.inl" +#include "nvmath/Vector.h" #include "nvcore/Ptr.h" diff --git a/src/nvimage/NormalMap.h b/src/nvimage/NormalMap.h index 3f13d42..fc484c9 100644 --- a/src/nvimage/NormalMap.h +++ b/src/nvimage/NormalMap.h @@ -1,59 +1,59 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#pragma once -#ifndef NV_IMAGE_NORMALMAP_H -#define NV_IMAGE_NORMALMAP_H - -#include "nvimage.h" -#include "FloatImage.h" - -#include "nvmath/Vector.h" - - -namespace nv -{ - class Image; - - enum NormalMapFilter - { - NormalMapFilter_Sobel3x3, // fine detail - NormalMapFilter_Sobel5x5, // medium detail - NormalMapFilter_Sobel7x7, // large detail - NormalMapFilter_Sobel9x9, // very large - }; - - // @@ These two functions should be deprecated: - FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); - FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); - - FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights); - - void normalizeNormalMap(FloatImage * img); - - // @@ Add generation of DU/DV maps. - - -} // nv namespace - -#endif // NV_IMAGE_NORMALMAP_H +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#pragma once +#ifndef NV_IMAGE_NORMALMAP_H +#define NV_IMAGE_NORMALMAP_H + +#include "nvimage.h" +#include "FloatImage.h" + +#include "nvmath/Vector.h" + + +namespace nv +{ + class Image; + + enum NormalMapFilter + { + NormalMapFilter_Sobel3x3, // fine detail + NormalMapFilter_Sobel5x5, // medium detail + NormalMapFilter_Sobel7x7, // large detail + NormalMapFilter_Sobel9x9, // very large + }; + + // @@ These two functions should be deprecated: + FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); + FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); + + FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights); + + void normalizeNormalMap(FloatImage * img); + + // @@ Add generation of DU/DV maps. 
+ + +} // nv namespace + +#endif // NV_IMAGE_NORMALMAP_H diff --git a/src/nvimage/PixelFormat.h b/src/nvimage/PixelFormat.h index 78c0a68..8610c6e 100644 --- a/src/nvimage/PixelFormat.h +++ b/src/nvimage/PixelFormat.h @@ -1,118 +1,118 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#pragma once -#ifndef NV_IMAGE_PIXELFORMAT_H -#define NV_IMAGE_PIXELFORMAT_H - -#include "nvimage.h" - - -namespace nv -{ - namespace PixelFormat - { - - // Convert component @a c having @a inbits to the returned value having @a outbits. - inline uint convert(uint c, uint inbits, uint outbits) - { - if (inbits == 0) - { - return 0; - } - else if (inbits >= outbits) - { - // truncate - return c >> (inbits - outbits); - } - else - { - // bitexpand - return (c << (outbits - inbits)) | convert(c, inbits, outbits - inbits); - } - } - - // Get pixel component shift and size given its mask. 
- inline void maskShiftAndSize(uint mask, uint * shift, uint * size) - { - if (!mask) - { - *shift = 0; - *size = 0; - return; - } - - *shift = 0; - while((mask & 1) == 0) { - ++(*shift); - mask >>= 1; - } - - *size = 0; - while((mask & 1) == 1) { - ++(*size); - mask >>= 1; - } - } - - inline float quantizeCeil(float f, int inbits, int outbits) - { - nvDebugCheck(f >= 0.0f && f <= 1.0f); - //uint i = f * (float(1 << inbits) - 1); - //i = convert(i, inbits, outbits); - //float result = float(i) / (float(1 << outbits) - 1); - //nvCheck(result >= f); - float result; - int offset = 0; - do { - uint i = offset + uint(f * (float(1 << inbits) - 1)); - i = convert(i, inbits, outbits); - result = float(i) / (float(1 << outbits) - 1); - offset++; - } while (result < f); - - return result; - } - - /* - inline float quantizeRound(float f, int bits) - { - nvDebugCheck(f >= 0.0f && f <= 1.0f); - float scale = float(1 << bits); - return fround(f * scale) / scale; - } - - inline float quantizeFloor(float f, int bits) - { - nvDebugCheck(f >= 0.0f && f <= 1.0f); - float scale = float(1 << bits); - return floor(f * scale) / scale; - } - */ - - } // PixelFormat namespace - -} // nv namespace - - -#endif // NV_IMAGE_PIXELFORMAT_H +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#pragma once +#ifndef NV_IMAGE_PIXELFORMAT_H +#define NV_IMAGE_PIXELFORMAT_H + +#include "nvimage.h" + + +namespace nv +{ + namespace PixelFormat + { + + // Convert component @a c having @a inbits to the returned value having @a outbits. + inline uint convert(uint c, uint inbits, uint outbits) + { + if (inbits == 0) + { + return 0; + } + else if (inbits >= outbits) + { + // truncate + return c >> (inbits - outbits); + } + else + { + // bitexpand + return (c << (outbits - inbits)) | convert(c, inbits, outbits - inbits); + } + } + + // Get pixel component shift and size given its mask. 
+ inline void maskShiftAndSize(uint mask, uint * shift, uint * size) + { + if (!mask) + { + *shift = 0; + *size = 0; + return; + } + + *shift = 0; + while((mask & 1) == 0) { + ++(*shift); + mask >>= 1; + } + + *size = 0; + while((mask & 1) == 1) { + ++(*size); + mask >>= 1; + } + } + + inline float quantizeCeil(float f, int inbits, int outbits) + { + nvDebugCheck(f >= 0.0f && f <= 1.0f); + //uint i = f * (float(1 << inbits) - 1); + //i = convert(i, inbits, outbits); + //float result = float(i) / (float(1 << outbits) - 1); + //nvCheck(result >= f); + float result; + int offset = 0; + do { + uint i = offset + uint(f * (float(1 << inbits) - 1)); + i = convert(i, inbits, outbits); + result = float(i) / (float(1 << outbits) - 1); + offset++; + } while (result < f); + + return result; + } + + /* + inline float quantizeRound(float f, int bits) + { + nvDebugCheck(f >= 0.0f && f <= 1.0f); + float scale = float(1 << bits); + return fround(f * scale) / scale; + } + + inline float quantizeFloor(float f, int bits) + { + nvDebugCheck(f >= 0.0f && f <= 1.0f); + float scale = float(1 << bits); + return floor(f * scale) / scale; + } + */ + + } // PixelFormat namespace + +} // nv namespace + + +#endif // NV_IMAGE_PIXELFORMAT_H diff --git a/src/nvimage/PsdFile.h b/src/nvimage/PsdFile.h index 3f242c8..b4c625a 100644 --- a/src/nvimage/PsdFile.h +++ b/src/nvimage/PsdFile.h @@ -1,71 +1,71 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#pragma once -#ifndef NV_IMAGE_PSDFILE_H -#define NV_IMAGE_PSDFILE_H - -#include "nvcore/Stream.h" - -namespace nv -{ - enum PsdColorMode - { - PsdColorMode_Bitmap = 0, - PsdColorMode_GrayScale = 1, - PsdColorMode_Indexed = 2, - PsdColorMode_RGB = 3, - PsdColorMode_CMYK = 4, - PsdColorMode_MultiChannel = 7, - PsdColorMode_DuoTone = 8, - PsdColorMode_LabColor = 9 - }; - - /// PSD header. 
- struct PsdHeader - { - uint32 signature; - uint16 version; - uint8 reserved[6]; - uint16 channel_count; - uint32 height; - uint32 width; - uint16 depth; - uint16 color_mode; - - bool isValid() const - { - return signature == 0x38425053; // '8BPS' - } - - bool isSupported() const - { - if (version != 1) { - nvDebug("*** bad version number %u\n", version); - return false; - } - if (channel_count > 4) { - return false; - } - if (depth != 8) { // @@ Add support for 16 bit depths. - return false; - } - if (color_mode != PsdColorMode_RGB) { - return false; - } - return true; - } - }; - - - inline Stream & operator<< (Stream & s, PsdHeader & head) - { - s << head.signature << head.version; - for (int i = 0; i < 6; i++) { - s << head.reserved[i]; - } - return s << head.channel_count << head.height << head.width << head.depth << head.color_mode; - } - -} // nv namespace - -#endif // NV_IMAGE_PSDFILE_H +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_IMAGE_PSDFILE_H +#define NV_IMAGE_PSDFILE_H + +#include "nvcore/Stream.h" + +namespace nv +{ + enum PsdColorMode + { + PsdColorMode_Bitmap = 0, + PsdColorMode_GrayScale = 1, + PsdColorMode_Indexed = 2, + PsdColorMode_RGB = 3, + PsdColorMode_CMYK = 4, + PsdColorMode_MultiChannel = 7, + PsdColorMode_DuoTone = 8, + PsdColorMode_LabColor = 9 + }; + + /// PSD header. + struct PsdHeader + { + uint32 signature; + uint16 version; + uint8 reserved[6]; + uint16 channel_count; + uint32 height; + uint32 width; + uint16 depth; + uint16 color_mode; + + bool isValid() const + { + return signature == 0x38425053; // '8BPS' + } + + bool isSupported() const + { + if (version != 1) { + nvDebug("*** bad version number %u\n", version); + return false; + } + if (channel_count > 4) { + return false; + } + if (depth != 8) { // @@ Add support for 16 bit depths. 
+ return false; + } + if (color_mode != PsdColorMode_RGB) { + return false; + } + return true; + } + }; + + + inline Stream & operator<< (Stream & s, PsdHeader & head) + { + s << head.signature << head.version; + for (int i = 0; i < 6; i++) { + s << head.reserved[i]; + } + return s << head.channel_count << head.height << head.width << head.depth << head.color_mode; + } + +} // nv namespace + +#endif // NV_IMAGE_PSDFILE_H diff --git a/src/nvimage/Quantize.cpp b/src/nvimage/Quantize.cpp index 64168c8..889a8c2 100644 --- a/src/nvimage/Quantize.cpp +++ b/src/nvimage/Quantize.cpp @@ -1,222 +1,222 @@ -// This code is in the public domain -- castanyo@yahoo.es - -/* -http://www.visgraf.impa.br/Courses/ip00/proj/Dithering1/floyd_steinberg_dithering.html -http://www.gamedev.net/reference/articles/article341.asp - -@@ Look at LPS: http://www.cs.rit.edu/~pga/pics2000/i.html - -This is a really nice guide to dithering algorithms: -http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT - -@@ This code needs to be reviewed, I'm not sure it's correct. -*/ - -#include "Quantize.h" -#include "Image.h" -#include "PixelFormat.h" - -#include "nvmath/Color.h" -#include "nvmath/Vector.inl" - -#include "nvcore/Utils.h" // swap - -#include // memset - - -using namespace nv; - - -// Simple quantization. -void nv::Quantize::BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ ) -{ - nvCheck(image != NULL); - - const uint w = image->width(); - const uint h = image->height(); - - for(uint y = 0; y < h; y++) { - for(uint x = 0; x < w; x++) { - - Color32 pixel = image->pixel(x, y); - - // Convert color. - if( pixel.a > alpha_threshold ) pixel.a = 255; - else pixel.a = 0; - - // Store color. - image->pixel(x, y) = pixel; - } - } -} - - -// Simple quantization. -void nv::Quantize::RGB16( Image * image ) -{ - Truncate(image, 5, 6, 5, 8); -} - -// Alpha quantization. -void nv::Quantize::Alpha4( Image * image ) -{ - Truncate(image, 8, 8, 8, 4); -} - - -// Error diffusion. Floyd Steinberg. 
-void nv::Quantize::FloydSteinberg_RGB16( Image * image ) -{ - FloydSteinberg(image, 5, 6, 5, 8); -} - - -// Error diffusion. Floyd Steinberg. -void nv::Quantize::FloydSteinberg_BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ ) -{ - nvCheck(image != NULL); - - const uint w = image->width(); - const uint h = image->height(); - - // @@ Use fixed point? - float * row0 = new float[(w+2)]; - float * row1 = new float[(w+2)]; - memset(row0, 0, sizeof(float)*(w+2)); - memset(row1, 0, sizeof(float)*(w+2)); - - for(uint y = 0; y < h; y++) { - for(uint x = 0; x < w; x++) { - - Color32 pixel = image->pixel(x, y); - - // Add error. - int alpha = int(pixel.a) + int(row0[1+x]); - - // Convert color. - if( alpha > alpha_threshold ) pixel.a = 255; - else pixel.a = 0; - - // Store color. - image->pixel(x, y) = pixel; - - // Compute new error. - float diff = float(alpha - pixel.a); - - // Propagate new error. - row0[1+x+1] += 7.0f / 16.0f * diff; - row1[1+x-1] += 3.0f / 16.0f * diff; - row1[1+x+0] += 5.0f / 16.0f * diff; - row1[1+x+1] += 1.0f / 16.0f * diff; - } - - swap(row0, row1); - memset(row1, 0, sizeof(float)*(w+2)); - } - - delete [] row0; - delete [] row1; -} - - -// Error diffusion. Floyd Steinberg. -void nv::Quantize::FloydSteinberg_Alpha4( Image * image ) -{ - FloydSteinberg(image, 8, 8, 8, 4); -} - - -void nv::Quantize::Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize) -{ - nvCheck(image != NULL); - - const uint w = image->width(); - const uint h = image->height(); - - for(uint y = 0; y < h; y++) { - for(uint x = 0; x < w; x++) { - - Color32 pixel = image->pixel(x, y); - - // Convert to our desired size, and reconstruct. 
- pixel.r = PixelFormat::convert(pixel.r, 8, rsize); - pixel.r = PixelFormat::convert(pixel.r, rsize, 8); - - pixel.g = PixelFormat::convert(pixel.g, 8, gsize); - pixel.g = PixelFormat::convert(pixel.g, gsize, 8); - - pixel.b = PixelFormat::convert(pixel.b, 8, bsize); - pixel.b = PixelFormat::convert(pixel.b, bsize, 8); - - pixel.a = PixelFormat::convert(pixel.a, 8, asize); - pixel.a = PixelFormat::convert(pixel.a, asize, 8); - - // Store color. - image->pixel(x, y) = pixel; - } - } -} - - -// Error diffusion. Floyd Steinberg. -void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize) -{ - nvCheck(image != NULL); - - const uint w = image->width(); - const uint h = image->height(); - - Vector4 * row0 = new Vector4[w+2]; - Vector4 * row1 = new Vector4[w+2]; - memset(row0, 0, sizeof(Vector4)*(w+2)); - memset(row1, 0, sizeof(Vector4)*(w+2)); - - for (uint y = 0; y < h; y++) { - for (uint x = 0; x < w; x++) { - - Color32 pixel = image->pixel(x, y); - - // Add error. - pixel.r = clamp(int(pixel.r) + int(row0[1+x].x), 0, 255); - pixel.g = clamp(int(pixel.g) + int(row0[1+x].y), 0, 255); - pixel.b = clamp(int(pixel.b) + int(row0[1+x].z), 0, 255); - pixel.a = clamp(int(pixel.a) + int(row0[1+x].w), 0, 255); - - int r = pixel.r; - int g = pixel.g; - int b = pixel.b; - int a = pixel.a; - - // Convert to our desired size, and reconstruct. - r = PixelFormat::convert(r, 8, rsize); - r = PixelFormat::convert(r, rsize, 8); - - g = PixelFormat::convert(g, 8, gsize); - g = PixelFormat::convert(g, gsize, 8); - - b = PixelFormat::convert(b, 8, bsize); - b = PixelFormat::convert(b, bsize, 8); - - a = PixelFormat::convert(a, 8, asize); - a = PixelFormat::convert(a, asize, 8); - - // Store color. - image->pixel(x, y) = Color32(r, g, b, a); - - // Compute new error. - Vector4 diff(float(int(pixel.r) - r), float(int(pixel.g) - g), float(int(pixel.b) - b), float(int(pixel.a) - a)); - - // Propagate new error. 
- row0[1+x+1] += 7.0f / 16.0f * diff; - row1[1+x-1] += 3.0f / 16.0f * diff; - row1[1+x+0] += 5.0f / 16.0f * diff; - row1[1+x+1] += 1.0f / 16.0f * diff; - } - - swap(row0, row1); - memset(row1, 0, sizeof(Vector4)*(w+2)); - } - - delete [] row0; - delete [] row1; -} +// This code is in the public domain -- castanyo@yahoo.es + +/* +http://www.visgraf.impa.br/Courses/ip00/proj/Dithering1/floyd_steinberg_dithering.html +http://www.gamedev.net/reference/articles/article341.asp + +@@ Look at LPS: http://www.cs.rit.edu/~pga/pics2000/i.html + +This is a really nice guide to dithering algorithms: +http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT + +@@ This code needs to be reviewed, I'm not sure it's correct. +*/ + +#include "Quantize.h" +#include "Image.h" +#include "PixelFormat.h" + +#include "nvmath/Color.h" +#include "nvmath/Vector.inl" + +#include "nvcore/Utils.h" // swap + +#include // memset + + +using namespace nv; + + +// Simple quantization. +void nv::Quantize::BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ ) +{ + nvCheck(image != NULL); + + const uint w = image->width(); + const uint h = image->height(); + + for(uint y = 0; y < h; y++) { + for(uint x = 0; x < w; x++) { + + Color32 pixel = image->pixel(x, y); + + // Convert color. + if( pixel.a > alpha_threshold ) pixel.a = 255; + else pixel.a = 0; + + // Store color. + image->pixel(x, y) = pixel; + } + } +} + + +// Simple quantization. +void nv::Quantize::RGB16( Image * image ) +{ + Truncate(image, 5, 6, 5, 8); +} + +// Alpha quantization. +void nv::Quantize::Alpha4( Image * image ) +{ + Truncate(image, 8, 8, 8, 4); +} + + +// Error diffusion. Floyd Steinberg. +void nv::Quantize::FloydSteinberg_RGB16( Image * image ) +{ + FloydSteinberg(image, 5, 6, 5, 8); +} + + +// Error diffusion. Floyd Steinberg. 
+void nv::Quantize::FloydSteinberg_BinaryAlpha( Image * image, int alpha_threshold /*= 127*/ ) +{ + nvCheck(image != NULL); + + const uint w = image->width(); + const uint h = image->height(); + + // @@ Use fixed point? + float * row0 = new float[(w+2)]; + float * row1 = new float[(w+2)]; + memset(row0, 0, sizeof(float)*(w+2)); + memset(row1, 0, sizeof(float)*(w+2)); + + for(uint y = 0; y < h; y++) { + for(uint x = 0; x < w; x++) { + + Color32 pixel = image->pixel(x, y); + + // Add error. + int alpha = int(pixel.a) + int(row0[1+x]); + + // Convert color. + if( alpha > alpha_threshold ) pixel.a = 255; + else pixel.a = 0; + + // Store color. + image->pixel(x, y) = pixel; + + // Compute new error. + float diff = float(alpha - pixel.a); + + // Propagate new error. + row0[1+x+1] += 7.0f / 16.0f * diff; + row1[1+x-1] += 3.0f / 16.0f * diff; + row1[1+x+0] += 5.0f / 16.0f * diff; + row1[1+x+1] += 1.0f / 16.0f * diff; + } + + swap(row0, row1); + memset(row1, 0, sizeof(float)*(w+2)); + } + + delete [] row0; + delete [] row1; +} + + +// Error diffusion. Floyd Steinberg. +void nv::Quantize::FloydSteinberg_Alpha4( Image * image ) +{ + FloydSteinberg(image, 8, 8, 8, 4); +} + + +void nv::Quantize::Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize) +{ + nvCheck(image != NULL); + + const uint w = image->width(); + const uint h = image->height(); + + for(uint y = 0; y < h; y++) { + for(uint x = 0; x < w; x++) { + + Color32 pixel = image->pixel(x, y); + + // Convert to our desired size, and reconstruct. + pixel.r = PixelFormat::convert(pixel.r, 8, rsize); + pixel.r = PixelFormat::convert(pixel.r, rsize, 8); + + pixel.g = PixelFormat::convert(pixel.g, 8, gsize); + pixel.g = PixelFormat::convert(pixel.g, gsize, 8); + + pixel.b = PixelFormat::convert(pixel.b, 8, bsize); + pixel.b = PixelFormat::convert(pixel.b, bsize, 8); + + pixel.a = PixelFormat::convert(pixel.a, 8, asize); + pixel.a = PixelFormat::convert(pixel.a, asize, 8); + + // Store color. 
+ image->pixel(x, y) = pixel; + } + } +} + + +// Error diffusion. Floyd Steinberg. +void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize) +{ + nvCheck(image != NULL); + + const uint w = image->width(); + const uint h = image->height(); + + Vector4 * row0 = new Vector4[w+2]; + Vector4 * row1 = new Vector4[w+2]; + memset(row0, 0, sizeof(Vector4)*(w+2)); + memset(row1, 0, sizeof(Vector4)*(w+2)); + + for (uint y = 0; y < h; y++) { + for (uint x = 0; x < w; x++) { + + Color32 pixel = image->pixel(x, y); + + // Add error. + pixel.r = clamp(int(pixel.r) + int(row0[1+x].x), 0, 255); + pixel.g = clamp(int(pixel.g) + int(row0[1+x].y), 0, 255); + pixel.b = clamp(int(pixel.b) + int(row0[1+x].z), 0, 255); + pixel.a = clamp(int(pixel.a) + int(row0[1+x].w), 0, 255); + + int r = pixel.r; + int g = pixel.g; + int b = pixel.b; + int a = pixel.a; + + // Convert to our desired size, and reconstruct. + r = PixelFormat::convert(r, 8, rsize); + r = PixelFormat::convert(r, rsize, 8); + + g = PixelFormat::convert(g, 8, gsize); + g = PixelFormat::convert(g, gsize, 8); + + b = PixelFormat::convert(b, 8, bsize); + b = PixelFormat::convert(b, bsize, 8); + + a = PixelFormat::convert(a, 8, asize); + a = PixelFormat::convert(a, asize, 8); + + // Store color. + image->pixel(x, y) = Color32(r, g, b, a); + + // Compute new error. + Vector4 diff(float(int(pixel.r) - r), float(int(pixel.g) - g), float(int(pixel.b) - b), float(int(pixel.a) - a)); + + // Propagate new error. 
+ row0[1+x+1] += 7.0f / 16.0f * diff; + row1[1+x-1] += 3.0f / 16.0f * diff; + row1[1+x+0] += 5.0f / 16.0f * diff; + row1[1+x+1] += 1.0f / 16.0f * diff; + } + + swap(row0, row1); + memset(row1, 0, sizeof(Vector4)*(w+2)); + } + + delete [] row0; + delete [] row1; +} diff --git a/src/nvimage/Quantize.h b/src/nvimage/Quantize.h index 2278d45..2a6a26d 100644 --- a/src/nvimage/Quantize.h +++ b/src/nvimage/Quantize.h @@ -1,32 +1,32 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#pragma once -#ifndef NV_IMAGE_QUANTIZE_H -#define NV_IMAGE_QUANTIZE_H - -#include "nvimage.h" - - -namespace nv -{ - class Image; - - namespace Quantize - { - void RGB16(Image * img); - void BinaryAlpha(Image * img, int alpha_threshold = 127); - void Alpha4(Image * img); - - void FloydSteinberg_RGB16(Image * img); - void FloydSteinberg_BinaryAlpha(Image * img, int alpha_threshold = 127); - void FloydSteinberg_Alpha4(Image * img); - - void Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize); - void FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize); - - // @@ Add palette quantization algorithms! - } -} - - -#endif // NV_IMAGE_QUANTIZE_H +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_IMAGE_QUANTIZE_H +#define NV_IMAGE_QUANTIZE_H + +#include "nvimage.h" + + +namespace nv +{ + class Image; + + namespace Quantize + { + void RGB16(Image * img); + void BinaryAlpha(Image * img, int alpha_threshold = 127); + void Alpha4(Image * img); + + void FloydSteinberg_RGB16(Image * img); + void FloydSteinberg_BinaryAlpha(Image * img, int alpha_threshold = 127); + void FloydSteinberg_Alpha4(Image * img); + + void Truncate(Image * image, uint rsize, uint gsize, uint bsize, uint asize); + void FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize); + + // @@ Add palette quantization algorithms! 
+ } +} + + +#endif // NV_IMAGE_QUANTIZE_H diff --git a/src/nvimage/TgaFile.h b/src/nvimage/TgaFile.h index ed562b6..bce2fc1 100644 --- a/src/nvimage/TgaFile.h +++ b/src/nvimage/TgaFile.h @@ -1,106 +1,106 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#pragma once -#ifndef NV_IMAGE_TGAFILE_H -#define NV_IMAGE_TGAFILE_H - -#include "nvcore/Stream.h" - -namespace nv -{ - -// TGA types -enum TGAType { - TGA_TYPE_INDEXED = 1, - TGA_TYPE_RGB = 2, - TGA_TYPE_GREY = 3, - TGA_TYPE_RLE_INDEXED = 9, - TGA_TYPE_RLE_RGB = 10, - TGA_TYPE_RLE_GREY = 11 -}; - -#define TGA_INTERLEAVE_MASK 0xc0 -#define TGA_INTERLEAVE_NONE 0x00 -#define TGA_INTERLEAVE_2WAY 0x40 -#define TGA_INTERLEAVE_4WAY 0x80 - -#define TGA_ORIGIN_MASK 0x30 -#define TGA_ORIGIN_LEFT 0x00 -#define TGA_ORIGIN_RIGHT 0x10 -#define TGA_ORIGIN_LOWER 0x00 -#define TGA_ORIGIN_UPPER 0x20 - -#define TGA_HAS_ALPHA 0x0F - - -/// Tga Header. -struct TgaHeader { - uint8 id_length; - uint8 colormap_type; - uint8 image_type; - uint16 colormap_index; - uint16 colormap_length; - uint8 colormap_size; - uint16 x_origin; - uint16 y_origin; - uint16 width; - uint16 height; - uint8 pixel_size; - uint8 flags; - - enum { Size = 18 }; //const static int SIZE = 18; -}; - - -/// Tga File. 
-struct TgaFile { - - TgaFile() { - mem = NULL; - } - ~TgaFile() { - free(); - } - - uint size() const { - return head.width * head.height * (head.pixel_size / 8); - } - void allocate() { - nvCheck( mem == NULL ); - mem = new uint8[size()]; - } - void free() { - delete [] mem; - mem = NULL; - } - - TgaHeader head; - uint8 * mem; -}; - - -inline Stream & operator<< (Stream & s, TgaHeader & head) -{ - s << head.id_length << head.colormap_type << head.image_type; - s << head.colormap_index << head.colormap_length << head.colormap_size; - s << head.x_origin << head.y_origin << head.width << head.height; - s << head.pixel_size << head.flags; - return s; -} - -inline Stream & operator<< (Stream & s, TgaFile & tga) -{ - s << tga.head; - - if( s.isLoading() ) { - tga.allocate(); - } - - s.serialize( tga.mem, tga.size() ); - - return s; -} - -} // nv namespace - -#endif // NV_IMAGE_TGAFILE_H +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_IMAGE_TGAFILE_H +#define NV_IMAGE_TGAFILE_H + +#include "nvcore/Stream.h" + +namespace nv +{ + +// TGA types +enum TGAType { + TGA_TYPE_INDEXED = 1, + TGA_TYPE_RGB = 2, + TGA_TYPE_GREY = 3, + TGA_TYPE_RLE_INDEXED = 9, + TGA_TYPE_RLE_RGB = 10, + TGA_TYPE_RLE_GREY = 11 +}; + +#define TGA_INTERLEAVE_MASK 0xc0 +#define TGA_INTERLEAVE_NONE 0x00 +#define TGA_INTERLEAVE_2WAY 0x40 +#define TGA_INTERLEAVE_4WAY 0x80 + +#define TGA_ORIGIN_MASK 0x30 +#define TGA_ORIGIN_LEFT 0x00 +#define TGA_ORIGIN_RIGHT 0x10 +#define TGA_ORIGIN_LOWER 0x00 +#define TGA_ORIGIN_UPPER 0x20 + +#define TGA_HAS_ALPHA 0x0F + + +/// Tga Header. +struct TgaHeader { + uint8 id_length; + uint8 colormap_type; + uint8 image_type; + uint16 colormap_index; + uint16 colormap_length; + uint8 colormap_size; + uint16 x_origin; + uint16 y_origin; + uint16 width; + uint16 height; + uint8 pixel_size; + uint8 flags; + + enum { Size = 18 }; //const static int SIZE = 18; +}; + + +/// Tga File. 
+struct TgaFile { + + TgaFile() { + mem = NULL; + } + ~TgaFile() { + free(); + } + + uint size() const { + return head.width * head.height * (head.pixel_size / 8); + } + void allocate() { + nvCheck( mem == NULL ); + mem = new uint8[size()]; + } + void free() { + delete [] mem; + mem = NULL; + } + + TgaHeader head; + uint8 * mem; +}; + + +inline Stream & operator<< (Stream & s, TgaHeader & head) +{ + s << head.id_length << head.colormap_type << head.image_type; + s << head.colormap_index << head.colormap_length << head.colormap_size; + s << head.x_origin << head.y_origin << head.width << head.height; + s << head.pixel_size << head.flags; + return s; +} + +inline Stream & operator<< (Stream & s, TgaFile & tga) +{ + s << tga.head; + + if( s.isLoading() ) { + tga.allocate(); + } + + s.serialize( tga.mem, tga.size() ); + + return s; +} + +} // nv namespace + +#endif // NV_IMAGE_TGAFILE_H diff --git a/src/nvmath/Half.cpp b/src/nvmath/Half.cpp index b76794e..b0bd2a8 100644 --- a/src/nvmath/Half.cpp +++ b/src/nvmath/Half.cpp @@ -1,612 +1,612 @@ -// Branch-free implementation of half-precision (16 bit) floating point -// Copyright 2006 Mike Acton -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE -// -// Half-precision floating point format -// ------------------------------------ -// -// | Field | Last | First | Note -// |----------|------|-------|---------- -// | Sign | 15 | 15 | -// | Exponent | 14 | 10 | Bias = 15 -// | Mantissa | 9 | 0 | -// -// Compiling -// --------- -// -// Preferred compile flags for GCC: -// -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing -// -// This file is a C99 source file, intended to be compiled with a C99 -// compliant compiler. However, for the moment it remains combatible -// with C++98. Therefore if you are using a compiler that poorly implements -// C standards (e.g. MSVC), it may be compiled as C++. This is not -// guaranteed for future versions. -// -// Features -// -------- -// -// * QNaN + = QNaN -// * + +INF = +INF -// * - -INF = -INF -// * INF - INF = SNaN -// * Denormalized values -// * Difference of ZEROs is always +ZERO -// * Sum round with guard + round + sticky bit (grs) -// * And of course... 
no branching -// -// Precision of Sum -// ---------------- -// -// (SUM) uint16 z = half_add( x, y ); -// (DIFFERENCE) uint16 z = half_add( x, -y ); -// -// Will have exactly (0 ulps difference) the same result as: -// (For 32 bit IEEE 784 floating point and same rounding mode) -// -// union FLOAT_32 -// { -// float f32; -// uint32 u32; -// }; -// -// union FLOAT_32 fx = { .u32 = half_to_float( x ) }; -// union FLOAT_32 fy = { .u32 = half_to_float( y ) }; -// union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32 }; -// uint16 z = float_to_half( fz ); -// - -#include "Half.h" -#include - -// Load immediate -static inline uint32 _uint32_li( uint32 a ) -{ - return (a); -} - -// Decrement -static inline uint32 _uint32_dec( uint32 a ) -{ - return (a - 1); -} - -// Increment -static inline uint32 _uint32_inc( uint32 a ) -{ - return (a + 1); -} - -// Complement -static inline uint32 _uint32_not( uint32 a ) -{ - return (~a); -} - -// Negate -static inline uint32 _uint32_neg( uint32 a ) -{ -#pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned - return (-a); -#pragma warning(default : 4146) -} - -// Extend sign -static inline uint32 _uint32_ext( uint32 a ) -{ - return (((int32)a)>>31); -} - -// And -static inline uint32 _uint32_and( uint32 a, uint32 b ) -{ - return (a & b); -} - -// And with Complement -static inline uint32 _uint32_andc( uint32 a, uint32 b ) -{ - return (a & ~b); -} - -// Or -static inline uint32 _uint32_or( uint32 a, uint32 b ) -{ - return (a | b); -} - -// Shift Right Logical -static inline uint32 _uint32_srl( uint32 a, int sa ) -{ - return (a >> sa); -} - -// Shift Left Logical -static inline uint32 _uint32_sll( uint32 a, int sa ) -{ - return (a << sa); -} - -// Add -static inline uint32 _uint32_add( uint32 a, uint32 b ) -{ - return (a + b); -} - -// Subtract -static inline uint32 _uint32_sub( uint32 a, uint32 b ) -{ - return (a - b); -} - -// Select on Sign bit -static inline uint32 _uint32_sels( uint32 test, 
uint32 a, uint32 b ) -{ - const uint32 mask = _uint32_ext( test ); - const uint32 sel_a = _uint32_and( a, mask ); - const uint32 sel_b = _uint32_andc( b, mask ); - const uint32 result = _uint32_or( sel_a, sel_b ); - - return (result); -} - -// Load Immediate -static inline uint16 _uint16_li( uint16 a ) -{ - return (a); -} - -// Extend sign -static inline uint16 _uint16_ext( uint16 a ) -{ - return (((int16)a)>>15); -} - -// Negate -static inline uint16 _uint16_neg( uint16 a ) -{ - return (-a); -} - -// Complement -static inline uint16 _uint16_not( uint16 a ) -{ - return (~a); -} - -// Decrement -static inline uint16 _uint16_dec( uint16 a ) -{ - return (a - 1); -} - -// Shift Left Logical -static inline uint16 _uint16_sll( uint16 a, int sa ) -{ - return (a << sa); -} - -// Shift Right Logical -static inline uint16 _uint16_srl( uint16 a, int sa ) -{ - return (a >> sa); -} - -// Add -static inline uint16 _uint16_add( uint16 a, uint16 b ) -{ - return (a + b); -} - -// Subtract -static inline uint16 _uint16_sub( uint16 a, uint16 b ) -{ - return (a - b); -} - -// And -static inline uint16 _uint16_and( uint16 a, uint16 b ) -{ - return (a & b); -} - -// Or -static inline uint16 _uint16_or( uint16 a, uint16 b ) -{ - return (a | b); -} - -// Exclusive Or -static inline uint16 _uint16_xor( uint16 a, uint16 b ) -{ - return (a ^ b); -} - -// And with Complement -static inline uint16 _uint16_andc( uint16 a, uint16 b ) -{ - return (a & ~b); -} - -// And then Shift Right Logical -static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa ) -{ - return ((a & b) >> sa); -} - -// Shift Right Logical then Mask -static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask ) -{ - return ((a >> sa) & mask); -} - -// Add then Mask -static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask ) -{ - return ((a + b) & mask); -} - - -// Select on Sign bit -static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b ) -{ - const uint16 mask = _uint16_ext( test ); - const 
uint16 sel_a = _uint16_and( a, mask ); - const uint16 sel_b = _uint16_andc( b, mask ); - const uint16 result = _uint16_or( sel_a, sel_b ); - - return (result); -} - -#if NV_OS_XBOX -#include -#elif NV_CC_MSVC - -#include -#pragma intrinsic(_BitScanReverse) - -uint32 _uint32_nlz( uint32 x ) { - unsigned long index; - _BitScanReverse(&index, x); - return 31 - index; -} -#endif - - -// Count Leading Zeros -static inline uint32 _uint32_cntlz( uint32 x ) -{ -#if NV_CC_GCC - /* On PowerPC, this will map to insn: cntlzw */ - /* On Pentium, this will map to insn: clz */ - uint32 is_x_nez_msb = _uint32_neg( x ); - uint32 nlz = __builtin_clz( x ); - uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); - return (result); -#elif NV_OS_XBOX - // Xbox PPC has this as an intrinsic. - return _CountLeadingZeros(x); -#elif NV_CC_MSVC - uint32 is_x_nez_msb = _uint32_neg( x ); - uint32 nlz = _uint32_nlz( x ); - uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); - return (result); -#else - const uint32 x0 = _uint32_srl( x, 1 ); - const uint32 x1 = _uint32_or( x, x0 ); - const uint32 x2 = _uint32_srl( x1, 2 ); - const uint32 x3 = _uint32_or( x1, x2 ); - const uint32 x4 = _uint32_srl( x3, 4 ); - const uint32 x5 = _uint32_or( x3, x4 ); - const uint32 x6 = _uint32_srl( x5, 8 ); - const uint32 x7 = _uint32_or( x5, x6 ); - const uint32 x8 = _uint32_srl( x7, 16 ); - const uint32 x9 = _uint32_or( x7, x8 ); - const uint32 xA = _uint32_not( x9 ); - const uint32 xB = _uint32_srl( xA, 1 ); - const uint32 xC = _uint32_and( xB, 0x55555555 ); - const uint32 xD = _uint32_sub( xA, xC ); - const uint32 xE = _uint32_and( xD, 0x33333333 ); - const uint32 xF = _uint32_srl( xD, 2 ); - const uint32 x10 = _uint32_and( xF, 0x33333333 ); - const uint32 x11 = _uint32_add( xE, x10 ); - const uint32 x12 = _uint32_srl( x11, 4 ); - const uint32 x13 = _uint32_add( x11, x12 ); - const uint32 x14 = _uint32_and( x13, 0x0f0f0f0f ); - const uint32 x15 = _uint32_srl( x14, 8 ); - const uint32 x16 = 
_uint32_add( x14, x15 ); - const uint32 x17 = _uint32_srl( x16, 16 ); - const uint32 x18 = _uint32_add( x16, x17 ); - const uint32 x19 = _uint32_and( x18, 0x0000003f ); - return ( x19 ); -#endif -} - -// Count Leading Zeros -static inline uint16 _uint16_cntlz( uint16 x ) -{ -#ifdef __GNUC__ - /* On PowerPC, this will map to insn: cntlzw */ - /* On Pentium, this will map to insn: clz */ - uint16 nlz32 = (uint16)_uint32_cntlz( (uint32)x ); - uint32 nlz = _uint32_sub( nlz32, 16 ); - return (nlz); -#elif _NV_OS_XBOX_ - uint16 nlz32 = (uint16)_CountLeadingZeros( (uint32)x ); - return _uint32_sub( nlz32, 16); -#else - const uint16 x0 = _uint16_srl( x, 1 ); - const uint16 x1 = _uint16_or( x, x0 ); - const uint16 x2 = _uint16_srl( x1, 2 ); - const uint16 x3 = _uint16_or( x1, x2 ); - const uint16 x4 = _uint16_srl( x3, 4 ); - const uint16 x5 = _uint16_or( x3, x4 ); - const uint16 x6 = _uint16_srl( x5, 8 ); - const uint16 x7 = _uint16_or( x5, x6 ); - const uint16 x8 = _uint16_not( x7 ); - const uint16 x9 = _uint16_srlm( x8, 1, 0x5555 ); - const uint16 xA = _uint16_sub( x8, x9 ); - const uint16 xB = _uint16_and( xA, 0x3333 ); - const uint16 xC = _uint16_srlm( xA, 2, 0x3333 ); - const uint16 xD = _uint16_add( xB, xC ); - const uint16 xE = _uint16_srl( xD, 4 ); - const uint16 xF = _uint16_addm( xD, xE, 0x0f0f ); - const uint16 x10 = _uint16_srl( xF, 8 ); - const uint16 x11 = _uint16_addm( xF, x10, 0x001f ); - return ( x11 ); -#endif -} - -uint16 -nv::half_from_float( uint32 f ) -{ - const uint32 one = _uint32_li( 0x00000001 ); - const uint32 f_s_mask = _uint32_li( 0x80000000 ); - const uint32 f_e_mask = _uint32_li( 0x7f800000 ); - const uint32 f_m_mask = _uint32_li( 0x007fffff ); - const uint32 f_m_hidden_bit = _uint32_li( 0x00800000 ); - const uint32 f_m_round_bit = _uint32_li( 0x00001000 ); - const uint32 f_snan_mask = _uint32_li( 0x7fc00000 ); - const uint32 f_e_pos = _uint32_li( 0x00000017 ); - const uint32 h_e_pos = _uint32_li( 0x0000000a ); - const uint32 h_e_mask = 
_uint32_li( 0x00007c00 ); - const uint32 h_snan_mask = _uint32_li( 0x00007e00 ); - const uint32 h_e_mask_value = _uint32_li( 0x0000001f ); - const uint32 f_h_s_pos_offset = _uint32_li( 0x00000010 ); - const uint32 f_h_bias_offset = _uint32_li( 0x00000070 ); - const uint32 f_h_m_pos_offset = _uint32_li( 0x0000000d ); - const uint32 h_nan_min = _uint32_li( 0x00007c01 ); - const uint32 f_h_e_biased_flag = _uint32_li( 0x0000008f ); - const uint32 f_s = _uint32_and( f, f_s_mask ); - const uint32 f_e = _uint32_and( f, f_e_mask ); - const uint16 h_s = _uint32_srl( f_s, f_h_s_pos_offset ); - const uint32 f_m = _uint32_and( f, f_m_mask ); - const uint16 f_e_amount = _uint32_srl( f_e, f_e_pos ); - const uint32 f_e_half_bias = _uint32_sub( f_e_amount, f_h_bias_offset ); - const uint32 f_snan = _uint32_and( f, f_snan_mask ); - const uint32 f_m_round_mask = _uint32_and( f_m, f_m_round_bit ); - const uint32 f_m_round_offset = _uint32_sll( f_m_round_mask, one ); - const uint32 f_m_rounded = _uint32_add( f_m, f_m_round_offset ); - const uint32 f_m_denorm_sa = _uint32_sub( one, f_e_half_bias ); - const uint32 f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit ); - const uint32 f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa ); - const uint32 h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset ); - const uint32 f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit ); - const uint32 m_nan = _uint32_srl( f_m, f_h_m_pos_offset ); - const uint32 h_em_nan = _uint32_or( h_e_mask, m_nan ); - const uint32 h_e_norm_overflow_offset = _uint32_inc( f_e_half_bias ); - const uint32 h_e_norm_overflow = _uint32_sll( h_e_norm_overflow_offset, h_e_pos ); - const uint32 h_e_norm = _uint32_sll( f_e_half_bias, h_e_pos ); - const uint32 h_m_norm = _uint32_srl( f_m_rounded, f_h_m_pos_offset ); - const uint32 h_em_norm = _uint32_or( h_e_norm, h_m_norm ); - const uint32 is_h_ndenorm_msb = _uint32_sub( f_h_bias_offset, f_e_amount ); - const uint32 is_f_e_flagged_msb = 
_uint32_sub( f_h_e_biased_flag, f_e_half_bias ); - const uint32 is_h_denorm_msb = _uint32_not( is_h_ndenorm_msb ); - const uint32 is_f_m_eqz_msb = _uint32_dec( f_m ); - const uint32 is_h_nan_eqz_msb = _uint32_dec( m_nan ); - const uint32 is_f_inf_msb = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb ); - const uint32 is_f_nan_underflow_msb = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb ); - const uint32 is_e_overflow_msb = _uint32_sub( h_e_mask_value, f_e_half_bias ); - const uint32 is_h_inf_msb = _uint32_or( is_e_overflow_msb, is_f_inf_msb ); - const uint32 is_f_nsnan_msb = _uint32_sub( f_snan, f_snan_mask ); - const uint32 is_m_norm_overflow_msb = _uint32_neg( f_m_rounded_overflow ); - const uint32 is_f_snan_msb = _uint32_not( is_f_nsnan_msb ); - const uint32 h_em_overflow_result = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm ); - const uint32 h_em_nan_result = _uint32_sels( is_f_e_flagged_msb, h_em_nan, h_em_overflow_result ); - const uint32 h_em_nan_underflow_result = _uint32_sels( is_f_nan_underflow_msb, h_nan_min, h_em_nan_result ); - const uint32 h_em_inf_result = _uint32_sels( is_h_inf_msb, h_e_mask, h_em_nan_underflow_result ); - const uint32 h_em_denorm_result = _uint32_sels( is_h_denorm_msb, h_m_denorm, h_em_inf_result ); - const uint32 h_em_snan_result = _uint32_sels( is_f_snan_msb, h_snan_mask, h_em_denorm_result ); - const uint32 h_result = _uint32_or( h_s, h_em_snan_result ); - - return (uint16)(h_result); -} - -uint32 -nv::half_to_float( uint16 h ) -{ - const uint32 h_e_mask = _uint32_li( 0x00007c00 ); - const uint32 h_m_mask = _uint32_li( 0x000003ff ); - const uint32 h_s_mask = _uint32_li( 0x00008000 ); - const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 ); - const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d ); - const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 ); - const uint32 f_e_mask = _uint32_li( 0x7f800000 ); - const uint32 f_m_mask = _uint32_li( 0x007fffff ); - const uint32 h_f_e_denorm_bias = 
_uint32_li( 0x0000007e ); - const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 ); - const uint32 f_e_pos = _uint32_li( 0x00000017 ); - const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff ); - const uint32 h_e = _uint32_and( h, h_e_mask ); - const uint32 h_m = _uint32_and( h, h_m_mask ); - const uint32 h_s = _uint32_and( h, h_s_mask ); - const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset ); - const uint32 h_m_nlz = _uint32_cntlz( h_m ); - const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset ); - const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset ); - const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset ); - const uint32 f_em = _uint32_or( f_e, f_m ); - const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias ); - const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa ); - const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa ); - const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask ); - const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos ); - const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm ); - const uint32 f_em_nan = _uint32_or( f_e_mask, f_m ); - const uint32 is_e_eqz_msb = _uint32_dec( h_e ); - const uint32 is_m_nez_msb = _uint32_neg( h_m ); - const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e ); - const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb ); - const uint32 is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb ); - const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb ); - const uint32 is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb ); - const uint32 is_zero = _uint32_ext( is_zero_msb ); - const uint32 f_zero_result = _uint32_andc( f_em, is_zero ); - const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result ); - const uint32 f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result ); - const uint32 f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result ); 
- const uint32 f_result = _uint32_or( f_s, f_nan_result ); - - return (f_result); -} - - -// @@ These tables could be smaller. -static uint32 mantissa_table[2048]; -static uint32 exponent_table[64]; -static uint32 offset_table[64]; - -void nv::half_init_tables() -{ - // Init mantissa table. - mantissa_table[0] = 0; - - for (int i = 1; i < 1024; i++) { - uint m = i << 13; - uint e = 0; - - while ((m & 0x00800000) == 0) { - e -= 0x00800000; - m <<= 1; - } - m &= ~0x00800000; - e += 0x38800000; - mantissa_table[i] = m | e; - } - - for (int i = 1024; i < 2048; i++) { - mantissa_table[i] = 0x38000000 + ((i - 1024) << 13); - } - - - // Init exponent table. - exponent_table[0] = 0; - - for (int i = 1; i < 31; i++) { - exponent_table[i] = (i << 23); - } - - exponent_table[31] = 0x47800000; - exponent_table[32] = 0x80000000; - - for (int i = 33; i < 63; i++) { - exponent_table[i] = 0x80000000 + ((i - 32) << 23); - } - - exponent_table[63] = 0xC7800000; - - - // Init offset table. - offset_table[0] = 0; - - for (int i = 1; i < 32; i++) { - offset_table[i] = 1024; - } - - offset_table[32] = 0; - - for (int i = 33; i < 64; i++) { - offset_table[i] = 1024; - } - - /*for (int i = 0; i < 64; i++) { - offset_table[i] = ((i & 31) != 0) * 1024; - }*/ -} - -// Fast half to float conversion based on: -// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf -uint32 nv::fast_half_to_float(uint16 h) -{ - uint exp = h >> 10; - return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; -} - - -#if 0 -// Inaccurate conversion suggested at the ffmpeg mailing list: -// http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2009-July/068949.html -uint32 nv::fast_half_to_float(uint16 v) -{ - if (v & 0x8000) return 0; - uint exp = v >> 10; - if (!exp) return (v>>9)&1; - if (exp >= 15) return 0xffff; - v <<= 6; - return (v+(1<<16)) >> (15-exp); -} - -#endif - -#if 0 - -// Some more from a gamedev thread: -// http://www.devmaster.net/forums/showthread.php?t=10924 - -// I believe 
it does not handle specials either. - -// Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though. - - -static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000}; -static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00}; -static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF}; -static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000}; - -__asm -{ - movaps xmm1, xmm0 // Input in xmm0 - movaps xmm2, xmm0 - - andps xmm0, half_sign - andps xmm1, half_exponent - andps xmm2, half_mantissa - paddd xmm1, half_bias_offset - - pslld xmm0, 16 - pslld xmm1, 13 - pslld xmm2, 13 - - orps xmm1, xmm2 - orps xmm0, xmm1 // Result in xmm0 -} - - +// Branch-free implementation of half-precision (16 bit) floating point +// Copyright 2006 Mike Acton +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE +// +// Half-precision floating point format +// ------------------------------------ +// +// | Field | Last | First | Note +// |----------|------|-------|---------- +// | Sign | 15 | 15 | +// | Exponent | 14 | 10 | Bias = 15 +// | Mantissa | 9 | 0 | +// +// Compiling +// --------- +// +// Preferred compile flags for GCC: +// -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing +// +// This file is a C99 source file, intended to be compiled with a C99 +// compliant compiler. However, for the moment it remains combatible +// with C++98. Therefore if you are using a compiler that poorly implements +// C standards (e.g. MSVC), it may be compiled as C++. This is not +// guaranteed for future versions. +// +// Features +// -------- +// +// * QNaN + = QNaN +// * + +INF = +INF +// * - -INF = -INF +// * INF - INF = SNaN +// * Denormalized values +// * Difference of ZEROs is always +ZERO +// * Sum round with guard + round + sticky bit (grs) +// * And of course... 
no branching +// +// Precision of Sum +// ---------------- +// +// (SUM) uint16 z = half_add( x, y ); +// (DIFFERENCE) uint16 z = half_add( x, -y ); +// +// Will have exactly (0 ulps difference) the same result as: +// (For 32 bit IEEE 784 floating point and same rounding mode) +// +// union FLOAT_32 +// { +// float f32; +// uint32 u32; +// }; +// +// union FLOAT_32 fx = { .u32 = half_to_float( x ) }; +// union FLOAT_32 fy = { .u32 = half_to_float( y ) }; +// union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32 }; +// uint16 z = float_to_half( fz ); +// + +#include "Half.h" +#include + +// Load immediate +static inline uint32 _uint32_li( uint32 a ) +{ + return (a); +} + +// Decrement +static inline uint32 _uint32_dec( uint32 a ) +{ + return (a - 1); +} + +// Increment +static inline uint32 _uint32_inc( uint32 a ) +{ + return (a + 1); +} + +// Complement +static inline uint32 _uint32_not( uint32 a ) +{ + return (~a); +} + +// Negate +static inline uint32 _uint32_neg( uint32 a ) +{ +#pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned + return (-a); +#pragma warning(default : 4146) +} + +// Extend sign +static inline uint32 _uint32_ext( uint32 a ) +{ + return (((int32)a)>>31); +} + +// And +static inline uint32 _uint32_and( uint32 a, uint32 b ) +{ + return (a & b); +} + +// And with Complement +static inline uint32 _uint32_andc( uint32 a, uint32 b ) +{ + return (a & ~b); +} + +// Or +static inline uint32 _uint32_or( uint32 a, uint32 b ) +{ + return (a | b); +} + +// Shift Right Logical +static inline uint32 _uint32_srl( uint32 a, int sa ) +{ + return (a >> sa); +} + +// Shift Left Logical +static inline uint32 _uint32_sll( uint32 a, int sa ) +{ + return (a << sa); +} + +// Add +static inline uint32 _uint32_add( uint32 a, uint32 b ) +{ + return (a + b); +} + +// Subtract +static inline uint32 _uint32_sub( uint32 a, uint32 b ) +{ + return (a - b); +} + +// Select on Sign bit +static inline uint32 _uint32_sels( uint32 test, 
uint32 a, uint32 b ) +{ + const uint32 mask = _uint32_ext( test ); + const uint32 sel_a = _uint32_and( a, mask ); + const uint32 sel_b = _uint32_andc( b, mask ); + const uint32 result = _uint32_or( sel_a, sel_b ); + + return (result); +} + +// Load Immediate +static inline uint16 _uint16_li( uint16 a ) +{ + return (a); +} + +// Extend sign +static inline uint16 _uint16_ext( uint16 a ) +{ + return (((int16)a)>>15); +} + +// Negate +static inline uint16 _uint16_neg( uint16 a ) +{ + return (-a); +} + +// Complement +static inline uint16 _uint16_not( uint16 a ) +{ + return (~a); +} + +// Decrement +static inline uint16 _uint16_dec( uint16 a ) +{ + return (a - 1); +} + +// Shift Left Logical +static inline uint16 _uint16_sll( uint16 a, int sa ) +{ + return (a << sa); +} + +// Shift Right Logical +static inline uint16 _uint16_srl( uint16 a, int sa ) +{ + return (a >> sa); +} + +// Add +static inline uint16 _uint16_add( uint16 a, uint16 b ) +{ + return (a + b); +} + +// Subtract +static inline uint16 _uint16_sub( uint16 a, uint16 b ) +{ + return (a - b); +} + +// And +static inline uint16 _uint16_and( uint16 a, uint16 b ) +{ + return (a & b); +} + +// Or +static inline uint16 _uint16_or( uint16 a, uint16 b ) +{ + return (a | b); +} + +// Exclusive Or +static inline uint16 _uint16_xor( uint16 a, uint16 b ) +{ + return (a ^ b); +} + +// And with Complement +static inline uint16 _uint16_andc( uint16 a, uint16 b ) +{ + return (a & ~b); +} + +// And then Shift Right Logical +static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa ) +{ + return ((a & b) >> sa); +} + +// Shift Right Logical then Mask +static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask ) +{ + return ((a >> sa) & mask); +} + +// Add then Mask +static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask ) +{ + return ((a + b) & mask); +} + + +// Select on Sign bit +static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b ) +{ + const uint16 mask = _uint16_ext( test ); + const 
uint16 sel_a = _uint16_and( a, mask ); + const uint16 sel_b = _uint16_andc( b, mask ); + const uint16 result = _uint16_or( sel_a, sel_b ); + + return (result); +} + +#if NV_OS_XBOX +#include +#elif NV_CC_MSVC + +#include +#pragma intrinsic(_BitScanReverse) + +uint32 _uint32_nlz( uint32 x ) { + unsigned long index; + _BitScanReverse(&index, x); + return 31 - index; +} +#endif + + +// Count Leading Zeros +static inline uint32 _uint32_cntlz( uint32 x ) +{ +#if NV_CC_GCC + /* On PowerPC, this will map to insn: cntlzw */ + /* On Pentium, this will map to insn: clz */ + uint32 is_x_nez_msb = _uint32_neg( x ); + uint32 nlz = __builtin_clz( x ); + uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); + return (result); +#elif NV_OS_XBOX + // Xbox PPC has this as an intrinsic. + return _CountLeadingZeros(x); +#elif NV_CC_MSVC + uint32 is_x_nez_msb = _uint32_neg( x ); + uint32 nlz = _uint32_nlz( x ); + uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); + return (result); +#else + const uint32 x0 = _uint32_srl( x, 1 ); + const uint32 x1 = _uint32_or( x, x0 ); + const uint32 x2 = _uint32_srl( x1, 2 ); + const uint32 x3 = _uint32_or( x1, x2 ); + const uint32 x4 = _uint32_srl( x3, 4 ); + const uint32 x5 = _uint32_or( x3, x4 ); + const uint32 x6 = _uint32_srl( x5, 8 ); + const uint32 x7 = _uint32_or( x5, x6 ); + const uint32 x8 = _uint32_srl( x7, 16 ); + const uint32 x9 = _uint32_or( x7, x8 ); + const uint32 xA = _uint32_not( x9 ); + const uint32 xB = _uint32_srl( xA, 1 ); + const uint32 xC = _uint32_and( xB, 0x55555555 ); + const uint32 xD = _uint32_sub( xA, xC ); + const uint32 xE = _uint32_and( xD, 0x33333333 ); + const uint32 xF = _uint32_srl( xD, 2 ); + const uint32 x10 = _uint32_and( xF, 0x33333333 ); + const uint32 x11 = _uint32_add( xE, x10 ); + const uint32 x12 = _uint32_srl( x11, 4 ); + const uint32 x13 = _uint32_add( x11, x12 ); + const uint32 x14 = _uint32_and( x13, 0x0f0f0f0f ); + const uint32 x15 = _uint32_srl( x14, 8 ); + const uint32 x16 = 
_uint32_add( x14, x15 ); + const uint32 x17 = _uint32_srl( x16, 16 ); + const uint32 x18 = _uint32_add( x16, x17 ); + const uint32 x19 = _uint32_and( x18, 0x0000003f ); + return ( x19 ); +#endif +} + +// Count Leading Zeros +static inline uint16 _uint16_cntlz( uint16 x ) +{ +#ifdef __GNUC__ + /* On PowerPC, this will map to insn: cntlzw */ + /* On Pentium, this will map to insn: clz */ + uint16 nlz32 = (uint16)_uint32_cntlz( (uint32)x ); + uint32 nlz = _uint32_sub( nlz32, 16 ); + return (nlz); +#elif _NV_OS_XBOX_ + uint16 nlz32 = (uint16)_CountLeadingZeros( (uint32)x ); + return _uint32_sub( nlz32, 16); +#else + const uint16 x0 = _uint16_srl( x, 1 ); + const uint16 x1 = _uint16_or( x, x0 ); + const uint16 x2 = _uint16_srl( x1, 2 ); + const uint16 x3 = _uint16_or( x1, x2 ); + const uint16 x4 = _uint16_srl( x3, 4 ); + const uint16 x5 = _uint16_or( x3, x4 ); + const uint16 x6 = _uint16_srl( x5, 8 ); + const uint16 x7 = _uint16_or( x5, x6 ); + const uint16 x8 = _uint16_not( x7 ); + const uint16 x9 = _uint16_srlm( x8, 1, 0x5555 ); + const uint16 xA = _uint16_sub( x8, x9 ); + const uint16 xB = _uint16_and( xA, 0x3333 ); + const uint16 xC = _uint16_srlm( xA, 2, 0x3333 ); + const uint16 xD = _uint16_add( xB, xC ); + const uint16 xE = _uint16_srl( xD, 4 ); + const uint16 xF = _uint16_addm( xD, xE, 0x0f0f ); + const uint16 x10 = _uint16_srl( xF, 8 ); + const uint16 x11 = _uint16_addm( xF, x10, 0x001f ); + return ( x11 ); +#endif +} + +uint16 +nv::half_from_float( uint32 f ) +{ + const uint32 one = _uint32_li( 0x00000001 ); + const uint32 f_s_mask = _uint32_li( 0x80000000 ); + const uint32 f_e_mask = _uint32_li( 0x7f800000 ); + const uint32 f_m_mask = _uint32_li( 0x007fffff ); + const uint32 f_m_hidden_bit = _uint32_li( 0x00800000 ); + const uint32 f_m_round_bit = _uint32_li( 0x00001000 ); + const uint32 f_snan_mask = _uint32_li( 0x7fc00000 ); + const uint32 f_e_pos = _uint32_li( 0x00000017 ); + const uint32 h_e_pos = _uint32_li( 0x0000000a ); + const uint32 h_e_mask = 
_uint32_li( 0x00007c00 ); + const uint32 h_snan_mask = _uint32_li( 0x00007e00 ); + const uint32 h_e_mask_value = _uint32_li( 0x0000001f ); + const uint32 f_h_s_pos_offset = _uint32_li( 0x00000010 ); + const uint32 f_h_bias_offset = _uint32_li( 0x00000070 ); + const uint32 f_h_m_pos_offset = _uint32_li( 0x0000000d ); + const uint32 h_nan_min = _uint32_li( 0x00007c01 ); + const uint32 f_h_e_biased_flag = _uint32_li( 0x0000008f ); + const uint32 f_s = _uint32_and( f, f_s_mask ); + const uint32 f_e = _uint32_and( f, f_e_mask ); + const uint16 h_s = _uint32_srl( f_s, f_h_s_pos_offset ); + const uint32 f_m = _uint32_and( f, f_m_mask ); + const uint16 f_e_amount = _uint32_srl( f_e, f_e_pos ); + const uint32 f_e_half_bias = _uint32_sub( f_e_amount, f_h_bias_offset ); + const uint32 f_snan = _uint32_and( f, f_snan_mask ); + const uint32 f_m_round_mask = _uint32_and( f_m, f_m_round_bit ); + const uint32 f_m_round_offset = _uint32_sll( f_m_round_mask, one ); + const uint32 f_m_rounded = _uint32_add( f_m, f_m_round_offset ); + const uint32 f_m_denorm_sa = _uint32_sub( one, f_e_half_bias ); + const uint32 f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit ); + const uint32 f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa ); + const uint32 h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset ); + const uint32 f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit ); + const uint32 m_nan = _uint32_srl( f_m, f_h_m_pos_offset ); + const uint32 h_em_nan = _uint32_or( h_e_mask, m_nan ); + const uint32 h_e_norm_overflow_offset = _uint32_inc( f_e_half_bias ); + const uint32 h_e_norm_overflow = _uint32_sll( h_e_norm_overflow_offset, h_e_pos ); + const uint32 h_e_norm = _uint32_sll( f_e_half_bias, h_e_pos ); + const uint32 h_m_norm = _uint32_srl( f_m_rounded, f_h_m_pos_offset ); + const uint32 h_em_norm = _uint32_or( h_e_norm, h_m_norm ); + const uint32 is_h_ndenorm_msb = _uint32_sub( f_h_bias_offset, f_e_amount ); + const uint32 is_f_e_flagged_msb = 
_uint32_sub( f_h_e_biased_flag, f_e_half_bias ); + const uint32 is_h_denorm_msb = _uint32_not( is_h_ndenorm_msb ); + const uint32 is_f_m_eqz_msb = _uint32_dec( f_m ); + const uint32 is_h_nan_eqz_msb = _uint32_dec( m_nan ); + const uint32 is_f_inf_msb = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb ); + const uint32 is_f_nan_underflow_msb = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb ); + const uint32 is_e_overflow_msb = _uint32_sub( h_e_mask_value, f_e_half_bias ); + const uint32 is_h_inf_msb = _uint32_or( is_e_overflow_msb, is_f_inf_msb ); + const uint32 is_f_nsnan_msb = _uint32_sub( f_snan, f_snan_mask ); + const uint32 is_m_norm_overflow_msb = _uint32_neg( f_m_rounded_overflow ); + const uint32 is_f_snan_msb = _uint32_not( is_f_nsnan_msb ); + const uint32 h_em_overflow_result = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm ); + const uint32 h_em_nan_result = _uint32_sels( is_f_e_flagged_msb, h_em_nan, h_em_overflow_result ); + const uint32 h_em_nan_underflow_result = _uint32_sels( is_f_nan_underflow_msb, h_nan_min, h_em_nan_result ); + const uint32 h_em_inf_result = _uint32_sels( is_h_inf_msb, h_e_mask, h_em_nan_underflow_result ); + const uint32 h_em_denorm_result = _uint32_sels( is_h_denorm_msb, h_m_denorm, h_em_inf_result ); + const uint32 h_em_snan_result = _uint32_sels( is_f_snan_msb, h_snan_mask, h_em_denorm_result ); + const uint32 h_result = _uint32_or( h_s, h_em_snan_result ); + + return (uint16)(h_result); +} + +uint32 +nv::half_to_float( uint16 h ) +{ + const uint32 h_e_mask = _uint32_li( 0x00007c00 ); + const uint32 h_m_mask = _uint32_li( 0x000003ff ); + const uint32 h_s_mask = _uint32_li( 0x00008000 ); + const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 ); + const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d ); + const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 ); + const uint32 f_e_mask = _uint32_li( 0x7f800000 ); + const uint32 f_m_mask = _uint32_li( 0x007fffff ); + const uint32 h_f_e_denorm_bias = 
_uint32_li( 0x0000007e ); + const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 ); + const uint32 f_e_pos = _uint32_li( 0x00000017 ); + const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff ); + const uint32 h_e = _uint32_and( h, h_e_mask ); + const uint32 h_m = _uint32_and( h, h_m_mask ); + const uint32 h_s = _uint32_and( h, h_s_mask ); + const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset ); + const uint32 h_m_nlz = _uint32_cntlz( h_m ); + const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset ); + const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset ); + const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset ); + const uint32 f_em = _uint32_or( f_e, f_m ); + const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias ); + const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa ); + const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa ); + const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask ); + const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos ); + const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm ); + const uint32 f_em_nan = _uint32_or( f_e_mask, f_m ); + const uint32 is_e_eqz_msb = _uint32_dec( h_e ); + const uint32 is_m_nez_msb = _uint32_neg( h_m ); + const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e ); + const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb ); + const uint32 is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb ); + const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb ); + const uint32 is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb ); + const uint32 is_zero = _uint32_ext( is_zero_msb ); + const uint32 f_zero_result = _uint32_andc( f_em, is_zero ); + const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result ); + const uint32 f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result ); + const uint32 f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result ); 
+ const uint32 f_result = _uint32_or( f_s, f_nan_result ); + + return (f_result); +} + + +// @@ These tables could be smaller. +static uint32 mantissa_table[2048]; +static uint32 exponent_table[64]; +static uint32 offset_table[64]; + +void nv::half_init_tables() +{ + // Init mantissa table. + mantissa_table[0] = 0; + + for (int i = 1; i < 1024; i++) { + uint m = i << 13; + uint e = 0; + + while ((m & 0x00800000) == 0) { + e -= 0x00800000; + m <<= 1; + } + m &= ~0x00800000; + e += 0x38800000; + mantissa_table[i] = m | e; + } + + for (int i = 1024; i < 2048; i++) { + mantissa_table[i] = 0x38000000 + ((i - 1024) << 13); + } + + + // Init exponent table. + exponent_table[0] = 0; + + for (int i = 1; i < 31; i++) { + exponent_table[i] = (i << 23); + } + + exponent_table[31] = 0x47800000; + exponent_table[32] = 0x80000000; + + for (int i = 33; i < 63; i++) { + exponent_table[i] = 0x80000000 + ((i - 32) << 23); + } + + exponent_table[63] = 0xC7800000; + + + // Init offset table. + offset_table[0] = 0; + + for (int i = 1; i < 32; i++) { + offset_table[i] = 1024; + } + + offset_table[32] = 0; + + for (int i = 33; i < 64; i++) { + offset_table[i] = 1024; + } + + /*for (int i = 0; i < 64; i++) { + offset_table[i] = ((i & 31) != 0) * 1024; + }*/ +} + +// Fast half to float conversion based on: +// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf +uint32 nv::fast_half_to_float(uint16 h) +{ + uint exp = h >> 10; + return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; +} + + +#if 0 +// Inaccurate conversion suggested at the ffmpeg mailing list: +// http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2009-July/068949.html +uint32 nv::fast_half_to_float(uint16 v) +{ + if (v & 0x8000) return 0; + uint exp = v >> 10; + if (!exp) return (v>>9)&1; + if (exp >= 15) return 0xffff; + v <<= 6; + return (v+(1<<16)) >> (15-exp); +} + +#endif + +#if 0 + +// Some more from a gamedev thread: +// http://www.devmaster.net/forums/showthread.php?t=10924 + +// I believe 
it does not handle specials either. + +// Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though. + + +static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000}; +static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00}; +static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF}; +static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000}; + +__asm +{ + movaps xmm1, xmm0 // Input in xmm0 + movaps xmm2, xmm0 + + andps xmm0, half_sign + andps xmm1, half_exponent + andps xmm2, half_mantissa + paddd xmm1, half_bias_offset + + pslld xmm0, 16 + pslld xmm1, 13 + pslld xmm2, 13 + + orps xmm1, xmm2 + orps xmm0, xmm1 // Result in xmm0 +} + + #endif \ No newline at end of file diff --git a/src/nvmath/Half.h b/src/nvmath/Half.h index 08f8f11..f732e93 100644 --- a/src/nvmath/Half.h +++ b/src/nvmath/Half.h @@ -1,30 +1,30 @@ -#pragma once -#ifndef NV_MATH_HALF_H -#define NV_MATH_HALF_H - -#include "nvmath.h" - -namespace nv { - - uint32 half_to_float( uint16 h ); - uint16 half_from_float( uint32 f ); - - void half_init_tables(); - - uint32 fast_half_to_float(uint16 h); - - inline uint16 to_half(float c) { - union { float f; uint32 u; } f; - f.f = c; - return nv::half_from_float( f.u ); - } - - inline float to_float(uint16 c) { - union { float f; uint32 u; } f; - f.u = nv::fast_half_to_float( c ); - return f.f; - } - -} // nv namespace - -#endif // NV_MATH_HALF_H +#pragma once +#ifndef NV_MATH_HALF_H +#define NV_MATH_HALF_H + +#include "nvmath.h" + +namespace nv { + + uint32 half_to_float( uint16 h ); + uint16 half_from_float( uint32 f ); + + void half_init_tables(); + + uint32 fast_half_to_float(uint16 h); + + inline uint16 to_half(float c) { + union { float f; uint32 u; } f; + f.f = c; + return 
nv::half_from_float( f.u ); + } + + inline float to_float(uint16 c) { + union { float f; uint32 u; } f; + f.u = nv::fast_half_to_float( c ); + return f.f; + } + +} // nv namespace + +#endif // NV_MATH_HALF_H diff --git a/src/nvmath/Matrix.h b/src/nvmath/Matrix.h index 5bd2cab..273e639 100644 --- a/src/nvmath/Matrix.h +++ b/src/nvmath/Matrix.h @@ -1,1199 +1,1199 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#pragma once -#ifndef NV_MATH_MATRIX_H -#define NV_MATH_MATRIX_H - -#include -#include - -namespace nv -{ - enum identity_t { identity }; - - class NVMATH_CLASS Matrix3 - { - public: - Matrix3(); - explicit Matrix3(float f); - explicit Matrix3(identity_t); - Matrix3(const Matrix3 & m); - Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2); - - scalar get(uint row, uint col) const; - scalar operator()(uint row, uint col) const; - scalar & operator()(uint row, uint col); - - Vector3 row(uint i) const; - Vector3 column(uint i) const; - - void operator*=(float s); - void operator/=(float s); - void operator+=(const Matrix3 & m); - void operator-=(const Matrix3 & m); - - float determinant() const; - - private: - scalar m_data[9]; - }; - - inline Matrix3::Matrix3() {} - - inline Matrix3::Matrix3(float f) - { - for(int i = 0; i < 9; i++) { - m_data[i] = f; - } - } - - inline Matrix3::Matrix3(identity_t) - { - for(int i = 0; i < 3; i++) { - for(int j = 0; j < 3; j++) { - m_data[3*j+i] = (i == j) ? 
1.0f : 0.0f; - } - } - } - - inline Matrix3::Matrix3(const Matrix3 & m) - { - for(int i = 0; i < 9; i++) { - m_data[i] = m.m_data[i]; - } - } - - inline Matrix3::Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2) - { - m_data[0] = v0.x; m_data[1] = v0.y; m_data[2] = v0.z; - m_data[3] = v1.x; m_data[4] = v1.y; m_data[5] = v1.z; - m_data[6] = v2.x; m_data[7] = v2.y; m_data[8] = v2.z; - } - - inline scalar Matrix3::get(uint row, uint col) const - { - nvDebugCheck(row < 3 && col < 3); - return m_data[col * 3 + row]; - } - inline scalar Matrix3::operator()(uint row, uint col) const - { - nvDebugCheck(row < 3 && col < 3); - return m_data[col * 3 + row]; - } - inline scalar & Matrix3::operator()(uint row, uint col) - { - nvDebugCheck(row < 3 && col < 3); - return m_data[col * 3 + row]; - } - - inline Vector3 Matrix3::row(uint i) const - { - nvDebugCheck(i < 3); - return Vector3(get(i, 0), get(i, 1), get(i, 2)); - } - inline Vector3 Matrix3::column(uint i) const - { - nvDebugCheck(i < 3); - return Vector3(get(0, i), get(1, i), get(2, i)); - } - - inline void Matrix3::operator*=(float s) - { - for(int i = 0; i < 9; i++) { - m_data[i] *= s; - } - } - - inline void Matrix3::operator/=(float s) - { - float is = 1.0f /s; - for(int i = 0; i < 9; i++) { - m_data[i] *= is; - } - } - - inline void Matrix3::operator+=(const Matrix3 & m) - { - for(int i = 0; i < 9; i++) { - m_data[i] += m.m_data[i]; - } - } - - inline void Matrix3::operator-=(const Matrix3 & m) - { - for(int i = 0; i < 9; i++) { - m_data[i] -= m.m_data[i]; - } - } - - inline Matrix3 operator+(const Matrix3 & a, const Matrix3 & b) - { - Matrix3 m = a; - m += b; - return m; - } - - inline Matrix3 operator-(const Matrix3 & a, const Matrix3 & b) - { - Matrix3 m = a; - m -= b; - return m; - } - - inline Matrix3 operator*(const Matrix3 & a, float s) - { - Matrix3 m = a; - m *= s; - return m; - } - - inline Matrix3 operator*(float s, const Matrix3 & a) - { - Matrix3 m = a; - m *= s; - return m; - } - - inline 
Matrix3 operator/(const Matrix3 & a, float s) - { - Matrix3 m = a; - m /= s; - return m; - } - - inline Matrix3 mul(const Matrix3 & a, const Matrix3 & b) - { - Matrix3 m; - - for(int i = 0; i < 3; i++) { - const scalar ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2); - m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0) + ai2 * b(2,0); - m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1) + ai2 * b(2,1); - m(i, 2) = ai0 * b(0,2) + ai1 * b(1,2) + ai2 * b(2,2); - } - - return m; - } - - inline Matrix3 operator*(const Matrix3 & a, const Matrix3 & b) - { - return mul(a, b); - } - - inline float Matrix3::determinant() const - { - return - get(0,0) * get(1,1) * get(2,2) + - get(0,1) * get(1,2) * get(2,0) + - get(0,2) * get(1,0) * get(2,1) - - get(0,2) * get(1,1) * get(2,0) - - get(0,1) * get(1,0) * get(2,2) - - get(0,0) * get(1,2) * get(2,1); - } - - - - /// 4x4 transformation matrix. - /// -# Matrices are stored in memory in column major order. - /// -# Points are to be though of as column vectors. - /// -# Transformation of a point p by a matrix M is: p' = M * p - class NVMATH_CLASS Matrix - { - public: - typedef Matrix const & Arg; - - Matrix(); - explicit Matrix(float f); - explicit Matrix(identity_t); - Matrix(const Matrix & m); - Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3); - //explicit Matrix(const scalar m[]); // m is assumed to contain 16 elements - - scalar data(uint idx) const; - scalar & data(uint idx); - scalar get(uint row, uint col) const; - scalar operator()(uint row, uint col) const; - scalar & operator()(uint row, uint col); - const scalar * ptr() const; - - Vector4 row(uint i) const; - Vector4 column(uint i) const; - - void scale(scalar s); - void scale(Vector3::Arg s); - void translate(Vector3::Arg t); - void rotate(scalar theta, scalar v0, scalar v1, scalar v2); - scalar determinant() const; - - void apply(Matrix::Arg m); - - private: - scalar m_data[16]; - }; - - - inline Matrix::Matrix() - { - } - - inline Matrix::Matrix(float f) - { - for(int i = 0; i 
< 16; i++) { - m_data[i] = 0.0f; - } - } - - inline Matrix::Matrix(identity_t) - { - for(int i = 0; i < 4; i++) { - for(int j = 0; j < 4; j++) { - m_data[4*j+i] = (i == j) ? 1.0f : 0.0f; - } - } - } - - inline Matrix::Matrix(const Matrix & m) - { - for(int i = 0; i < 16; i++) { - m_data[i] = m.m_data[i]; - } - } - - inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3) - { - m_data[ 0] = v0.x; m_data[ 1] = v0.y; m_data[ 2] = v0.z; m_data[ 3] = v0.w; - m_data[ 4] = v1.x; m_data[ 5] = v1.y; m_data[ 6] = v1.z; m_data[ 7] = v1.w; - m_data[ 8] = v2.x; m_data[ 9] = v2.y; m_data[10] = v2.z; m_data[11] = v2.w; - m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w; - } - - /*inline Matrix::Matrix(const scalar m[]) - { - for(int i = 0; i < 16; i++) { - m_data[i] = m[i]; - } - }*/ - - - // Accessors - inline scalar Matrix::data(uint idx) const - { - nvDebugCheck(idx < 16); - return m_data[idx]; - } - inline scalar & Matrix::data(uint idx) - { - nvDebugCheck(idx < 16); - return m_data[idx]; - } - inline scalar Matrix::get(uint row, uint col) const - { - nvDebugCheck(row < 4 && col < 4); - return m_data[col * 4 + row]; - } - inline scalar Matrix::operator()(uint row, uint col) const - { - nvDebugCheck(row < 4 && col < 4); - return m_data[col * 4 + row]; - } - inline scalar & Matrix::operator()(uint row, uint col) - { - nvDebugCheck(row < 4 && col < 4); - return m_data[col * 4 + row]; - } - - inline const scalar * Matrix::ptr() const - { - return m_data; - } - - inline Vector4 Matrix::row(uint i) const - { - nvDebugCheck(i < 4); - return Vector4(get(i, 0), get(i, 1), get(i, 2), get(i, 3)); - } - - inline Vector4 Matrix::column(uint i) const - { - nvDebugCheck(i < 4); - return Vector4(get(0, i), get(1, i), get(2, i), get(3, i)); - } - - /// Apply scale. 
- inline void Matrix::scale(scalar s) - { - m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s; - m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s; - m_data[8] *= s; m_data[9] *= s; m_data[10] *= s; m_data[11] *= s; - m_data[12] *= s; m_data[13] *= s; m_data[14] *= s; m_data[15] *= s; - } - - /// Apply scale. - inline void Matrix::scale(Vector3::Arg s) - { - m_data[0] *= s.x; m_data[1] *= s.x; m_data[2] *= s.x; m_data[3] *= s.x; - m_data[4] *= s.y; m_data[5] *= s.y; m_data[6] *= s.y; m_data[7] *= s.y; - m_data[8] *= s.z; m_data[9] *= s.z; m_data[10] *= s.z; m_data[11] *= s.z; - } - - /// Apply translation. - inline void Matrix::translate(Vector3::Arg t) - { - m_data[12] = m_data[0] * t.x + m_data[4] * t.y + m_data[8] * t.z + m_data[12]; - m_data[13] = m_data[1] * t.x + m_data[5] * t.y + m_data[9] * t.z + m_data[13]; - m_data[14] = m_data[2] * t.x + m_data[6] * t.y + m_data[10] * t.z + m_data[14]; - m_data[15] = m_data[3] * t.x + m_data[7] * t.y + m_data[11] * t.z + m_data[15]; - } - - Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2); - - /// Apply rotation. - inline void Matrix::rotate(scalar theta, scalar v0, scalar v1, scalar v2) - { - Matrix R(rotation(theta, v0, v1, v2)); - apply(R); - } - - /// Apply transform. - inline void Matrix::apply(Matrix::Arg m) - { - nvDebugCheck(this != &m); - - for(int i = 0; i < 4; i++) { - const scalar ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3); - m_data[0 + i] = ai0 * m(0,0) + ai1 * m(1,0) + ai2 * m(2,0) + ai3 * m(3,0); - m_data[4 + i] = ai0 * m(0,1) + ai1 * m(1,1) + ai2 * m(2,1) + ai3 * m(3,1); - m_data[8 + i] = ai0 * m(0,2) + ai1 * m(1,2) + ai2 * m(2,2) + ai3 * m(3,2); - m_data[12+ i] = ai0 * m(0,3) + ai1 * m(1,3) + ai2 * m(2,3) + ai3 * m(3,3); - } - } - - /// Get scale matrix. - inline Matrix scale(Vector3::Arg s) - { - Matrix m(identity); - m(0,0) = s.x; - m(1,1) = s.y; - m(2,2) = s.z; - return m; - } - - /// Get scale matrix. 
- inline Matrix scale(scalar s) - { - Matrix m(identity); - m(0,0) = m(1,1) = m(2,2) = s; - return m; - } - - /// Get translation matrix. - inline Matrix translation(Vector3::Arg t) - { - Matrix m(identity); - m(0,3) = t.x; - m(1,3) = t.y; - m(2,3) = t.z; - return m; - } - - /// Get rotation matrix. - inline Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2) - { - scalar cost = cosf(theta); - scalar sint = sinf(theta); - - Matrix m(identity); - - if( 1 == v0 && 0 == v1 && 0 == v2 ) { - m(1,1) = cost; m(2,1) = -sint; - m(1,2) = sint; m(2,2) = cost; - } - else if( 0 == v0 && 1 == v1 && 0 == v2 ) { - m(0,0) = cost; m(2,0) = sint; - m(1,2) = -sint; m(2,2) = cost; - } - else if( 0 == v0 && 0 == v1 && 1 == v2 ) { - m(0,0) = cost; m(1,0) = -sint; - m(0,1) = sint; m(1,1) = cost; - } - else { - scalar a2, b2, c2; - a2 = v0 * v0; - b2 = v1 * v1; - c2 = v2 * v2; - - scalar iscale = 1.0f / sqrtf(a2 + b2 + c2); - v0 *= iscale; - v1 *= iscale; - v2 *= iscale; - - scalar abm, acm, bcm; - scalar mcos, asin, bsin, csin; - mcos = 1.0f - cost; - abm = v0 * v1 * mcos; - acm = v0 * v2 * mcos; - bcm = v1 * v2 * mcos; - asin = v0 * sint; - bsin = v1 * sint; - csin = v2 * sint; - m(0,0) = a2 * mcos + cost; - m(1,0) = abm - csin; - m(2,0) = acm + bsin; - m(3,0) = abm + csin; - m(1,1) = b2 * mcos + cost; - m(2,1) = bcm - asin; - m(3,1) = acm - bsin; - m(1,2) = bcm + asin; - m(2,2) = c2 * mcos + cost; - } - return m; - } - - //Matrix rotation(scalar yaw, scalar pitch, scalar roll); - //Matrix skew(scalar angle, Vector3::Arg v1, Vector3::Arg v2); - - /// Get frustum matrix. 
- inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar) - { - Matrix m(0.0f); - - scalar doubleznear = 2.0f * zNear; - scalar one_deltax = 1.0f / (xmax - xmin); - scalar one_deltay = 1.0f / (ymax - ymin); - scalar one_deltaz = 1.0f / (zFar - zNear); - - m(0,0) = doubleznear * one_deltax; - m(1,1) = doubleznear * one_deltay; - m(0,2) = (xmax + xmin) * one_deltax; - m(1,2) = (ymax + ymin) * one_deltay; - m(2,2) = -(zFar + zNear) * one_deltaz; - m(3,2) = -1.0f; - m(2,3) = -(zFar * doubleznear) * one_deltaz; - - return m; - } - - /// Get infinite frustum matrix. - inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear) - { - Matrix m(0.0f); - - scalar doubleznear = 2.0f * zNear; - scalar one_deltax = 1.0f / (xmax - xmin); - scalar one_deltay = 1.0f / (ymax - ymin); - scalar nudge = 1.0; // 0.999; - - m(0,0) = doubleznear * one_deltax; - m(1,1) = doubleznear * one_deltay; - m(0,2) = (xmax + xmin) * one_deltax; - m(1,2) = (ymax + ymin) * one_deltay; - m(2,2) = -1.0f * nudge; - m(3,2) = -1.0f; - m(2,3) = -doubleznear * nudge; - - return m; - } - - /// Get perspective matrix. - inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear, scalar zFar) - { - scalar xmax = zNear * tan(fovy / 2); - scalar xmin = -xmax; - - scalar ymax = xmax / aspect; - scalar ymin = -ymax; - - return frustum(xmin, xmax, ymin, ymax, zNear, zFar); - } - - /// Get infinite perspective matrix. - inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear) - { - scalar x = zNear * tan(fovy / 2); - scalar y = x / aspect; - return frustum( -x, x, -y, y, zNear ); - } - - /// Get matrix determinant. 
- inline scalar Matrix::determinant() const - { - return - m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] + - m_data[2] * m_data[5] * m_data[11] * m_data[12] - m_data[1] * m_data[6] * m_data[11] * m_data[12] - m_data[3] * m_data[6] * m_data[ 8] * m_data[13] + m_data[2] * m_data[7] * m_data[ 8] * m_data[13] + - m_data[3] * m_data[4] * m_data[10] * m_data[13] - m_data[0] * m_data[7] * m_data[10] * m_data[13] - m_data[2] * m_data[4] * m_data[11] * m_data[13] + m_data[0] * m_data[6] * m_data[11] * m_data[13] + - m_data[3] * m_data[5] * m_data[ 8] * m_data[14] - m_data[1] * m_data[7] * m_data[ 8] * m_data[14] - m_data[3] * m_data[4] * m_data[ 9] * m_data[14] + m_data[0] * m_data[7] * m_data[ 9] * m_data[14] + - m_data[1] * m_data[4] * m_data[11] * m_data[14] - m_data[0] * m_data[5] * m_data[11] * m_data[14] - m_data[2] * m_data[5] * m_data[ 8] * m_data[15] + m_data[1] * m_data[6] * m_data[ 8] * m_data[15] + - m_data[2] * m_data[4] * m_data[ 9] * m_data[15] - m_data[0] * m_data[6] * m_data[ 9] * m_data[15] - m_data[1] * m_data[4] * m_data[10] * m_data[15] + m_data[0] * m_data[5] * m_data[10] * m_data[15]; - } - - inline Matrix transpose(Matrix::Arg m) - { - Matrix r; - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 4; j++) - { - r(i, j) = m(j, i); - } - } - return r; - } - - inline Matrix inverse(Matrix::Arg m) - { - Matrix r; - r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15); - r.data( 1) = m.data(3)*m.data(10)*m.data(13) - m.data(2)*m.data(11)*m.data(13) - m.data(3)*m.data(9)*m.data(14) + m.data(1)*m.data(11)*m.data(14) + m.data(2)*m.data(9)*m.data(15) - m.data(1)*m.data(10)*m.data(15); - r.data( 2) = m.data(2)*m.data( 7)*m.data(13) - m.data(3)*m.data( 
6)*m.data(13) + m.data(3)*m.data(5)*m.data(14) - m.data(1)*m.data( 7)*m.data(14) - m.data(2)*m.data(5)*m.data(15) + m.data(1)*m.data( 6)*m.data(15); - r.data( 3) = m.data(3)*m.data( 6)*m.data( 9) - m.data(2)*m.data( 7)*m.data( 9) - m.data(3)*m.data(5)*m.data(10) + m.data(1)*m.data( 7)*m.data(10) + m.data(2)*m.data(5)*m.data(11) - m.data(1)*m.data( 6)*m.data(11); - r.data( 4) = m.data(7)*m.data(10)*m.data(12) - m.data(6)*m.data(11)*m.data(12) - m.data(7)*m.data(8)*m.data(14) + m.data(4)*m.data(11)*m.data(14) + m.data(6)*m.data(8)*m.data(15) - m.data(4)*m.data(10)*m.data(15); - r.data( 5) = m.data(2)*m.data(11)*m.data(12) - m.data(3)*m.data(10)*m.data(12) + m.data(3)*m.data(8)*m.data(14) - m.data(0)*m.data(11)*m.data(14) - m.data(2)*m.data(8)*m.data(15) + m.data(0)*m.data(10)*m.data(15); - r.data( 6) = m.data(3)*m.data( 6)*m.data(12) - m.data(2)*m.data( 7)*m.data(12) - m.data(3)*m.data(4)*m.data(14) + m.data(0)*m.data( 7)*m.data(14) + m.data(2)*m.data(4)*m.data(15) - m.data(0)*m.data( 6)*m.data(15); - r.data( 7) = m.data(2)*m.data( 7)*m.data( 8) - m.data(3)*m.data( 6)*m.data( 8) + m.data(3)*m.data(4)*m.data(10) - m.data(0)*m.data( 7)*m.data(10) - m.data(2)*m.data(4)*m.data(11) + m.data(0)*m.data( 6)*m.data(11); - r.data( 8) = m.data(5)*m.data(11)*m.data(12) - m.data(7)*m.data( 9)*m.data(12) + m.data(7)*m.data(8)*m.data(13) - m.data(4)*m.data(11)*m.data(13) - m.data(5)*m.data(8)*m.data(15) + m.data(4)*m.data( 9)*m.data(15); - r.data( 9) = m.data(3)*m.data( 9)*m.data(12) - m.data(1)*m.data(11)*m.data(12) - m.data(3)*m.data(8)*m.data(13) + m.data(0)*m.data(11)*m.data(13) + m.data(1)*m.data(8)*m.data(15) - m.data(0)*m.data( 9)*m.data(15); - r.data(10) = m.data(1)*m.data( 7)*m.data(12) - m.data(3)*m.data( 5)*m.data(12) + m.data(3)*m.data(4)*m.data(13) - m.data(0)*m.data( 7)*m.data(13) - m.data(1)*m.data(4)*m.data(15) + m.data(0)*m.data( 5)*m.data(15); - r.data(11) = m.data(3)*m.data( 5)*m.data( 8) - m.data(1)*m.data( 7)*m.data( 8) - m.data(3)*m.data(4)*m.data( 9) + 
m.data(0)*m.data( 7)*m.data( 9) + m.data(1)*m.data(4)*m.data(11) - m.data(0)*m.data( 5)*m.data(11); - r.data(12) = m.data(6)*m.data( 9)*m.data(12) - m.data(5)*m.data(10)*m.data(12) - m.data(6)*m.data(8)*m.data(13) + m.data(4)*m.data(10)*m.data(13) + m.data(5)*m.data(8)*m.data(14) - m.data(4)*m.data( 9)*m.data(14); - r.data(13) = m.data(1)*m.data(10)*m.data(12) - m.data(2)*m.data( 9)*m.data(12) + m.data(2)*m.data(8)*m.data(13) - m.data(0)*m.data(10)*m.data(13) - m.data(1)*m.data(8)*m.data(14) + m.data(0)*m.data( 9)*m.data(14); - r.data(14) = m.data(2)*m.data( 5)*m.data(12) - m.data(1)*m.data( 6)*m.data(12) - m.data(2)*m.data(4)*m.data(13) + m.data(0)*m.data( 6)*m.data(13) + m.data(1)*m.data(4)*m.data(14) - m.data(0)*m.data( 5)*m.data(14); - r.data(15) = m.data(1)*m.data( 6)*m.data( 8) - m.data(2)*m.data( 5)*m.data( 8) + m.data(2)*m.data(4)*m.data( 9) - m.data(0)*m.data( 6)*m.data( 9) - m.data(1)*m.data(4)*m.data(10) + m.data(0)*m.data( 5)*m.data(10); - r.scale(1.0f / m.determinant()); - return r; - } - - inline Matrix isometryInverse(Matrix::Arg m) - { - Matrix r(identity); - - // transposed 3x3 upper left matrix - for (int i = 0; i < 3; i++) - { - for (int j = 0; j < 3; j++) - { - r(i, j) = m(j, i); - } - } - - // translate by the negative offsets - r.translate(-Vector3(m.data(12), m.data(13), m.data(14))); - - return r; - } - - //Matrix affineInverse(Matrix::Arg m); - - /// Transform the given 3d point with the given matrix. - inline Vector3 transformPoint(Matrix::Arg m, Vector3::Arg p) - { - return Vector3( - p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + m(0,3), - p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + m(1,3), - p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + m(2,3)); - } - - /// Transform the given 3d vector with the given matrix. 
- inline Vector3 transformVector(Matrix::Arg m, Vector3::Arg p) - { - return Vector3( - p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2), - p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2), - p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2)); - } - - /// Transform the given 4d vector with the given matrix. - inline Vector4 transform(Matrix::Arg m, Vector4::Arg p) - { - return Vector4( - p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + p.w * m(0,3), - p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + p.w * m(1,3), - p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + p.w * m(2,3), - p.x * m(3,0) + p.y * m(3,1) + p.z * m(3,2) + p.w * m(3,3)); - } - - inline Matrix mul(Matrix::Arg a, Matrix::Arg b) - { - // @@ Is this the right order? mul(a, b) = b * a - Matrix m = a; - m.apply(b); - return m; - } - -} // nv namespace - - - - -#if 0 -/** @name Special matrices. */ -//@{ -/** Generate a translation matrix. */ -void TranslationMatrix(const Vec3 & v) { - data[0] = 1; data[1] = 0; data[2] = 0; data[3] = 0; - data[4] = 0; data[5] = 1; data[6] = 0; data[7] = 0; - data[8] = 0; data[9] = 0; data[10] = 1; data[11] = 0; - data[12] = v.x; data[13] = v.y; data[14] = v.z; data[15] = 1; -} - -/** Rotate theta degrees around v. 
*/ -void RotationMatrix( scalar theta, scalar v0, scalar v1, scalar v2 ) { - scalar cost = cos(theta); - scalar sint = sin(theta); - - if( 1 == v0 && 0 == v1 && 0 == v2 ) { - data[0] = 1.0f; data[1] = 0.0f; data[2] = 0.0f; data[3] = 0.0f; - data[4] = 0.0f; data[5] = cost; data[6] = -sint;data[7] = 0.0f; - data[8] = 0.0f; data[9] = sint; data[10] = cost;data[11] = 0.0f; - data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; - } - else if( 0 == v0 && 1 == v1 && 0 == v2 ) { - data[0] = cost; data[1] = 0.0f; data[2] = sint; data[3] = 0.0f; - data[4] = 0.0f; data[5] = 1.0f; data[6] = 0.0f; data[7] = 0.0f; - data[8] = -sint;data[9] = 0.0f;data[10] = cost; data[11] = 0.0f; - data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; - } - else if( 0 == v0 && 0 == v1 && 1 == v2 ) { - data[0] = cost; data[1] = -sint;data[2] = 0.0f; data[3] = 0.0f; - data[4] = sint; data[5] = cost; data[6] = 0.0f; data[7] = 0.0f; - data[8] = 0.0f; data[9] = 0.0f; data[10] = 1.0f;data[11] = 0.0f; - data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; - } - else { - //we need scale a,b,c to unit length. 
- scalar a2, b2, c2; - a2 = v0 * v0; - b2 = v1 * v1; - c2 = v2 * v2; - - scalar iscale = 1.0f / sqrtf(a2 + b2 + c2); - v0 *= iscale; - v1 *= iscale; - v2 *= iscale; - - scalar abm, acm, bcm; - scalar mcos, asin, bsin, csin; - mcos = 1.0f - cost; - abm = v0 * v1 * mcos; - acm = v0 * v2 * mcos; - bcm = v1 * v2 * mcos; - asin = v0 * sint; - bsin = v1 * sint; - csin = v2 * sint; - data[0] = a2 * mcos + cost; - data[1] = abm - csin; - data[2] = acm + bsin; - data[3] = abm + csin; - data[4] = 0.0f; - data[5] = b2 * mcos + cost; - data[6] = bcm - asin; - data[7] = acm - bsin; - data[8] = 0.0f; - data[9] = bcm + asin; - data[10] = c2 * mcos + cost; - data[11] = 0.0f; - data[12] = 0.0f; - data[13] = 0.0f; - data[14] = 0.0f; - data[15] = 1.0f; - } -} - -/* -void SkewMatrix(scalar angle, const Vec3 & v1, const Vec3 & v2) { -v1.Normalize(); -v2.Normalize(); - -Vec3 v3; -v3.Cross(v1, v2); -v3.Normalize(); - -// Get skew factor. -scalar costheta = Vec3DotProduct(v1, v2); -scalar sintheta = Real.Sqrt(1 - costheta * costheta); -scalar skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta; - -// Build orthonormal matrix. -v1 = FXVector3.Cross(v3, v2); -v1.Normalize(); - -Matrix R = Matrix::Identity; -R[0, 0] = v3.X; // Not sure this is in the correct order... -R[1, 0] = v3.Y; -R[2, 0] = v3.Z; -R[0, 1] = v1.X; -R[1, 1] = v1.Y; -R[2, 1] = v1.Z; -R[0, 2] = v2.X; -R[1, 2] = v2.Y; -R[2, 2] = v2.Z; - -// Build skew matrix. -Matrix S = Matrix::Identity; -S[2, 1] = -skew; - -// Return skew transform. -return R * S * R.Transpose; // Not sure this is in the correct order... -} -*/ - -/** -* Generate rotation matrix for the euler angles. This is the same as computing -* 3 rotation matrices and multiplying them together in our custom order. -* -* @todo Have to recompute this code for our new convention. 
-**/ -void RotationMatrix( scalar yaw, scalar pitch, scalar roll ) { - scalar sy = sin(yaw+ToRadian(90)); - scalar cy = cos(yaw+ToRadian(90)); - scalar sp = sin(pitch-ToRadian(90)); - scalar cp = cos(pitch-ToRadian(90)); - scalar sr = sin(roll); - scalar cr = cos(roll); - - data[0] = cr*cy + sr*sp*sy; - data[1] = cp*sy; - data[2] = -sr*cy + cr*sp*sy; - data[3] = 0; - - data[4] = -cr*sy + sr*sp*cy; - data[5] = cp*cy; - data[6] = sr*sy + cr*sp*cy; - data[7] = 0; - - data[8] = sr*cp; - data[9] = -sp; - data[10] = cr*cp; - data[11] = 0; - - data[12] = 0; - data[13] = 0; - data[14] = 0; - data[15] = 1; -} - -/** Create a frustum matrix with the far plane at the infinity. */ -void Frustum( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar ) { - scalar one_deltax, one_deltay, one_deltaz, doubleznear; - - doubleznear = 2.0f * zNear; - one_deltax = 1.0f / (xmax - xmin); - one_deltay = 1.0f / (ymax - ymin); - one_deltaz = 1.0f / (zFar - zNear); - - data[0] = (scalar)(doubleznear * one_deltax); - data[1] = 0.0f; - data[2] = 0.0f; - data[3] = 0.0f; - data[4] = 0.0f; - data[5] = (scalar)(doubleznear * one_deltay); - data[6] = 0.f; - data[7] = 0.f; - data[8] = (scalar)((xmax + xmin) * one_deltax); - data[9] = (scalar)((ymax + ymin) * one_deltay); - data[10] = (scalar)(-(zFar + zNear) * one_deltaz); - data[11] = -1.f; - data[12] = 0.f; - data[13] = 0.f; - data[14] = (scalar)(-(zFar * doubleznear) * one_deltaz); - data[15] = 0.f; -} - -/** Create a frustum matrix with the far plane at the infinity. 
*/ -void FrustumInf( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear ) { - scalar one_deltax, one_deltay, doubleznear, nudge; - - doubleznear = 2.0f * zNear; - one_deltax = 1.0f / (xmax - xmin); - one_deltay = 1.0f / (ymax - ymin); - nudge = 1.0; // 0.999; - - data[0] = doubleznear * one_deltax; - data[1] = 0.0f; - data[2] = 0.0f; - data[3] = 0.0f; - - data[4] = 0.0f; - data[5] = doubleznear * one_deltay; - data[6] = 0.f; - data[7] = 0.f; - - data[8] = (xmax + xmin) * one_deltax; - data[9] = (ymax + ymin) * one_deltay; - data[10] = -1.0f * nudge; - data[11] = -1.0f; - - data[12] = 0.f; - data[13] = 0.f; - data[14] = -doubleznear * nudge; - data[15] = 0.f; -} - -/** Create an inverse frustum matrix with the far plane at the infinity. */ -void FrustumInfInv( scalar left, scalar right, scalar bottom, scalar top, scalar zNear ) { - // this matrix is wrong (not tested scalarly) I think it should be transposed. - data[0] = (right - left) / (2 * zNear); - data[1] = 0; - data[2] = 0; - data[3] = (right + left) / (2 * zNear); - data[4] = 0; - data[5] = (top - bottom) / (2 * zNear); - data[6] = 0; - data[7] = (top + bottom) / (2 * zNear); - data[8] = 0; - data[9] = 0; - data[10] = 0; - data[11] = -1; - data[12] = 0; - data[13] = 0; - data[14] = -1 / (2 * zNear); - data[15] = 1 / (2 * zNear); -} - -/** Create an homogeneous projection matrix. */ -void Perspective( scalar fov, scalar aspect, scalar zNear, scalar zFar ) { - scalar xmin, xmax, ymin, ymax; - - xmax = zNear * tan( fov/2 ); - xmin = -xmax; - - ymax = xmax / aspect; - ymin = -ymax; - - Frustum(xmin, xmax, ymin, ymax, zNear, zFar); -} - -/** Create a projection matrix with the far plane at the infinity. */ -void PerspectiveInf( scalar fov, scalar aspect, scalar zNear ) { - scalar x = zNear * tan( fov/2 ); - scalar y = x / aspect; - FrustumInf( -x, x, -y, y, zNear ); -} - -/** Create an inverse projection matrix with far plane at the infinity. 
*/ -void PerspectiveInfInv( scalar fov, scalar aspect, scalar zNear ) { - scalar x = zNear * tan( fov/2 ); - scalar y = x / aspect; - FrustumInfInv( -x, x, -y, y, zNear ); -} - -/** Build bone matrix from quatertion and offset. */ -void BoneMatrix(const Quat & q, const Vec3 & offset) { - scalar x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz; - - // calculate coefficients - x2 = q.x + q.x; - y2 = q.y + q.y; - z2 = q.z + q.z; - - xx = q.x * x2; xy = q.x * y2; xz = q.x * z2; - yy = q.y * y2; yz = q.y * z2; zz = q.z * z2; - wx = q.w * x2; wy = q.w * y2; wz = q.w * z2; - - data[0] = 1.0f - (yy + zz); - data[1] = xy - wz; - data[2] = xz + wy; - data[3] = 0.0f; - - data[4] = xy + wz; - data[5] = 1.0f - (xx + zz); - data[6] = yz - wx; - data[7] = 0.0f; - - data[8] = xz - wy; - data[9] = yz + wx; - data[10] = 1.0f - (xx + yy); - data[11] = 0.0f; - - data[12] = offset.x; - data[13] = offset.y; - data[14] = offset.z; - data[15] = 1.0f; -} - -//@} - - -/** @name Transformations: */ -//@{ - -/** Apply a general scale. */ -void Scale( scalar x, scalar y, scalar z ) { - data[0] *= x; data[4] *= y; data[8] *= z; - data[1] *= x; data[5] *= y; data[9] *= z; - data[2] *= x; data[6] *= y; data[10] *= z; - data[3] *= x; data[7] *= y; data[11] *= z; -} - -/** Apply a rotation of theta degrees around the axis v*/ -void Rotate( scalar theta, const Vec3 & v ) { - Matrix b; - b.RotationMatrix( theta, v[0], v[1], v[2] ); - Multiply4x3( b ); -} - -/** Apply a rotation of theta degrees around the axis v*/ -void Rotate( scalar theta, scalar v0, scalar v1, scalar v2 ) { - Matrix b; - b.RotationMatrix( theta, v0, v1, v2 ); - Multiply4x3( b ); -} - -/** -* Translate the matrix by t. This is the same as multiplying by a -* translation matrix with the given offset. 
-* this = T * this -*/ -void Translate( const Vec3 &t ) { - data[12] = data[0] * t.x + data[4] * t.y + data[8] * t.z + data[12]; - data[13] = data[1] * t.x + data[5] * t.y + data[9] * t.z + data[13]; - data[14] = data[2] * t.x + data[6] * t.y + data[10] * t.z + data[14]; - data[15] = data[3] * t.x + data[7] * t.y + data[11] * t.z + data[15]; -} - -/** -* Translate the matrix by x, y, z. This is the same as multiplying by a -* translation matrix with the given offsets. -*/ -void Translate( scalar x, scalar y, scalar z ) { - data[12] = data[0] * x + data[4] * y + data[8] * z + data[12]; - data[13] = data[1] * x + data[5] * y + data[9] * z + data[13]; - data[14] = data[2] * x + data[6] * y + data[10] * z + data[14]; - data[15] = data[3] * x + data[7] * y + data[11] * z + data[15]; -} - -/** Compute the transposed matrix. */ -void Transpose() { - piSwap(data[1], data[4]); - piSwap(data[2], data[8]); - piSwap(data[6], data[9]); - piSwap(data[3], data[12]); - piSwap(data[7], data[13]); - piSwap(data[11], data[14]); -} - -/** Compute the inverse of a rigid-body/isometry/orthonormal matrix. */ -void IsometryInverse() { - // transposed 3x3 upper left matrix - piSwap(data[1], data[4]); - piSwap(data[2], data[8]); - piSwap(data[6], data[9]); - - // translate by the negative offsets - Vec3 v(-data[12], -data[13], -data[14]); - data[12] = data[13] = data[14] = 0; - Translate(v); -} - -/** Compute the inverse of the affine portion of this matrix. */ -void AffineInverse() { - data[12] = data[13] = data[14] = 0; - Transpose(); -} -//@} - -/** @name Matrix operations: */ -//@{ - -/** Return the determinant of this matrix. 
*/ -scalar Determinant() const { - return data[0] * data[5] * data[10] * data[15] + - data[1] * data[6] * data[11] * data[12] + - data[2] * data[7] * data[ 8] * data[13] + - data[3] * data[4] * data[ 9] * data[14] - - data[3] * data[6] * data[ 9] * data[12] - - data[2] * data[5] * data[ 8] * data[15] - - data[1] * data[4] * data[11] * data[14] - - data[0] * data[7] * data[10] * data[12]; -} - - -/** Standard matrix product: this *= B. */ -void Multiply4x4( const Matrix & restrict B ) { - Multiply4x4(*this, B); -} - -/** Standard matrix product: this = A * B. this != B*/ -void Multiply4x4( const Matrix & A, const Matrix & restrict B ) { - piDebugCheck(this != &B); - - for(int i = 0; i < 4; i++) { - const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); - GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); - GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); - GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); - GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); - } - - /* Unrolled but does not allow this == A - data[0] = A.data[0] * B.data[0] + A.data[4] * B.data[1] + A.data[8] * B.data[2] + A.data[12] * B.data[3]; - data[1] = A.data[1] * B.data[0] + A.data[5] * B.data[1] + A.data[9] * B.data[2] + A.data[13] * B.data[3]; - data[2] = A.data[2] * B.data[0] + A.data[6] * B.data[1] + A.data[10] * B.data[2] + A.data[14] * B.data[3]; - data[3] = A.data[3] * B.data[0] + A.data[7] * B.data[1] + A.data[11] * B.data[2] + A.data[15] * B.data[3]; - data[4] = A.data[0] * B.data[4] + A.data[4] * B.data[5] + A.data[8] * B.data[6] + A.data[12] * B.data[7]; - data[5] = A.data[1] * B.data[4] + A.data[5] * B.data[5] + A.data[9] * B.data[6] + A.data[13] * B.data[7]; - data[6] = A.data[2] * B.data[4] + A.data[6] * B.data[5] + A.data[10] * B.data[6] + A.data[14] * B.data[7]; - data[7] = A.data[3] * B.data[4] + A.data[7] * B.data[5] + A.data[11] * B.data[6] + A.data[15] * B.data[7]; - 
data[8] = A.data[0] * B.data[8] + A.data[4] * B.data[9] + A.data[8] * B.data[10] + A.data[12] * B.data[11]; - data[9] = A.data[1] * B.data[8] + A.data[5] * B.data[9] + A.data[9] * B.data[10] + A.data[13] * B.data[11]; - data[10]= A.data[2] * B.data[8] + A.data[6] * B.data[9] + A.data[10] * B.data[10] + A.data[14] * B.data[11]; - data[11]= A.data[3] * B.data[8] + A.data[7] * B.data[9] + A.data[11] * B.data[10] + A.data[15] * B.data[11]; - data[12]= A.data[0] * B.data[12] + A.data[4] * B.data[13] + A.data[8] * B.data[14] + A.data[12] * B.data[15]; - data[13]= A.data[1] * B.data[12] + A.data[5] * B.data[13] + A.data[9] * B.data[14] + A.data[13] * B.data[15]; - data[14]= A.data[2] * B.data[12] + A.data[6] * B.data[13] + A.data[10] * B.data[14] + A.data[14] * B.data[15]; - data[15]= A.data[3] * B.data[12] + A.data[7] * B.data[13] + A.data[11] * B.data[14] + A.data[15] * B.data[15]; - */ -} - -/** Standard matrix product: this *= B. */ -void Multiply4x3( const Matrix & restrict B ) { - Multiply4x3(*this, B); -} - -/** Standard product of matrices, where the last row is [0 0 0 1]. 
*/ -void Multiply4x3( const Matrix & A, const Matrix & restrict B ) { - piDebugCheck(this != &B); - - for(int i = 0; i < 3; i++) { - const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); - GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); - GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); - GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); - GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); - } - data[3] = 0.0f; data[7] = 0.0f; data[11] = 0.0f; data[15] = 1.0f; - - /* Unrolled but does not allow this == A - data[0] = a.data[0] * b.data[0] + a.data[4] * b.data[1] + a.data[8] * b.data[2] + a.data[12] * b.data[3]; - data[1] = a.data[1] * b.data[0] + a.data[5] * b.data[1] + a.data[9] * b.data[2] + a.data[13] * b.data[3]; - data[2] = a.data[2] * b.data[0] + a.data[6] * b.data[1] + a.data[10] * b.data[2] + a.data[14] * b.data[3]; - data[3] = 0.0f; - data[4] = a.data[0] * b.data[4] + a.data[4] * b.data[5] + a.data[8] * b.data[6] + a.data[12] * b.data[7]; - data[5] = a.data[1] * b.data[4] + a.data[5] * b.data[5] + a.data[9] * b.data[6] + a.data[13] * b.data[7]; - data[6] = a.data[2] * b.data[4] + a.data[6] * b.data[5] + a.data[10] * b.data[6] + a.data[14] * b.data[7]; - data[7] = 0.0f; - data[8] = a.data[0] * b.data[8] + a.data[4] * b.data[9] + a.data[8] * b.data[10] + a.data[12] * b.data[11]; - data[9] = a.data[1] * b.data[8] + a.data[5] * b.data[9] + a.data[9] * b.data[10] + a.data[13] * b.data[11]; - data[10]= a.data[2] * b.data[8] + a.data[6] * b.data[9] + a.data[10] * b.data[10] + a.data[14] * b.data[11]; - data[11]= 0.0f; - data[12]= a.data[0] * b.data[12] + a.data[4] * b.data[13] + a.data[8] * b.data[14] + a.data[12] * b.data[15]; - data[13]= a.data[1] * b.data[12] + a.data[5] * b.data[13] + a.data[9] * b.data[14] + a.data[13] * b.data[15]; - data[14]= a.data[2] * b.data[12] + a.data[6] * b.data[13] + a.data[10] * b.data[14] + a.data[14] * b.data[15]; - 
data[15]= 1.0f; - */ -} -//@} - - -/** @name Vector operations: */ -//@{ - -/** Transform 3d vector (w=0). */ -void TransformVec3(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10]; -} -/** Transform 3d vector by the transpose (w=0). */ -void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2]; - dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6]; - dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10]; -} - -/** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */ -void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; -} - -/** Transform a point, normalize it, and return w. */ -scalar TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - scalar w; - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; - w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]); - *dest *= w; - return w; -} - -/** Transform a point and return w. 
*/ -scalar TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; - return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; -} - -/** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */ -void TransformVec4(const Vec3 & orig, Vec4 * dest) const { - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; - dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; -} -//@} - -/** @name Matrix analysis. */ -//@{ - -/** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */ -void GetEulerAnglesZYZ(scalar * s, scalar * t, scalar * r) const { - if( GetElem(2,2) < 1.0f ) { - if( GetElem(2,2) > -1.0f ) { - // cs*ct*cr-ss*sr -ss*ct*cr-cs*sr st*cr - // cs*ct*sr+ss*cr -ss*ct*sr+cs*cr st*sr - // -cs*st ss*st ct - *s = atan2(GetElem(1,2), -GetElem(0,2)); - *t = acos(GetElem(2,2)); - *r = atan2(GetElem(2,1), GetElem(2,0)); - } - else { - // -c(s-r) s(s-r) 0 - // s(s-r) c(s-r) 0 - // 0 0 -1 - *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r - *t = PI; - *r = 0; - } - } - else { - // c(s+r) -s(s+r) 0 - // s(s+r) c(s+r) 0 - // 0 0 1 - *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r - *t = 0; - *r = 0; - } -} - -//@} - -MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m ); - -/** Print to debug output. 
*/ -void Print() const { - piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] ); - piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] ); - piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] ); - piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] ); -} - - -public: - - scalar data[16]; - -}; -#endif - - - - -#endif // NV_MATH_MATRIX_H +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_MATRIX_H +#define NV_MATH_MATRIX_H + +#include +#include + +namespace nv +{ + enum identity_t { identity }; + + class NVMATH_CLASS Matrix3 + { + public: + Matrix3(); + explicit Matrix3(float f); + explicit Matrix3(identity_t); + Matrix3(const Matrix3 & m); + Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2); + + scalar get(uint row, uint col) const; + scalar operator()(uint row, uint col) const; + scalar & operator()(uint row, uint col); + + Vector3 row(uint i) const; + Vector3 column(uint i) const; + + void operator*=(float s); + void operator/=(float s); + void operator+=(const Matrix3 & m); + void operator-=(const Matrix3 & m); + + float determinant() const; + + private: + scalar m_data[9]; + }; + + inline Matrix3::Matrix3() {} + + inline Matrix3::Matrix3(float f) + { + for(int i = 0; i < 9; i++) { + m_data[i] = f; + } + } + + inline Matrix3::Matrix3(identity_t) + { + for(int i = 0; i < 3; i++) { + for(int j = 0; j < 3; j++) { + m_data[3*j+i] = (i == j) ? 
1.0f : 0.0f; + } + } + } + + inline Matrix3::Matrix3(const Matrix3 & m) + { + for(int i = 0; i < 9; i++) { + m_data[i] = m.m_data[i]; + } + } + + inline Matrix3::Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2) + { + m_data[0] = v0.x; m_data[1] = v0.y; m_data[2] = v0.z; + m_data[3] = v1.x; m_data[4] = v1.y; m_data[5] = v1.z; + m_data[6] = v2.x; m_data[7] = v2.y; m_data[8] = v2.z; + } + + inline scalar Matrix3::get(uint row, uint col) const + { + nvDebugCheck(row < 3 && col < 3); + return m_data[col * 3 + row]; + } + inline scalar Matrix3::operator()(uint row, uint col) const + { + nvDebugCheck(row < 3 && col < 3); + return m_data[col * 3 + row]; + } + inline scalar & Matrix3::operator()(uint row, uint col) + { + nvDebugCheck(row < 3 && col < 3); + return m_data[col * 3 + row]; + } + + inline Vector3 Matrix3::row(uint i) const + { + nvDebugCheck(i < 3); + return Vector3(get(i, 0), get(i, 1), get(i, 2)); + } + inline Vector3 Matrix3::column(uint i) const + { + nvDebugCheck(i < 3); + return Vector3(get(0, i), get(1, i), get(2, i)); + } + + inline void Matrix3::operator*=(float s) + { + for(int i = 0; i < 9; i++) { + m_data[i] *= s; + } + } + + inline void Matrix3::operator/=(float s) + { + float is = 1.0f /s; + for(int i = 0; i < 9; i++) { + m_data[i] *= is; + } + } + + inline void Matrix3::operator+=(const Matrix3 & m) + { + for(int i = 0; i < 9; i++) { + m_data[i] += m.m_data[i]; + } + } + + inline void Matrix3::operator-=(const Matrix3 & m) + { + for(int i = 0; i < 9; i++) { + m_data[i] -= m.m_data[i]; + } + } + + inline Matrix3 operator+(const Matrix3 & a, const Matrix3 & b) + { + Matrix3 m = a; + m += b; + return m; + } + + inline Matrix3 operator-(const Matrix3 & a, const Matrix3 & b) + { + Matrix3 m = a; + m -= b; + return m; + } + + inline Matrix3 operator*(const Matrix3 & a, float s) + { + Matrix3 m = a; + m *= s; + return m; + } + + inline Matrix3 operator*(float s, const Matrix3 & a) + { + Matrix3 m = a; + m *= s; + return m; + } + + inline 
Matrix3 operator/(const Matrix3 & a, float s)
+    {
+        // Component-wise division; delegates to Matrix3::operator/=.
+        Matrix3 m = a;
+        m /= s;
+        return m;
+    }
+
+    /// 3x3 matrix product: result(i,j) = a(i,0)*b(0,j) + a(i,1)*b(1,j) + a(i,2)*b(2,j).
+    inline Matrix3 mul(const Matrix3 & a, const Matrix3 & b)
+    {
+        Matrix3 m;
+
+        for(int i = 0; i < 3; i++) {
+            // Cache row i of a once; it is reused for all three columns of b.
+            const scalar ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2);
+            m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0) + ai2 * b(2,0);
+            m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1) + ai2 * b(2,1);
+            m(i, 2) = ai0 * b(0,2) + ai1 * b(1,2) + ai2 * b(2,2);
+        }
+
+        return m;
+    }
+
+    /// Operator form of mul(a, b).
+    inline Matrix3 operator*(const Matrix3 & a, const Matrix3 & b)
+    {
+        return mul(a, b);
+    }
+
+    /// Determinant of the 3x3 matrix: three positive and three negative triple products.
+    inline float Matrix3::determinant() const
+    {
+        return
+            get(0,0) * get(1,1) * get(2,2) +
+            get(0,1) * get(1,2) * get(2,0) +
+            get(0,2) * get(1,0) * get(2,1) -
+            get(0,2) * get(1,1) * get(2,0) -
+            get(0,1) * get(1,0) * get(2,2) -
+            get(0,0) * get(1,2) * get(2,1);
+    }
+
+
+
+    /// 4x4 transformation matrix.
+    /// -# Matrices are stored in memory in column major order.
+    /// -# Points are to be thought of as column vectors.
+    /// -# Transformation of a point p by a matrix M is: p' = M * p
+    class NVMATH_CLASS Matrix
+    {
+    public:
+        typedef Matrix const & Arg;
+
+        Matrix();                       ///< Leaves the elements uninitialized.
+        explicit Matrix(float f);       ///< Sets all 16 elements to f.
+        explicit Matrix(identity_t);    ///< Builds the identity matrix.
+        Matrix(const Matrix & m);
+        /// Builds the matrix from four vectors; v0..v3 become columns 0..3.
+        Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
+        //explicit Matrix(const scalar m[]);	// m is assumed to contain 16 elements
+
+        // Flat access, idx in [0, 16), column major: idx = col * 4 + row.
+        scalar data(uint idx) const;
+        scalar & data(uint idx);
+        // (row, col) access.
+        scalar get(uint row, uint col) const;
+        scalar operator()(uint row, uint col) const;
+        scalar & operator()(uint row, uint col);
+        const scalar * ptr() const;
+
+        Vector4 row(uint i) const;
+        Vector4 column(uint i) const;
+
+        void scale(scalar s);
+        void scale(Vector3::Arg s);
+        void translate(Vector3::Arg t);
+        void rotate(scalar theta, scalar v0, scalar v1, scalar v2);
+        scalar determinant() const;
+
+        void apply(Matrix::Arg m);
+
+    private:
+        scalar m_data[16];
+    };
+
+
+    /// Default constructor: the elements are intentionally left uninitialized.
+    inline Matrix::Matrix()
+    {
+    }
+
+    /// Fill constructor: every element is set to f.
+    /// This used to store 0.0f regardless of f, unlike Matrix3(float); Matrix(0.0f) behaves as before.
+    inline Matrix::Matrix(float f)
+    {
+        for(int i = 0; i < 16; i++) {
+            m_data[i] = f;
+        }
+    }
+
+    /// Identity constructor: ones on the diagonal, zeros elsewhere.
+    inline Matrix::Matrix(identity_t)
+    {
+        for(int i = 0; i < 4; i++) {
+            for(int j = 0; j < 4; j++) {
+                m_data[4*j+i] = (i == j) ? 1.0f : 0.0f;
+            }
+        }
+    }
+
+    /// Copy constructor: element-wise copy.
+    inline Matrix::Matrix(const Matrix & m)
+    {
+        for(int i = 0; i < 16; i++) {
+            m_data[i] = m.m_data[i];
+        }
+    }
+
+    /// Column constructor: v0, v1, v2 and v3 are stored as columns 0, 1, 2 and 3.
+    inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3)
+    {
+        m_data[ 0] = v0.x; m_data[ 1] = v0.y; m_data[ 2] = v0.z; m_data[ 3] = v0.w;
+        m_data[ 4] = v1.x; m_data[ 5] = v1.y; m_data[ 6] = v1.z; m_data[ 7] = v1.w;
+        m_data[ 8] = v2.x; m_data[ 9] = v2.y; m_data[10] = v2.z; m_data[11] = v2.w;
+        m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w;
+    }
+
+    /*inline Matrix::Matrix(const scalar m[])
+    {
+        for(int i = 0; i < 16; i++) {
+            m_data[i] = m[i];
+        }
+    }*/
+
+
+    // Accessors
+    /// Element at flat index idx (column major); idx must be below 16.
+    inline scalar Matrix::data(uint idx) const
+    {
+        nvDebugCheck(idx < 16);
+        return m_data[idx];
+    }
+    /// Mutable element at flat index idx (column major); idx must be below 16.
+    inline scalar & Matrix::data(uint idx)
+    {
+        nvDebugCheck(idx < 16);
+        return m_data[idx];
+    }
+    /// Element at (row, col); both must be below 4.
+    inline scalar Matrix::get(uint row, uint col) const
+    {
+        nvDebugCheck(row < 4 && col < 4);
+        return m_data[col * 4 + row];
+    }
+    /// Same as get(row, col).
+    inline scalar Matrix::operator()(uint row, uint col) const
+    {
+        nvDebugCheck(row < 4 && col < 4);
+        return m_data[col * 4 + row];
+    }
+    /// Mutable element at (row, col); both must be below 4.
+    inline scalar & Matrix::operator()(uint row, uint col)
+    {
+        nvDebugCheck(row < 4 && col < 4);
+        return m_data[col * 4 + row];
+    }
+
+    /// Pointer to the 16 stored elements, in column major order.
+    inline const scalar * Matrix::ptr() const
+    {
+        return m_data;
+    }
+
+    /// Row i as a Vector4; i must be below 4.
+    inline Vector4 Matrix::row(uint i) const
+    {
+        nvDebugCheck(i < 4);
+        return Vector4(get(i, 0), get(i, 1), get(i, 2), get(i, 3));
+    }
+
+    /// Column i as a Vector4; i must be below 4.
+    inline Vector4 Matrix::column(uint i) const
+    {
+        nvDebugCheck(i < 4);
+        return Vector4(get(0, i), get(1, i), get(2, i), get(3, i));
+    }
+
+    /// Apply scale.
+    inline void Matrix::scale(scalar s)
+    {
+        // All 16 stored elements are multiplied by s, the fourth row and column included.
+        for (int idx = 0; idx < 16; idx++) {
+            m_data[idx] *= s;
+        }
+    }
+
+    /// Apply a per-axis scale: columns 0, 1 and 2 are multiplied by s.x, s.y and s.z.
+    /// Column 3 is not modified.
+    inline void Matrix::scale(Vector3::Arg s)
+    {
+        for (int r = 0; r < 4; r++) {
+            m_data[0 + r] *= s.x;
+            m_data[4 + r] *= s.y;
+            m_data[8 + r] *= s.z;
+        }
+    }
+
+    /// Apply translation: column 3 becomes col0 * t.x + col1 * t.y + col2 * t.z + col3.
+    inline void Matrix::translate(Vector3::Arg t)
+    {
+        for (int r = 0; r < 4; r++) {
+            m_data[12 + r] = m_data[r] * t.x + m_data[4 + r] * t.y + m_data[8 + r] * t.z + m_data[12 + r];
+        }
+    }
+
+    Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2);
+
+    /// Apply rotation: builds rotation(theta, v0, v1, v2) and applies it to this matrix.
+    inline void Matrix::rotate(scalar theta, scalar v0, scalar v1, scalar v2)
+    {
+        const Matrix rot(rotation(theta, v0, v1, v2));
+        apply(rot);
+    }
+
+    /// Apply transform: replaces this matrix with (this * m).
+    /// m must not be this matrix itself, because m is read while this is being overwritten.
+    inline void Matrix::apply(Matrix::Arg m)
+    {
+        nvDebugCheck(this != &m);
+
+        for (int r = 0; r < 4; r++) {
+            // Snapshot row r before any element of that row is overwritten.
+            const scalar lhs[4] = { get(r,0), get(r,1), get(r,2), get(r,3) };
+            for (int c = 0; c < 4; c++) {
+                m_data[4 * c + r] = lhs[0] * m(0,c) + lhs[1] * m(1,c) + lhs[2] * m(2,c) + lhs[3] * m(3,c);
+            }
+        }
+    }
+
+    /// Get scale matrix: identity with s.x, s.y and s.z on the first three diagonal entries.
+    inline Matrix scale(Vector3::Arg s)
+    {
+        Matrix result(identity);
+        result(2,2) = s.z;
+        result(1,1) = s.y;
+        result(0,0) = s.x;
+        return result;
+    }
+
+    /// Get scale matrix.
+    /// Uniform variant: identity with s on the first three diagonal entries.
+    inline Matrix scale(scalar s)
+    {
+        Matrix m(identity);
+        m(0,0) = m(1,1) = m(2,2) = s;
+        return m;
+    }
+
+    /// Get translation matrix: identity with t stored in the first three rows of column 3.
+    inline Matrix translation(Vector3::Arg t)
+    {
+        Matrix m(identity);
+        m(0,3) = t.x;
+        m(1,3) = t.y;
+        m(2,3) = t.z;
+        return m;
+    }
+
+    /// Get rotation matrix.
+    /// @param theta Angle in radians (passed directly to cosf/sinf).
+    /// @param v0,v1,v2 Rotation axis. It does not need to be unit length; it is normalized here.
+    /// @return Identity with the upper-left 3x3 replaced by the rotation. A zero-length axis
+    ///         yields the identity matrix.
+    ///
+    /// The exact unit axes take a shortcut; any other axis goes through the axis-angle formula.
+    /// The general case uses the same sign layout as the shortcuts, so both paths agree.
+    /// NOTE(review): that layout is the transpose of the textbook counter-clockwise matrix
+    /// for column vectors -- confirm the intended handedness against the callers.
+    inline Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2)
+    {
+        scalar cost = cosf(theta);
+        scalar sint = sinf(theta);
+
+        Matrix m(identity);
+
+        if( 1 == v0 && 0 == v1 && 0 == v2 ) {
+            m(1,1) = cost; m(2,1) = -sint;
+            m(1,2) = sint; m(2,2) = cost;
+        }
+        else if( 0 == v0 && 1 == v1 && 0 == v2 ) {
+            // The -sin term belongs at (0,2) (data[8] in the old flat layout), not (1,2).
+            m(0,0) = cost; m(2,0) = sint;
+            m(0,2) = -sint; m(2,2) = cost;
+        }
+        else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
+            m(0,0) = cost; m(1,0) = -sint;
+            m(0,1) = sint; m(1,1) = cost;
+        }
+        else {
+            const scalar lengthSquared = v0 * v0 + v1 * v1 + v2 * v2;
+            if( lengthSquared == 0 ) {
+                // No axis to rotate around; avoid dividing by zero and return identity.
+                return m;
+            }
+
+            // Normalize the axis first, so that every term below uses unit-length components.
+            scalar iscale = 1.0f / sqrtf(lengthSquared);
+            v0 *= iscale;
+            v1 *= iscale;
+            v2 *= iscale;
+
+            scalar abm, acm, bcm;
+            scalar mcos, asin, bsin, csin;
+            mcos = 1.0f - cost;
+            abm = v0 * v1 * mcos;
+            acm = v0 * v2 * mcos;
+            bcm = v1 * v2 * mcos;
+            asin = v0 * sint;
+            bsin = v1 * sint;
+            csin = v2 * sint;
+
+            // Fill the upper-left 3x3 column by column; the fourth row and column stay identity.
+            m(0,0) = v0 * v0 * mcos + cost;
+            m(1,0) = abm - csin;
+            m(2,0) = acm + bsin;
+            m(0,1) = abm + csin;
+            m(1,1) = v1 * v1 * mcos + cost;
+            m(2,1) = bcm - asin;
+            m(0,2) = acm - bsin;
+            m(1,2) = bcm + asin;
+            m(2,2) = v2 * v2 * mcos + cost;
+        }
+        return m;
+    }
+
+    //Matrix rotation(scalar yaw, scalar pitch, scalar roll);
+    //Matrix skew(scalar angle, Vector3::Arg v1, Vector3::Arg v2);
+
+    /// Get frustum matrix.
+    /// Bounds are given on the near plane; the far plane is at zFar.
+    inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar)
+    {
+        const scalar twoNear = 2.0f * zNear;
+        const scalar invWidth = 1.0f / (xmax - xmin);
+        const scalar invHeight = 1.0f / (ymax - ymin);
+        const scalar invDepth = 1.0f / (zFar - zNear);
+
+        // Start from all zeros; only the seven non-zero entries are written below.
+        Matrix proj(0.0f);
+
+        proj(0,0) = twoNear * invWidth;
+        proj(0,2) = (xmax + xmin) * invWidth;
+
+        proj(1,1) = twoNear * invHeight;
+        proj(1,2) = (ymax + ymin) * invHeight;
+
+        proj(2,2) = -(zFar + zNear) * invDepth;
+        proj(2,3) = -(zFar * twoNear) * invDepth;
+
+        proj(3,2) = -1.0f;
+
+        return proj;
+    }
+
+    /// Get infinite frustum matrix: same as frustum(), but without a far plane.
+    inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear)
+    {
+        const scalar twoNear = 2.0f * zNear;
+        const scalar invWidth = 1.0f / (xmax - xmin);
+        const scalar invHeight = 1.0f / (ymax - ymin);
+        const scalar nudge = 1.0; // 0.999;
+
+        Matrix proj(0.0f);
+
+        proj(0,0) = twoNear * invWidth;
+        proj(0,2) = (xmax + xmin) * invWidth;
+
+        proj(1,1) = twoNear * invHeight;
+        proj(1,2) = (ymax + ymin) * invHeight;
+
+        proj(2,2) = -1.0f * nudge;
+        proj(2,3) = -twoNear * nudge;
+
+        proj(3,2) = -1.0f;
+
+        return proj;
+    }
+
+    /// Get perspective matrix.
+    /// The half-extent in x is zNear * tan(fovy / 2); the half-extent in y is that value divided by aspect.
+    inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear, scalar zFar)
+    {
+        const scalar halfWidth = zNear * tan(fovy / 2);
+        const scalar halfHeight = halfWidth / aspect;
+
+        return frustum(-halfWidth, halfWidth, -halfHeight, halfHeight, zNear, zFar);
+    }
+
+    /// Get infinite perspective matrix: same extents as perspective(), but without a far plane.
+    inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear)
+    {
+        const scalar halfWidth = zNear * tan(fovy / 2);
+        const scalar halfHeight = halfWidth / aspect;
+        return frustum( -halfWidth, halfWidth, -halfHeight, halfHeight, zNear );
+    }
+
+    /// Get matrix determinant.
+ inline scalar Matrix::determinant() const + { + return + m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] + + m_data[2] * m_data[5] * m_data[11] * m_data[12] - m_data[1] * m_data[6] * m_data[11] * m_data[12] - m_data[3] * m_data[6] * m_data[ 8] * m_data[13] + m_data[2] * m_data[7] * m_data[ 8] * m_data[13] + + m_data[3] * m_data[4] * m_data[10] * m_data[13] - m_data[0] * m_data[7] * m_data[10] * m_data[13] - m_data[2] * m_data[4] * m_data[11] * m_data[13] + m_data[0] * m_data[6] * m_data[11] * m_data[13] + + m_data[3] * m_data[5] * m_data[ 8] * m_data[14] - m_data[1] * m_data[7] * m_data[ 8] * m_data[14] - m_data[3] * m_data[4] * m_data[ 9] * m_data[14] + m_data[0] * m_data[7] * m_data[ 9] * m_data[14] + + m_data[1] * m_data[4] * m_data[11] * m_data[14] - m_data[0] * m_data[5] * m_data[11] * m_data[14] - m_data[2] * m_data[5] * m_data[ 8] * m_data[15] + m_data[1] * m_data[6] * m_data[ 8] * m_data[15] + + m_data[2] * m_data[4] * m_data[ 9] * m_data[15] - m_data[0] * m_data[6] * m_data[ 9] * m_data[15] - m_data[1] * m_data[4] * m_data[10] * m_data[15] + m_data[0] * m_data[5] * m_data[10] * m_data[15]; + } + + inline Matrix transpose(Matrix::Arg m) + { + Matrix r; + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + r(i, j) = m(j, i); + } + } + return r; + } + + inline Matrix inverse(Matrix::Arg m) + { + Matrix r; + r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15); + r.data( 1) = m.data(3)*m.data(10)*m.data(13) - m.data(2)*m.data(11)*m.data(13) - m.data(3)*m.data(9)*m.data(14) + m.data(1)*m.data(11)*m.data(14) + m.data(2)*m.data(9)*m.data(15) - m.data(1)*m.data(10)*m.data(15); + r.data( 2) = m.data(2)*m.data( 7)*m.data(13) - m.data(3)*m.data( 
6)*m.data(13) + m.data(3)*m.data(5)*m.data(14) - m.data(1)*m.data( 7)*m.data(14) - m.data(2)*m.data(5)*m.data(15) + m.data(1)*m.data( 6)*m.data(15); + r.data( 3) = m.data(3)*m.data( 6)*m.data( 9) - m.data(2)*m.data( 7)*m.data( 9) - m.data(3)*m.data(5)*m.data(10) + m.data(1)*m.data( 7)*m.data(10) + m.data(2)*m.data(5)*m.data(11) - m.data(1)*m.data( 6)*m.data(11); + r.data( 4) = m.data(7)*m.data(10)*m.data(12) - m.data(6)*m.data(11)*m.data(12) - m.data(7)*m.data(8)*m.data(14) + m.data(4)*m.data(11)*m.data(14) + m.data(6)*m.data(8)*m.data(15) - m.data(4)*m.data(10)*m.data(15); + r.data( 5) = m.data(2)*m.data(11)*m.data(12) - m.data(3)*m.data(10)*m.data(12) + m.data(3)*m.data(8)*m.data(14) - m.data(0)*m.data(11)*m.data(14) - m.data(2)*m.data(8)*m.data(15) + m.data(0)*m.data(10)*m.data(15); + r.data( 6) = m.data(3)*m.data( 6)*m.data(12) - m.data(2)*m.data( 7)*m.data(12) - m.data(3)*m.data(4)*m.data(14) + m.data(0)*m.data( 7)*m.data(14) + m.data(2)*m.data(4)*m.data(15) - m.data(0)*m.data( 6)*m.data(15); + r.data( 7) = m.data(2)*m.data( 7)*m.data( 8) - m.data(3)*m.data( 6)*m.data( 8) + m.data(3)*m.data(4)*m.data(10) - m.data(0)*m.data( 7)*m.data(10) - m.data(2)*m.data(4)*m.data(11) + m.data(0)*m.data( 6)*m.data(11); + r.data( 8) = m.data(5)*m.data(11)*m.data(12) - m.data(7)*m.data( 9)*m.data(12) + m.data(7)*m.data(8)*m.data(13) - m.data(4)*m.data(11)*m.data(13) - m.data(5)*m.data(8)*m.data(15) + m.data(4)*m.data( 9)*m.data(15); + r.data( 9) = m.data(3)*m.data( 9)*m.data(12) - m.data(1)*m.data(11)*m.data(12) - m.data(3)*m.data(8)*m.data(13) + m.data(0)*m.data(11)*m.data(13) + m.data(1)*m.data(8)*m.data(15) - m.data(0)*m.data( 9)*m.data(15); + r.data(10) = m.data(1)*m.data( 7)*m.data(12) - m.data(3)*m.data( 5)*m.data(12) + m.data(3)*m.data(4)*m.data(13) - m.data(0)*m.data( 7)*m.data(13) - m.data(1)*m.data(4)*m.data(15) + m.data(0)*m.data( 5)*m.data(15); + r.data(11) = m.data(3)*m.data( 5)*m.data( 8) - m.data(1)*m.data( 7)*m.data( 8) - m.data(3)*m.data(4)*m.data( 9) + 
m.data(0)*m.data( 7)*m.data( 9) + m.data(1)*m.data(4)*m.data(11) - m.data(0)*m.data( 5)*m.data(11); + r.data(12) = m.data(6)*m.data( 9)*m.data(12) - m.data(5)*m.data(10)*m.data(12) - m.data(6)*m.data(8)*m.data(13) + m.data(4)*m.data(10)*m.data(13) + m.data(5)*m.data(8)*m.data(14) - m.data(4)*m.data( 9)*m.data(14); + r.data(13) = m.data(1)*m.data(10)*m.data(12) - m.data(2)*m.data( 9)*m.data(12) + m.data(2)*m.data(8)*m.data(13) - m.data(0)*m.data(10)*m.data(13) - m.data(1)*m.data(8)*m.data(14) + m.data(0)*m.data( 9)*m.data(14); + r.data(14) = m.data(2)*m.data( 5)*m.data(12) - m.data(1)*m.data( 6)*m.data(12) - m.data(2)*m.data(4)*m.data(13) + m.data(0)*m.data( 6)*m.data(13) + m.data(1)*m.data(4)*m.data(14) - m.data(0)*m.data( 5)*m.data(14); + r.data(15) = m.data(1)*m.data( 6)*m.data( 8) - m.data(2)*m.data( 5)*m.data( 8) + m.data(2)*m.data(4)*m.data( 9) - m.data(0)*m.data( 6)*m.data( 9) - m.data(1)*m.data(4)*m.data(10) + m.data(0)*m.data( 5)*m.data(10); + r.scale(1.0f / m.determinant()); + return r; + } + + inline Matrix isometryInverse(Matrix::Arg m) + { + Matrix r(identity); + + // transposed 3x3 upper left matrix + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + r(i, j) = m(j, i); + } + } + + // translate by the negative offsets + r.translate(-Vector3(m.data(12), m.data(13), m.data(14))); + + return r; + } + + //Matrix affineInverse(Matrix::Arg m); + + /// Transform the given 3d point with the given matrix. + inline Vector3 transformPoint(Matrix::Arg m, Vector3::Arg p) + { + return Vector3( + p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + m(0,3), + p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + m(1,3), + p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + m(2,3)); + } + + /// Transform the given 3d vector with the given matrix. 
+ inline Vector3 transformVector(Matrix::Arg m, Vector3::Arg p) + { + return Vector3( + p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2), + p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2), + p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2)); + } + + /// Transform the given 4d vector with the given matrix. + inline Vector4 transform(Matrix::Arg m, Vector4::Arg p) + { + return Vector4( + p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + p.w * m(0,3), + p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + p.w * m(1,3), + p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + p.w * m(2,3), + p.x * m(3,0) + p.y * m(3,1) + p.z * m(3,2) + p.w * m(3,3)); + } + + inline Matrix mul(Matrix::Arg a, Matrix::Arg b) + { + // @@ Is this the right order? mul(a, b) = b * a + Matrix m = a; + m.apply(b); + return m; + } + +} // nv namespace + + + + +#if 0 +/** @name Special matrices. */ +//@{ +/** Generate a translation matrix. */ +void TranslationMatrix(const Vec3 & v) { + data[0] = 1; data[1] = 0; data[2] = 0; data[3] = 0; + data[4] = 0; data[5] = 1; data[6] = 0; data[7] = 0; + data[8] = 0; data[9] = 0; data[10] = 1; data[11] = 0; + data[12] = v.x; data[13] = v.y; data[14] = v.z; data[15] = 1; +} + +/** Rotate theta degrees around v. 
*/ +void RotationMatrix( scalar theta, scalar v0, scalar v1, scalar v2 ) { + scalar cost = cos(theta); + scalar sint = sin(theta); + + if( 1 == v0 && 0 == v1 && 0 == v2 ) { + data[0] = 1.0f; data[1] = 0.0f; data[2] = 0.0f; data[3] = 0.0f; + data[4] = 0.0f; data[5] = cost; data[6] = -sint;data[7] = 0.0f; + data[8] = 0.0f; data[9] = sint; data[10] = cost;data[11] = 0.0f; + data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; + } + else if( 0 == v0 && 1 == v1 && 0 == v2 ) { + data[0] = cost; data[1] = 0.0f; data[2] = sint; data[3] = 0.0f; + data[4] = 0.0f; data[5] = 1.0f; data[6] = 0.0f; data[7] = 0.0f; + data[8] = -sint;data[9] = 0.0f;data[10] = cost; data[11] = 0.0f; + data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; + } + else if( 0 == v0 && 0 == v1 && 1 == v2 ) { + data[0] = cost; data[1] = -sint;data[2] = 0.0f; data[3] = 0.0f; + data[4] = sint; data[5] = cost; data[6] = 0.0f; data[7] = 0.0f; + data[8] = 0.0f; data[9] = 0.0f; data[10] = 1.0f;data[11] = 0.0f; + data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; + } + else { + //we need scale a,b,c to unit length. 
+ scalar a2, b2, c2; + a2 = v0 * v0; + b2 = v1 * v1; + c2 = v2 * v2; + + scalar iscale = 1.0f / sqrtf(a2 + b2 + c2); + v0 *= iscale; + v1 *= iscale; + v2 *= iscale; + + scalar abm, acm, bcm; + scalar mcos, asin, bsin, csin; + mcos = 1.0f - cost; + abm = v0 * v1 * mcos; + acm = v0 * v2 * mcos; + bcm = v1 * v2 * mcos; + asin = v0 * sint; + bsin = v1 * sint; + csin = v2 * sint; + data[0] = a2 * mcos + cost; + data[1] = abm - csin; + data[2] = acm + bsin; + data[3] = abm + csin; + data[4] = 0.0f; + data[5] = b2 * mcos + cost; + data[6] = bcm - asin; + data[7] = acm - bsin; + data[8] = 0.0f; + data[9] = bcm + asin; + data[10] = c2 * mcos + cost; + data[11] = 0.0f; + data[12] = 0.0f; + data[13] = 0.0f; + data[14] = 0.0f; + data[15] = 1.0f; + } +} + +/* +void SkewMatrix(scalar angle, const Vec3 & v1, const Vec3 & v2) { +v1.Normalize(); +v2.Normalize(); + +Vec3 v3; +v3.Cross(v1, v2); +v3.Normalize(); + +// Get skew factor. +scalar costheta = Vec3DotProduct(v1, v2); +scalar sintheta = Real.Sqrt(1 - costheta * costheta); +scalar skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta; + +// Build orthonormal matrix. +v1 = FXVector3.Cross(v3, v2); +v1.Normalize(); + +Matrix R = Matrix::Identity; +R[0, 0] = v3.X; // Not sure this is in the correct order... +R[1, 0] = v3.Y; +R[2, 0] = v3.Z; +R[0, 1] = v1.X; +R[1, 1] = v1.Y; +R[2, 1] = v1.Z; +R[0, 2] = v2.X; +R[1, 2] = v2.Y; +R[2, 2] = v2.Z; + +// Build skew matrix. +Matrix S = Matrix::Identity; +S[2, 1] = -skew; + +// Return skew transform. +return R * S * R.Transpose; // Not sure this is in the correct order... +} +*/ + +/** +* Generate rotation matrix for the euler angles. This is the same as computing +* 3 rotation matrices and multiplying them together in our custom order. +* +* @todo Have to recompute this code for our new convention. 
+**/ +void RotationMatrix( scalar yaw, scalar pitch, scalar roll ) { + scalar sy = sin(yaw+ToRadian(90)); + scalar cy = cos(yaw+ToRadian(90)); + scalar sp = sin(pitch-ToRadian(90)); + scalar cp = cos(pitch-ToRadian(90)); + scalar sr = sin(roll); + scalar cr = cos(roll); + + data[0] = cr*cy + sr*sp*sy; + data[1] = cp*sy; + data[2] = -sr*cy + cr*sp*sy; + data[3] = 0; + + data[4] = -cr*sy + sr*sp*cy; + data[5] = cp*cy; + data[6] = sr*sy + cr*sp*cy; + data[7] = 0; + + data[8] = sr*cp; + data[9] = -sp; + data[10] = cr*cp; + data[11] = 0; + + data[12] = 0; + data[13] = 0; + data[14] = 0; + data[15] = 1; +} + +/** Create a frustum matrix with the far plane at the infinity. */ +void Frustum( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar ) { + scalar one_deltax, one_deltay, one_deltaz, doubleznear; + + doubleznear = 2.0f * zNear; + one_deltax = 1.0f / (xmax - xmin); + one_deltay = 1.0f / (ymax - ymin); + one_deltaz = 1.0f / (zFar - zNear); + + data[0] = (scalar)(doubleznear * one_deltax); + data[1] = 0.0f; + data[2] = 0.0f; + data[3] = 0.0f; + data[4] = 0.0f; + data[5] = (scalar)(doubleznear * one_deltay); + data[6] = 0.f; + data[7] = 0.f; + data[8] = (scalar)((xmax + xmin) * one_deltax); + data[9] = (scalar)((ymax + ymin) * one_deltay); + data[10] = (scalar)(-(zFar + zNear) * one_deltaz); + data[11] = -1.f; + data[12] = 0.f; + data[13] = 0.f; + data[14] = (scalar)(-(zFar * doubleznear) * one_deltaz); + data[15] = 0.f; +} + +/** Create a frustum matrix with the far plane at the infinity. 
*/ +void FrustumInf( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear ) { + scalar one_deltax, one_deltay, doubleznear, nudge; + + doubleznear = 2.0f * zNear; + one_deltax = 1.0f / (xmax - xmin); + one_deltay = 1.0f / (ymax - ymin); + nudge = 1.0; // 0.999; + + data[0] = doubleznear * one_deltax; + data[1] = 0.0f; + data[2] = 0.0f; + data[3] = 0.0f; + + data[4] = 0.0f; + data[5] = doubleznear * one_deltay; + data[6] = 0.f; + data[7] = 0.f; + + data[8] = (xmax + xmin) * one_deltax; + data[9] = (ymax + ymin) * one_deltay; + data[10] = -1.0f * nudge; + data[11] = -1.0f; + + data[12] = 0.f; + data[13] = 0.f; + data[14] = -doubleznear * nudge; + data[15] = 0.f; +} + +/** Create an inverse frustum matrix with the far plane at the infinity. */ +void FrustumInfInv( scalar left, scalar right, scalar bottom, scalar top, scalar zNear ) { + // this matrix is wrong (not tested scalarly) I think it should be transposed. + data[0] = (right - left) / (2 * zNear); + data[1] = 0; + data[2] = 0; + data[3] = (right + left) / (2 * zNear); + data[4] = 0; + data[5] = (top - bottom) / (2 * zNear); + data[6] = 0; + data[7] = (top + bottom) / (2 * zNear); + data[8] = 0; + data[9] = 0; + data[10] = 0; + data[11] = -1; + data[12] = 0; + data[13] = 0; + data[14] = -1 / (2 * zNear); + data[15] = 1 / (2 * zNear); +} + +/** Create an homogeneous projection matrix. */ +void Perspective( scalar fov, scalar aspect, scalar zNear, scalar zFar ) { + scalar xmin, xmax, ymin, ymax; + + xmax = zNear * tan( fov/2 ); + xmin = -xmax; + + ymax = xmax / aspect; + ymin = -ymax; + + Frustum(xmin, xmax, ymin, ymax, zNear, zFar); +} + +/** Create a projection matrix with the far plane at the infinity. */ +void PerspectiveInf( scalar fov, scalar aspect, scalar zNear ) { + scalar x = zNear * tan( fov/2 ); + scalar y = x / aspect; + FrustumInf( -x, x, -y, y, zNear ); +} + +/** Create an inverse projection matrix with far plane at the infinity. 
*/ +void PerspectiveInfInv( scalar fov, scalar aspect, scalar zNear ) { + scalar x = zNear * tan( fov/2 ); + scalar y = x / aspect; + FrustumInfInv( -x, x, -y, y, zNear ); +} + +/** Build bone matrix from quatertion and offset. */ +void BoneMatrix(const Quat & q, const Vec3 & offset) { + scalar x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz; + + // calculate coefficients + x2 = q.x + q.x; + y2 = q.y + q.y; + z2 = q.z + q.z; + + xx = q.x * x2; xy = q.x * y2; xz = q.x * z2; + yy = q.y * y2; yz = q.y * z2; zz = q.z * z2; + wx = q.w * x2; wy = q.w * y2; wz = q.w * z2; + + data[0] = 1.0f - (yy + zz); + data[1] = xy - wz; + data[2] = xz + wy; + data[3] = 0.0f; + + data[4] = xy + wz; + data[5] = 1.0f - (xx + zz); + data[6] = yz - wx; + data[7] = 0.0f; + + data[8] = xz - wy; + data[9] = yz + wx; + data[10] = 1.0f - (xx + yy); + data[11] = 0.0f; + + data[12] = offset.x; + data[13] = offset.y; + data[14] = offset.z; + data[15] = 1.0f; +} + +//@} + + +/** @name Transformations: */ +//@{ + +/** Apply a general scale. */ +void Scale( scalar x, scalar y, scalar z ) { + data[0] *= x; data[4] *= y; data[8] *= z; + data[1] *= x; data[5] *= y; data[9] *= z; + data[2] *= x; data[6] *= y; data[10] *= z; + data[3] *= x; data[7] *= y; data[11] *= z; +} + +/** Apply a rotation of theta degrees around the axis v*/ +void Rotate( scalar theta, const Vec3 & v ) { + Matrix b; + b.RotationMatrix( theta, v[0], v[1], v[2] ); + Multiply4x3( b ); +} + +/** Apply a rotation of theta degrees around the axis v*/ +void Rotate( scalar theta, scalar v0, scalar v1, scalar v2 ) { + Matrix b; + b.RotationMatrix( theta, v0, v1, v2 ); + Multiply4x3( b ); +} + +/** +* Translate the matrix by t. This is the same as multiplying by a +* translation matrix with the given offset. 
+* this = T * this +*/ +void Translate( const Vec3 &t ) { + data[12] = data[0] * t.x + data[4] * t.y + data[8] * t.z + data[12]; + data[13] = data[1] * t.x + data[5] * t.y + data[9] * t.z + data[13]; + data[14] = data[2] * t.x + data[6] * t.y + data[10] * t.z + data[14]; + data[15] = data[3] * t.x + data[7] * t.y + data[11] * t.z + data[15]; +} + +/** +* Translate the matrix by x, y, z. This is the same as multiplying by a +* translation matrix with the given offsets. +*/ +void Translate( scalar x, scalar y, scalar z ) { + data[12] = data[0] * x + data[4] * y + data[8] * z + data[12]; + data[13] = data[1] * x + data[5] * y + data[9] * z + data[13]; + data[14] = data[2] * x + data[6] * y + data[10] * z + data[14]; + data[15] = data[3] * x + data[7] * y + data[11] * z + data[15]; +} + +/** Compute the transposed matrix. */ +void Transpose() { + piSwap(data[1], data[4]); + piSwap(data[2], data[8]); + piSwap(data[6], data[9]); + piSwap(data[3], data[12]); + piSwap(data[7], data[13]); + piSwap(data[11], data[14]); +} + +/** Compute the inverse of a rigid-body/isometry/orthonormal matrix. */ +void IsometryInverse() { + // transposed 3x3 upper left matrix + piSwap(data[1], data[4]); + piSwap(data[2], data[8]); + piSwap(data[6], data[9]); + + // translate by the negative offsets + Vec3 v(-data[12], -data[13], -data[14]); + data[12] = data[13] = data[14] = 0; + Translate(v); +} + +/** Compute the inverse of the affine portion of this matrix. */ +void AffineInverse() { + data[12] = data[13] = data[14] = 0; + Transpose(); +} +//@} + +/** @name Matrix operations: */ +//@{ + +/** Return the determinant of this matrix. 
*/ +scalar Determinant() const { + return data[0] * data[5] * data[10] * data[15] + + data[1] * data[6] * data[11] * data[12] + + data[2] * data[7] * data[ 8] * data[13] + + data[3] * data[4] * data[ 9] * data[14] - + data[3] * data[6] * data[ 9] * data[12] - + data[2] * data[5] * data[ 8] * data[15] - + data[1] * data[4] * data[11] * data[14] - + data[0] * data[7] * data[10] * data[12]; +} + + +/** Standard matrix product: this *= B. */ +void Multiply4x4( const Matrix & restrict B ) { + Multiply4x4(*this, B); +} + +/** Standard matrix product: this = A * B. this != B*/ +void Multiply4x4( const Matrix & A, const Matrix & restrict B ) { + piDebugCheck(this != &B); + + for(int i = 0; i < 4; i++) { + const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); + GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); + GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); + GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); + GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); + } + + /* Unrolled but does not allow this == A + data[0] = A.data[0] * B.data[0] + A.data[4] * B.data[1] + A.data[8] * B.data[2] + A.data[12] * B.data[3]; + data[1] = A.data[1] * B.data[0] + A.data[5] * B.data[1] + A.data[9] * B.data[2] + A.data[13] * B.data[3]; + data[2] = A.data[2] * B.data[0] + A.data[6] * B.data[1] + A.data[10] * B.data[2] + A.data[14] * B.data[3]; + data[3] = A.data[3] * B.data[0] + A.data[7] * B.data[1] + A.data[11] * B.data[2] + A.data[15] * B.data[3]; + data[4] = A.data[0] * B.data[4] + A.data[4] * B.data[5] + A.data[8] * B.data[6] + A.data[12] * B.data[7]; + data[5] = A.data[1] * B.data[4] + A.data[5] * B.data[5] + A.data[9] * B.data[6] + A.data[13] * B.data[7]; + data[6] = A.data[2] * B.data[4] + A.data[6] * B.data[5] + A.data[10] * B.data[6] + A.data[14] * B.data[7]; + data[7] = A.data[3] * B.data[4] + A.data[7] * B.data[5] + A.data[11] * B.data[6] + A.data[15] * B.data[7]; + 
data[8] = A.data[0] * B.data[8] + A.data[4] * B.data[9] + A.data[8] * B.data[10] + A.data[12] * B.data[11]; + data[9] = A.data[1] * B.data[8] + A.data[5] * B.data[9] + A.data[9] * B.data[10] + A.data[13] * B.data[11]; + data[10]= A.data[2] * B.data[8] + A.data[6] * B.data[9] + A.data[10] * B.data[10] + A.data[14] * B.data[11]; + data[11]= A.data[3] * B.data[8] + A.data[7] * B.data[9] + A.data[11] * B.data[10] + A.data[15] * B.data[11]; + data[12]= A.data[0] * B.data[12] + A.data[4] * B.data[13] + A.data[8] * B.data[14] + A.data[12] * B.data[15]; + data[13]= A.data[1] * B.data[12] + A.data[5] * B.data[13] + A.data[9] * B.data[14] + A.data[13] * B.data[15]; + data[14]= A.data[2] * B.data[12] + A.data[6] * B.data[13] + A.data[10] * B.data[14] + A.data[14] * B.data[15]; + data[15]= A.data[3] * B.data[12] + A.data[7] * B.data[13] + A.data[11] * B.data[14] + A.data[15] * B.data[15]; + */ +} + +/** Standard matrix product: this *= B. */ +void Multiply4x3( const Matrix & restrict B ) { + Multiply4x3(*this, B); +} + +/** Standard product of matrices, where the last row is [0 0 0 1]. 
*/ +void Multiply4x3( const Matrix & A, const Matrix & restrict B ) { + piDebugCheck(this != &B); + + for(int i = 0; i < 3; i++) { + const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); + GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); + GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); + GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); + GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); + } + data[3] = 0.0f; data[7] = 0.0f; data[11] = 0.0f; data[15] = 1.0f; + + /* Unrolled but does not allow this == A + data[0] = a.data[0] * b.data[0] + a.data[4] * b.data[1] + a.data[8] * b.data[2] + a.data[12] * b.data[3]; + data[1] = a.data[1] * b.data[0] + a.data[5] * b.data[1] + a.data[9] * b.data[2] + a.data[13] * b.data[3]; + data[2] = a.data[2] * b.data[0] + a.data[6] * b.data[1] + a.data[10] * b.data[2] + a.data[14] * b.data[3]; + data[3] = 0.0f; + data[4] = a.data[0] * b.data[4] + a.data[4] * b.data[5] + a.data[8] * b.data[6] + a.data[12] * b.data[7]; + data[5] = a.data[1] * b.data[4] + a.data[5] * b.data[5] + a.data[9] * b.data[6] + a.data[13] * b.data[7]; + data[6] = a.data[2] * b.data[4] + a.data[6] * b.data[5] + a.data[10] * b.data[6] + a.data[14] * b.data[7]; + data[7] = 0.0f; + data[8] = a.data[0] * b.data[8] + a.data[4] * b.data[9] + a.data[8] * b.data[10] + a.data[12] * b.data[11]; + data[9] = a.data[1] * b.data[8] + a.data[5] * b.data[9] + a.data[9] * b.data[10] + a.data[13] * b.data[11]; + data[10]= a.data[2] * b.data[8] + a.data[6] * b.data[9] + a.data[10] * b.data[10] + a.data[14] * b.data[11]; + data[11]= 0.0f; + data[12]= a.data[0] * b.data[12] + a.data[4] * b.data[13] + a.data[8] * b.data[14] + a.data[12] * b.data[15]; + data[13]= a.data[1] * b.data[12] + a.data[5] * b.data[13] + a.data[9] * b.data[14] + a.data[13] * b.data[15]; + data[14]= a.data[2] * b.data[12] + a.data[6] * b.data[13] + a.data[10] * b.data[14] + a.data[14] * b.data[15]; + 
data[15]= 1.0f; + */ +} +//@} + + +/** @name Vector operations: */ +//@{ + +/** Transform 3d vector (w=0). */ +void TransformVec3(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10]; +} +/** Transform 3d vector by the transpose (w=0). */ +void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2]; + dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6]; + dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10]; +} + +/** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */ +void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; +} + +/** Transform a point, normalize it, and return w. */ +scalar TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + scalar w; + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; + w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]); + *dest *= w; + return w; +} + +/** Transform a point and return w. 
*/ +scalar TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; + return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; +} + +/** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */ +void TransformVec4(const Vec3 & orig, Vec4 * dest) const { + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; + dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; +} +//@} + +/** @name Matrix analysis. */ +//@{ + +/** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */ +void GetEulerAnglesZYZ(scalar * s, scalar * t, scalar * r) const { + if( GetElem(2,2) < 1.0f ) { + if( GetElem(2,2) > -1.0f ) { + // cs*ct*cr-ss*sr -ss*ct*cr-cs*sr st*cr + // cs*ct*sr+ss*cr -ss*ct*sr+cs*cr st*sr + // -cs*st ss*st ct + *s = atan2(GetElem(1,2), -GetElem(0,2)); + *t = acos(GetElem(2,2)); + *r = atan2(GetElem(2,1), GetElem(2,0)); + } + else { + // -c(s-r) s(s-r) 0 + // s(s-r) c(s-r) 0 + // 0 0 -1 + *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r + *t = PI; + *r = 0; + } + } + else { + // c(s+r) -s(s+r) 0 + // s(s+r) c(s+r) 0 + // 0 0 1 + *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r + *t = 0; + *r = 0; + } +} + +//@} + +MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m ); + +/** Print to debug output. 
*/ +void Print() const { + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] ); + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] ); + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] ); + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] ); +} + + +public: + + scalar data[16]; + +}; +#endif + + + + +#endif // NV_MATH_MATRIX_H diff --git a/src/nvmath/Vector.inl b/src/nvmath/Vector.inl index a676ee4..9b0ec0a 100644 --- a/src/nvmath/Vector.inl +++ b/src/nvmath/Vector.inl @@ -381,14 +381,14 @@ namespace nv return Vector2(max(a.x, b.x), max(a.y, b.y)); } - inline bool isValid(Vector2::Arg v) + inline bool isFinite(Vector2::Arg v) { return isFinite(v.x) && isFinite(v.y); } inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f)) { - if (!isValid(v)) return fallback; + if (!isFinite(v)) return fallback; Vector2 vf = v; nv::floatCleanup(vf.component, 2); return vf; @@ -567,14 +567,14 @@ namespace nv return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z)); } - inline bool isValid(Vector3::Arg v) + inline bool isFinite(Vector3::Arg v) { return isFinite(v.x) && isFinite(v.y) && isFinite(v.z); } inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f)) { - if (!isValid(v)) return fallback; + if (!isFinite(v)) return fallback; Vector3 vf = v; nv::floatCleanup(vf.component, 3); return vf; @@ -699,14 +699,14 @@ namespace nv return Vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); } - inline bool isValid(Vector4::Arg v) + inline bool isFinite(Vector4::Arg v) { return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w); } inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f)) { - if (!isValid(v)) return fallback; + if (!isFinite(v)) return fallback; Vector4 vf = v; nv::floatCleanup(vf.component, 4); return vf; diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index 
717157c..b9a1bad 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -5,14 +5,13 @@ #define NV_MATH_H #include "nvcore/nvcore.h" -#include "nvcore/Debug.h" -#include "nvcore/Utils.h" // clamp +#include "nvcore/Debug.h" // nvDebugCheck +#include "nvcore/Utils.h" // clamp #include -#include // INT_MAX #if NV_OS_WIN32 || NV_OS_XBOX -#include +#include // finite, isnan #endif // Function linkage @@ -105,9 +104,12 @@ namespace nv inline float toRadian(float degree) { return degree * (PI / 180.0f); } inline float toDegree(float radian) { return radian * (180.0f / PI); } + // Robust floating point comparisons: + // http://realtimecollisiondetection.net/blog/?p=89 inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON) { - return fabs(f0-f1) <= epsilon; + //return fabs(f0-f1) <= epsilon; + return fabs(f0-f1) <= epsilon * max(1.0f, fabs(f0), fabs(f1)); } inline bool isZero(const float f, const float epsilon = NV_EPSILON) diff --git a/src/nvthread/ThreadPool.h b/src/nvthread/ThreadPool.h index 147a607..84fc41e 100644 --- a/src/nvthread/ThreadPool.h +++ b/src/nvthread/ThreadPool.h @@ -1,8 +1,8 @@ -// This code is in the public domain -- castano@gmail.com - -#pragma once -#ifndef NV_THREAD_THREADPOOL_H -#define NV_THREAD_THREADPOOL_H +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_THREAD_THREADPOOL_H +#define NV_THREAD_THREADPOOL_H #include "nvthread.h" diff --git a/src/nvtt/CubeSurface.cpp b/src/nvtt/CubeSurface.cpp index 83f39d8..99c3c7e 100644 --- a/src/nvtt/CubeSurface.cpp +++ b/src/nvtt/CubeSurface.cpp @@ -183,6 +183,35 @@ Surface CubeSurface::unfold(CubeLayout layout) const } +float CubeSurface::average(int channel) const +{ + const uint edgeLength = m->edgeLength; + + // These tables along with the surface so that we only compute them once. 
+ if (m->solidAngleTable == NULL) { + m->solidAngleTable = new SolidAngleTable(edgeLength); + } + + float total = 0.0f; + float sum = 0.0f; + + for (int f = 0; f < 6; f++) { + float * c = m->face[f].m->image->channel(channel); + + for (uint y = 0; y < edgeLength; y++) { + for (uint x = 0; x < edgeLength; x++) { + float solidAngle = m->solidAngleTable->lookup(x, y); + + total += solidAngle; + sum += c[y * edgeLength + x] * solidAngle; + } + } + } + + return sum / total; +} + + CubeSurface CubeSurface::irradianceFilter(int size) const { // @@ TODO @@ -237,7 +266,7 @@ SolidAngleTable::SolidAngleTable(uint edgeLength) : size(edgeLength/2) { for (uint y = 0; y < size; y++) { for (uint x = 0; x < size; x++) { - data[y * size + x] = solidAngleTerm(128+x, 128+y, inverseEdgeLength); + data[y * size + x] = solidAngleTerm(size+x, size+y, inverseEdgeLength); } } } @@ -631,7 +660,7 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const CubeSurface filteredCube; filteredCube.m->allocate(size); - // Store these tables along with the surface. Compute them only once! + // These tables along with the surface so that we only compute them once. if (m->solidAngleTable == NULL) { m->solidAngleTable = new SolidAngleTable(edgeLength); } diff --git a/src/nvtt/CubeSurface.h b/src/nvtt/CubeSurface.h index 31cc46d..19a42e0 100644 --- a/src/nvtt/CubeSurface.h +++ b/src/nvtt/CubeSurface.h @@ -74,7 +74,7 @@ namespace nvtt edgeLength = p.edgeLength; for (uint i = 0; i < 6; i++) { - face[i] = p.face[6]; + face[i] = p.face[i]; } solidAngleTable = NULL; // @@ Transfer tables. Needs refcounting? 
vectorTable = NULL; diff --git a/src/nvtt/OutputOptions.cpp b/src/nvtt/OutputOptions.cpp index 59de6b8..f5e6e71 100644 --- a/src/nvtt/OutputOptions.cpp +++ b/src/nvtt/OutputOptions.cpp @@ -44,6 +44,7 @@ OutputOptions::~OutputOptions() void OutputOptions::reset() { m.fileName.reset(); + m.fileHandle = NULL; m.outputHandler = NULL; m.errorHandler = NULL; @@ -52,37 +53,67 @@ void OutputOptions::reset() m.container = Container_DDS; m.version = 0; m.srgb = false; + m.deleteOutputHandler = false; } /// Set output file name. void OutputOptions::setFileName(const char * fileName) { - if (!m.fileName.isNull()) + if (m.deleteOutputHandler) { - // To close the file and avoid leak. delete m.outputHandler; } m.fileName = fileName; + m.fileHandle = NULL; m.outputHandler = NULL; + m.deleteOutputHandler = false; DefaultOutputHandler * oh = new DefaultOutputHandler(fileName); - if (!oh->stream.isError()) - { + if (oh->stream.isError()) { + delete oh; + } + else { + m.deleteOutputHandler = true; + m.outputHandler = oh; + } +} + +/// Set output file handle. +void OutputOptions::setFileHandle(void * fp) +{ + if (m.deleteOutputHandler) { + delete m.outputHandler; + } + + m.fileName.reset(); + m.fileHandle = (FILE *)fp; + m.outputHandler = NULL; + m.deleteOutputHandler = false; + + DefaultOutputHandler * oh = new DefaultOutputHandler(m.fileHandle); + if (oh->stream.isError()) { + delete oh; + } + else { + m.deleteOutputHandler = true; m.outputHandler = oh; } } + /// Set output handler. void OutputOptions::setOutputHandler(OutputHandler * outputHandler) { - if (!m.fileName.isNull()) - { + if (m.deleteOutputHandler) { delete m.outputHandler; - m.fileName.reset(); } + + m.fileName.reset(); + m.fileHandle = NULL; m.outputHandler = outputHandler; + m.deleteOutputHandler = false; } /// Set error handler. 
@@ -117,7 +148,7 @@ void OutputOptions::setSrgbFlag(bool b) bool OutputOptions::Private::hasValidOutputHandler() const { - if (!fileName.isNull()) + if (!fileName.isNull() || fileHandle != NULL) { return outputHandler != NULL; } diff --git a/src/nvtt/OutputOptions.h b/src/nvtt/OutputOptions.h index 2a272a0..90376b9 100644 --- a/src/nvtt/OutputOptions.h +++ b/src/nvtt/OutputOptions.h @@ -25,16 +25,19 @@ #ifndef NV_TT_OUTPUTOPTIONS_H #define NV_TT_OUTPUTOPTIONS_H -#include // Path -#include #include "nvtt.h" +#include "nvcore/StrLib.h" // Path +#include "nvcore/StdStream.h" + + namespace nvtt { struct DefaultOutputHandler : public nvtt::OutputHandler { DefaultOutputHandler(const char * fileName) : stream(fileName) {} + DefaultOutputHandler(FILE * fp) : stream(fp, false) {} virtual ~DefaultOutputHandler() {} @@ -64,6 +67,7 @@ namespace nvtt struct OutputOptions::Private { nv::Path fileName; + FILE * fileHandle; OutputHandler * outputHandler; ErrorHandler * errorHandler; @@ -72,6 +76,7 @@ namespace nvtt Container container; int version; bool srgb; + bool deleteOutputHandler; bool hasValidOutputHandler() const; diff --git a/src/nvtt/Surface.cpp b/src/nvtt/Surface.cpp index e5fb086..59f8148 100644 --- a/src/nvtt/Surface.cpp +++ b/src/nvtt/Surface.cpp @@ -704,13 +704,14 @@ void Surface::resize(int w, int h, int d, ResizeFilter filter) void Surface::resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params) { - FloatImage * img = m->image; - if (img == NULL || (w == img->width() && h == img->height() && d == img->depth())) { + if (isNull() || (w == width() && h == height() && d == depth())) { return; } detach(); + FloatImage * img = m->image; + FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode; if (m->alphaMode == AlphaMode_Transparency) @@ -781,7 +782,7 @@ void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter) void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, float 
filterWidth, const float * params) { - if (m->image == NULL) return; + if (isNull()) return; int w = m->image->width(); int h = m->image->height(); @@ -803,13 +804,14 @@ bool Surface::buildNextMipmap(MipmapFilter filter) bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params) { - FloatImage * img = m->image; - if (img == NULL || (img->width() == 1 && img->height() == 1 && img->depth() == 1)) { + if (isNull() || (width() == 1 && height() == 1 && depth() == 1)) { return false; } detach(); + FloatImage * img = m->image; + FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode; if (m->alphaMode == AlphaMode_Transparency) @@ -868,13 +870,14 @@ void Surface::canvasSize(int w, int h, int d) { nvDebugCheck(w > 0 && h > 0 && d > 0); - FloatImage * img = m->image; - if (img == NULL || (w == img->width() && h == img->height() && d == img->depth())) { + if (isNull() || (w == width() && h == height() && d == depth())) { return; } detach(); + FloatImage * img = m->image; + FloatImage * new_img = new FloatImage; new_img->allocate(4, w, h, d); new_img->clear(); @@ -903,7 +906,7 @@ void Surface::canvasSize(int w, int h, int d) // Color transforms. 
void Surface::toLinear(float gamma) { - if (m->image == NULL) return; + if (isNull()) return; if (equal(gamma, 1.0f)) return; detach(); @@ -913,7 +916,7 @@ void Surface::toLinear(float gamma) void Surface::toGamma(float gamma) { - if (m->image == NULL) return; + if (isNull()) return; if (equal(gamma, 1.0f)) return; detach(); @@ -923,7 +926,8 @@ void Surface::toGamma(float gamma) static float toSrgb(float f) { - if (f <= 0.0) f = 0.0f; + if (isNan(f)) f = 0.0f; + else if (f <= 0.0f) f = 0.0f; else if (f <= 0.0031308f) f = 12.92f * f; else if (f <= 1.0f) f = (powf(f, 0.41666f) * 1.055f) - 0.055f; else f = 1.0f; @@ -932,21 +936,43 @@ static float toSrgb(float f) { void Surface::toSrgb() { - FloatImage * img = m->image; - if (img == NULL) return; + if (isNull()) return; detach(); + FloatImage * img = m->image; + const uint count = img->pixelCount(); - for (uint j = 0; j < count; j++) - { - float & r = img->pixel(0, j); - float & g = img->pixel(1, j); - float & b = img->pixel(2, j); + for (uint c = 0; c < 3; c++) { + float * channel = img->channel(c); + for (uint i = 0; i < count; i++) { + channel[i] = ::toSrgb(channel[i]); + } + } +} + +static float fromSrgb(float f) { + if (f < 0.0f) f = 0.0f; + else if (f < 0.04045f) f = f / 12.92f; + else if (f <= 1.0f) f = powf((f + 0.055f) / 1.055f, 2.4f); + else f = 1.0f; + return f; +} + +void Surface::toLinearFromSrgb() +{ + if (isNull()) return; + + detach(); - r = ::toSrgb(r); - g = ::toSrgb(g); - b = ::toSrgb(b); + FloatImage * img = m->image; + + const uint count = img->pixelCount(); + for (uint c = 0; c < 3; c++) { + float * channel = img->channel(c); + for (uint i = 0; i < count; i++) { + channel[i] = ::fromSrgb(channel[i]); + } } } @@ -962,28 +988,25 @@ static float toXenonSrgb(float f) { void Surface::toXenonSrgb() { - FloatImage * img = m->image; - if (img == NULL) return; + if (isNull()) return; detach(); - const uint count = img->pixelCount(); - for (uint j = 0; j < count; j++) - { - float & r = img->pixel(0, j); - 
float & g = img->pixel(1, j); - float & b = img->pixel(2, j); + FloatImage * img = m->image; - r = ::toXenonSrgb(r); - g = ::toXenonSrgb(g); - b = ::toXenonSrgb(b); + const uint count = img->pixelCount(); + for (uint c = 0; c < 3; c++) { + float * channel = img->channel(c); + for (uint i = 0; i < count; i++) { + channel[i] = ::toXenonSrgb(channel[i]); + } } } void Surface::transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1000,7 +1023,7 @@ void Surface::transform(const float w0[4], const float w1[4], const float w2[4], void Surface::swizzle(int r, int g, int b, int a) { - if (m->image == NULL) return; + if (isNull()) return; if (r == 0 && g == 1 && b == 2 && a == 3) return; detach(); @@ -1011,7 +1034,7 @@ void Surface::swizzle(int r, int g, int b, int a) // color * scale + bias void Surface::scaleBias(int channel, float scale, float bias) { - if (m->image == NULL) return; + if (isNull()) return; if (equal(scale, 1.0f) && equal(bias, 0.0f)) return; detach(); @@ -1021,7 +1044,7 @@ void Surface::scaleBias(int channel, float scale, float bias) void Surface::clamp(int channel, float low, float high) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1045,7 +1068,7 @@ void Surface::expandNormal() void Surface::blend(float red, float green, float blue, float alpha, float t) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1067,7 +1090,7 @@ void Surface::blend(float red, float green, float blue, float alpha, float t) void Surface::premultiplyAlpha() { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1089,7 +1112,7 @@ void Surface::premultiplyAlpha() void Surface::toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1116,7 +1139,7 @@ void Surface::toGreyScale(float redScale, float 
greenScale, float blueScale, flo // Draw colored border. void Surface::setBorder(float r, float g, float b, float a) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1158,7 +1181,7 @@ void Surface::setBorder(float r, float g, float b, float a) // Fill image with the given color. void Surface::fill(float red, float green, float blue, float alpha) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1181,7 +1204,7 @@ void Surface::fill(float red, float green, float blue, float alpha) void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1220,7 +1243,7 @@ void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/) // Once you have M quantized, you would compute the corresponding RGB and quantize that. void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1288,7 +1311,7 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/) void Surface::fromRGBM(float range/*= 1*/) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1557,7 +1580,7 @@ void Surface::fromRGBE(int mantissaBits, int exponentBits) // Y is in the [0, 1] range, while CoCg are in the [-1, 1] range. void Surface::toYCoCg() { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1594,7 +1617,7 @@ void Surface::toYCoCg() // and minimize bilinear interpolation artifacts. 
void Surface::blockScaleCoCg(int bits/*= 5*/, float threshold/*= 0.0*/) { - if (m->image == NULL || m->image->depth() != 1) return; + if (isNull() || depth() != 1) return; detach(); @@ -1652,7 +1675,7 @@ void Surface::blockScaleCoCg(int bits/*= 5*/, float threshold/*= 0.0*/) void Surface::fromYCoCg() { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1685,7 +1708,7 @@ void Surface::fromYCoCg() void Surface::toLUVW(float range/*= 1.0f*/) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1720,7 +1743,7 @@ void Surface::fromLUVW(float range/*= 1.0f*/) void Surface::abs(int channel) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1735,7 +1758,7 @@ void Surface::abs(int channel) void Surface::convolve(int channel, int kernelSize, float * kernelData) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1746,7 +1769,7 @@ void Surface::convolve(int channel, int kernelSize, float * kernelData) /* void Surface::blockLuminanceScale(float scale) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1821,7 +1844,7 @@ void Surface::blockLuminanceScale(float scale) /* void Surface::toJPEGLS() { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1844,7 +1867,7 @@ void Surface::toJPEGLS() void Surface::fromJPEGLS() { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1870,7 +1893,7 @@ void Surface::fromJPEGLS() // If dither is true, this uses Floyd-Steinberg dithering method. void Surface::binarize(int channel, float threshold, bool dither) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -1933,7 +1956,7 @@ void Surface::binarize(int channel, float threshold, bool dither) // When dither is true, this uses Floyd-Steinberg dithering. 
void Surface::quantize(int channel, int bits, bool exactEndPoints, bool dither) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -2004,7 +2027,7 @@ void Surface::quantize(int channel, int bits, bool exactEndPoints, bool dither) // Set normal map options. void Surface::toNormalMap(float sm, float medium, float big, float large) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -2023,7 +2046,7 @@ void Surface::toNormalMap(float sm, float medium, float big, float large) void Surface::normalizeNormalMap() { - if (m->image == NULL) return; + if (isNull()) return; if (!m->isNormalMap) return; detach(); @@ -2033,7 +2056,7 @@ void Surface::normalizeNormalMap() void Surface::transformNormals(NormalTransform xform) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -2106,7 +2129,7 @@ void Surface::transformNormals(NormalTransform xform) void Surface::reconstructNormals(NormalTransform xform) { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -2155,7 +2178,7 @@ void Surface::reconstructNormals(NormalTransform xform) void Surface::toCleanNormalMap() { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -2174,14 +2197,14 @@ void Surface::toCleanNormalMap() // [-1,1] -> [ 0,1] void Surface::packNormals() { - if (m->image == NULL) return; + if (isNull()) return; detach(); m->image->packNormals(0); } // [ 0,1] -> [-1,1] void Surface::expandNormals() { - if (m->image == NULL) return; + if (isNull()) return; detach(); m->image->expandNormals(0); } @@ -2189,7 +2212,7 @@ void Surface::expandNormals() { void Surface::flipX() { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -2198,7 +2221,7 @@ void Surface::flipX() void Surface::flipY() { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -2207,7 +2230,7 @@ void Surface::flipY() void Surface::flipZ() { - if (m->image == NULL) return; + if (isNull()) return; detach(); @@ -2233,6 +2256,8 @@ bool 
Surface::copyChannel(const Surface & srcImage, int srcChannel, int dstChann detach(); + dst = m->image; + memcpy(dst->channel(dstChannel), src->channel(srcChannel), dst->pixelCount()*sizeof(float)); return true; @@ -2252,6 +2277,8 @@ bool Surface::addChannel(const Surface & srcImage, int srcChannel, int dstChanne detach(); + dst = m->image; + const uint w = src->width(); const uint h = src->height(); diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index c71a41e..4f2b068 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -350,6 +350,7 @@ namespace nvtt NVTT_API void reset(); NVTT_API void setFileName(const char * fileName); + NVTT_API void setFileHandle(void * fp); NVTT_API void setOutputHandler(OutputHandler * outputHandler); NVTT_API void setErrorHandler(ErrorHandler * errorHandler); @@ -464,6 +465,7 @@ namespace nvtt NVTT_API void toLinear(float gamma); NVTT_API void toGamma(float gamma); NVTT_API void toSrgb(); + NVTT_API void toLinearFromSrgb(); NVTT_API void toXenonSrgb(); NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]); NVTT_API void swizzle(int r, int g, int b, int a); @@ -564,6 +566,8 @@ namespace nvtt // @@ Add edge fixup methods. + NVTT_API float average(int channel) const; + // Filtering. NVTT_API CubeSurface irradianceFilter(int size) const; NVTT_API CubeSurface cosinePowerFilter(int size, float cosinePower) const;