diff --git a/src/nvtt/ClusterFit.cpp b/src/nvtt/ClusterFit.cpp index a83c68c..54652e9 100644 --- a/src/nvtt/ClusterFit.cpp +++ b/src/nvtt/ClusterFit.cpp @@ -1,82 +1,13 @@ -/* ----------------------------------------------------------------------------- - - Copyright (c) 2006 Simon Brown si@sjbrown.co.uk - Copyright (c) 2006 Ignacio Castano icastano@nvidia.com - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -------------------------------------------------------------------------- */ +// MIT license see full LICENSE text at end of file #include "ClusterFit.h" #include "nvmath/Fitting.h" #include "nvmath/Vector.inl" -#include "nvmath/ftoi.h" -#include "nvimage/ColorBlock.h" #include // FLT_MAX using namespace nv; -ClusterFit::ClusterFit() -{ -} - -/* -// find minimum and maximum colors based on bounding box in color space -inline static void fit_colors_bbox(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1) -{ - *c0 = Vector3(0); - *c1 = Vector3(1); - - for (int i = 0; i < count; i++) { - *c0 = max(*c0, colors[i]); - *c1 = min(*c1, colors[i]); - } -} - -inline static void select_diagonal(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1) -{ - Vector3 center = (*c0 + *c1) * 0.5f; - - Vector2 covariance = Vector2(0); - for (int i = 0; i < count; i++) { - Vector3 t = colors[i] - center; - covariance += t.xy() * t.z; - } - - float x0 = c0->x; - float y0 = c0->y; - float x1 = c1->x; - float y1 = c1->y; - - if (covariance.x < 0) { - swap(x0, x1); - } - if (covariance.y < 0) { - swap(y0, y1); - } - - c0->set(x0, y0, c0->z); - c1->set(x1, y1, c1->z); -} -*/ void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count) { @@ -91,17 +22,10 @@ void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int m_count = count; + // I've tried using a lower quality approximation of the principal direction, but the best fit line seems to produce best results. Vector3 principal = Fit::computePrincipalComponent_PowerMethod(count, colors, weights, metric); //Vector3 principal = Fit::computePrincipalComponent_EigenSolver(count, colors, weights, metric); - /*// This approximation produces slightly lower quality: - Vector3 c0, c1; - fit_colors_bbox(colors, count, &c0, &c1); - select_diagonal(colors, count, &c0, &c1); - if (c0 != c1) { - principal = normalize(c1 - c0); - }*/ - // build the list of values int order[16]; float dps[16]; @@ -194,11 +118,11 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end ) SimdVector x0 = zero; // check all possible clusters for this total order - for( int c0 = 0; c0 <= count; c0++) + for (int c0 = 0; c0 <= count; c0++) { SimdVector x1 = zero; - for( int c1 = 0; c1 <= count-c0; c1++) + for (int c1 = 0; c1 <= count-c0; c1++) { const SimdVector x2 = m_xsum - x1 - x0; @@ -238,7 +162,7 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end ) SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); // keep the solution if it wins - if( compareAnyLessThan( error, besterror ) ) + if (compareAnyLessThan(error, besterror)) { besterror = error; beststart = a; @@ -252,9 +176,8 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end ) } // save the block if necessary - if( compareAnyLessThan( besterror, m_besterror ) ) + if (compareAnyLessThan(besterror, m_besterror)) { - *start = beststart.toVector3(); *end = bestend.toVector3(); @@ -288,15 +211,15 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end ) SimdVector x0 = zero; // check all possible clusters for this total order - for( int c0 = 0; c0 <= count; c0++) + for (int c0 = 0; c0 <= count; c0++) { SimdVector x1 = zero; - for( int c1 = 0; c1 <= count-c0; c1++) + for (int c1 = 0; c1 <= count-c0; c1++) { SimdVector x2 = zero; - for( int c2 = 0; c2 <= count-c0-c1; c2++) + for (int c2 = 0; c2 <= count-c0-c1; c2++) { const SimdVector x3 = m_xsum - x2 - x1 - x0; @@ -469,7 +392,7 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end) } // save the block if necessary - if( besterror < m_besterror ) + if (besterror < m_besterror) { *start = beststart; @@ -582,3 +505,25 @@ bool ClusterFit::compress4(Vector3 * start, Vector3 * end) } #endif // NVTT_USE_SIMD + +// Copyright (c) 2006-2020 Ignacio Castano icastano@nvidia.com +// Copyright (c) 2006 Simon Brown si@sjbrown.co.uk +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/nvtt/ClusterFit.h b/src/nvtt/ClusterFit.h index 7f85b6c..3597143 100644 --- a/src/nvtt/ClusterFit.h +++ b/src/nvtt/ClusterFit.h @@ -1,31 +1,5 @@ -/* ----------------------------------------------------------------------------- - - Copyright (c) 2006 Simon Brown si@sjbrown.co.uk - Copyright (c) 2006 Ignacio Castano icastano@nvidia.com - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -------------------------------------------------------------------------- */ - -#ifndef NVTT_CLUSTERFIT_H -#define NVTT_CLUSTERFIT_H +// MIT license see full LICENSE text at end of file +#pragma once #include "nvmath/SimdVector.h" #include "nvmath/Vector.h" @@ -42,9 +16,8 @@ namespace nv { class ClusterFit { public: - ClusterFit(); + ClusterFit() {} - //void setColorSet(const ColorSet * set); void setColorSet(const Vector3 * colors, const float * weights, int count); void setColorWeights(const Vector4 & w); @@ -80,4 +53,24 @@ namespace nv { } // nv namespace -#endif // NVTT_CLUSTERFIT_H +// Copyright (c) 2006-2020 Ignacio Castano icastano@nvidia.com +// Copyright (c) 2006 Simon Brown si@sjbrown.co.uk +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/nvtt/CubeSurface.cpp b/src/nvtt/CubeSurface.cpp index 004a02b..91c13b6 100644 --- a/src/nvtt/CubeSurface.cpp +++ b/src/nvtt/CubeSurface.cpp @@ -303,7 +303,10 @@ const Surface & CubeSurface::face(int f) const bool CubeSurface::load(const char * fileName, int mipmap) { if (strEqual(Path::extension(fileName), ".dds")) { - nv::DirectDrawSurface dds(fileName); + nv::DirectDrawSurface dds; + if (!dds.load(fileName)) { + return false; + } if (!dds.isValid()/* || !dds.isSupported()*/) { return false; diff --git a/src/nvtt/Surface.cpp b/src/nvtt/Surface.cpp index bf5955c..a3bc794 100644 --- a/src/nvtt/Surface.cpp +++ b/src/nvtt/Surface.cpp @@ -44,6 +44,7 @@ #include "nvthread/ParallelFor.h" #include "nvcore/Array.inl" +#include "nvcore/StrLib.h" #include #include // memset, memcpy @@ -569,77 +570,80 @@ bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/) AutoPtr img(ImageIO::loadFloat(fileName)); if (img == NULL) { // Try loading as DDS. - if (nv::strEqual(nv::Path::extension(fileName), ".dds")) { - nv::DirectDrawSurface dds; - if (dds.load(fileName)) { - if (dds.header.isBlockFormat()) { - int w = dds.surfaceWidth(0); - int h = dds.surfaceHeight(0); - uint size = dds.surfaceSize(0); - - void * data = malloc(size); - dds.readSurface(0, 0, data, size); - - // @@ Handle all formats! @@ Get nvtt format from dds.surfaceFormat() ? - - if (dds.header.hasDX10Header()) { - if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM_SRGB) { - this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data); - } - else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM_SRGB) { - this->setImage2D(nvtt::Format_BC2, nvtt::Decoder_D3D10, w, h, data); - } - else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM_SRGB) { - this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data); - } - else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16) { - this->setImage2D(nvtt::Format_BC6, nvtt::Decoder_D3D10, w, h, data); - } - else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM_SRGB) { - this->setImage2D(nvtt::Format_BC7, nvtt::Decoder_D3D10, w, h, data); - } - else { - // @@ - nvCheck(false && "Format not handled with DDS10 header."); - } - } - else { - uint fourcc = dds.header.pf.fourcc; - if (fourcc == FOURCC_DXT1) { - this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data); - } - else if (fourcc == FOURCC_DXT3) { - this->setImage2D(nvtt::Format_BC2, nvtt::Decoder_D3D10, w, h, data); - } - else if (fourcc == FOURCC_DXT5) { - this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data); - } - else { - // @@ - nvCheck(false && "Format not handled with DDS9 header."); - } - } + if (!nv::strEqual(nv::Path::extension(fileName), ".dds")) { + return false; + } + + nv::DirectDrawSurface dds; + if (!dds.load(fileName)) { + return false; + } + + if (dds.header.isBlockFormat()) { + int w = dds.surfaceWidth(0); + int h = dds.surfaceHeight(0); + uint size = dds.surfaceSize(0); + + void * data = malloc(size); + dds.readSurface(0, 0, data, size); + + // @@ Handle all formats! @@ Get nvtt format from dds.surfaceFormat() ? - free(data); + if (dds.header.hasDX10Header()) { + if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM_SRGB) { + this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data); + } + else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM_SRGB) { + this->setImage2D(nvtt::Format_BC2, nvtt::Decoder_D3D10, w, h, data); + } + else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM_SRGB) { + this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data); + } + else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16) { + this->setImage2D(nvtt::Format_BC6, nvtt::Decoder_D3D10, w, h, data); + } + else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM_SRGB) { + this->setImage2D(nvtt::Format_BC7, nvtt::Decoder_D3D10, w, h, data); } else { - Image img; - dds.mipmap(&img, /*face=*/0, /*mipmap=*/0); + // @@ + nvCheck(false && "Format not handled with DDS10 header."); + } + } + else { + uint fourcc = dds.header.pf.fourcc; + if (fourcc == FOURCC_DXT1) { + this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data); + } + else if (fourcc == FOURCC_DXT3) { + this->setImage2D(nvtt::Format_BC2, nvtt::Decoder_D3D10, w, h, data); + } + else if (fourcc == FOURCC_DXT5) { + this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data); + } + else { + // @@ + nvCheck(false && "Format not handled with DDS9 header."); + } + } - int w = img.width(); - int h = img.height(); - int d = img.depth(); + free(data); + } + else { + // @@ Separate image decoder from dds reader. + Image img; + imageFromDDS(&img, dds, /*face=*/0, /*mipmap=*/0); - // @@ Add support for all pixel formats. + int w = img.width; + int h = img.height; + int d = img.depth; - this->setImage(nvtt::InputFormat_BGRA_8UB, w, h, d, img.pixels()); - } + // @@ Add support for all pixel formats. - return true; - } + this->setImage(nvtt::InputFormat_BGRA_8UB, w, h, d, img.pixels()); } - return false; + return true; } detach(); @@ -672,7 +676,7 @@ bool Surface::save(const char * fileName, bool hasAlpha/*=0*/, bool hdr/*=0*/) c nvCheck(image != NULL); if (hasAlpha) { - image->setFormat(Image::Format_ARGB); + image->format = Image::Format_ARGB; } return ImageIO::save(fileName, image.ptr()); diff --git a/src/nvtt/tests/bc1enc.cpp b/src/nvtt/tests/bc1enc.cpp index a48c2c1..38e4766 100644 --- a/src/nvtt/tests/bc1enc.cpp +++ b/src/nvtt/tests/bc1enc.cpp @@ -28,32 +28,19 @@ typedef unsigned char u8; typedef unsigned int u32; -// Defer statement: -#define CONCAT_INTERNAL(x, y) x##y -#define CONCAT(x, y) CONCAT_INTERNAL(x, y) +#define TEST_STB 1 +#define TEST_STB_HQ 1 -template -struct ExitScope -{ - T lambda; - ExitScope(T lambda) - : lambda(lambda) - { - } - ~ExitScope() { lambda(); } +#define TEST_NVTT_FAST 1 +#define TEST_NVTT_GELD 0 +#define TEST_NVTT 1 +#define TEST_NVTT_HQ 1 -private: - ExitScope& operator=(const ExitScope&); -}; +#define TEST_SQUISH 0 +#define TEST_SQUISH_HQ 0 -class ExitScopeHelp -{ -public: - template - ExitScope operator+(T t) { return t; } -}; +#define TEST_AMD_CMP 0 -#define defer const auto& __attribute__((unused)) CONCAT(defer__, __LINE__) = ExitScopeHelp() + [&]() static float mse_to_psnr(float mse) { @@ -109,14 +96,14 @@ void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, ui */ // Returns mse. -float evaluate_dxt1_mse(uint8 * rgba, uint8 * block, int block_count, int decoder = 2) { +float evaluate_dxt1_mse(uint8 * rgba, uint8 * block, int block_count, int decoder = 0) { double total = 0.0f; for (int b = 0; b < block_count; b++) { - total += nv::evaluate_dxt1_error(rgba, (BlockDXT1 *)block, decoder) / 255.0; + total += nv::evaluate_dxt1_error(rgba, (BlockDXT1 *)block, decoder); rgba += 4 * 4 * 4; block += 8; } - return float(total / (3 * 16 * block_count)); + return float(total / (16 * block_count)); } #define MAKEFOURCC(str) (uint(str[0]) | (uint(str[1]) << 8) | (uint(str[2]) << 16) | (uint(str[3]) << 24 )) @@ -180,7 +167,6 @@ bool output_dxt_dds (u32 w, u32 h, const u8* data, const char * filename) { return true; } -const int COMPRESSOR_COUNT = 7; struct Stats { const char * compressorName; Array mseArray; @@ -244,7 +230,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) { const int repeat_count = 1; // 8 #endif - { + if (TEST_STB) { memset(block_data, 0, block_count * 8); timer.start(); @@ -265,7 +251,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) { stats++; } - { + if (TEST_STB_HQ) { memset(block_data, 0, block_count * 8); timer.start(); @@ -286,7 +272,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) { stats++; } - { + if (TEST_NVTT_FAST) { memset(block_data, 0, block_count * 8); Vector3 color_weights(1); @@ -318,7 +304,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) { stats++; } - { + if (TEST_NVTT_GELD) { memset(block_data, 0, block_count * 8); timer.start(); @@ -340,7 +326,39 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) { stats++; } - { + if (TEST_NVTT) { + memset(block_data, 0, block_count * 8); + Vector3 color_weights(1); + + timer.start(); + for (int i = 0; i < repeat_count; i++) { + for (int b = 0; b < block_count; b++) { + Vector4 input_colors[16]; + float input_weights[16]; + for (int j = 0; j < 16; j++) { + input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f; + input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f; + input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f; + input_colors[j].w = 1.0f; + input_weights[j] = 1.0f; + } + + compress_dxt1(input_colors, input_weights, color_weights, false, false, (BlockDXT1*)(block_data + b * 8)); + } + } + timer.stop(); + + float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count); + //printf("nvtt hq \t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse)); + + //output_dxt_dds(bw, bh, block_data, "nvtt_hq.dds"); + stats->compressorName = "nvtt"; + stats->mseArray[index] = mse; + stats->timeArray[index] = timer.elapsed(); + stats++; + } + + if (TEST_NVTT_HQ) { memset(block_data, 0, block_count * 8); Vector3 color_weights(1); @@ -357,7 +375,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) { input_weights[j] = 1.0f; } - compress_dxt1(input_colors, input_weights, color_weights, false, (BlockDXT1*)(block_data + b * 8)); + compress_dxt1(input_colors, input_weights, color_weights, true, true, (BlockDXT1*)(block_data + b * 8)); } } timer.stop(); @@ -372,7 +390,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) { stats++; } - { + if (TEST_SQUISH) { memset(block_data, 0, block_count * 8); timer.start(); @@ -393,7 +411,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) { stats++; } - /*{ + if (TEST_SQUISH_HQ) { memset(block_data, 0, block_count * 8); timer.start(); @@ -412,9 +430,9 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) { stats->mseArray[index] = mse; stats->timeArray[index] = timer.elapsed(); stats++; - }*/ + } - { + if (TEST_AMD_CMP) { memset(block_data, 0, block_count * 8); timer.start(); @@ -486,9 +504,12 @@ bool analyze_bc1(const char * inputFileName) { Timer timer; int stb_better_than_nvtt_fast = 0; + int stb_better_than_nvtt = 0; int stb_better_than_nvtt_hq = 0; int squish_better_than_nvtt_hq = 0; + int nvtt_hq_wins = 0; + int this_should_never_happen = 0; int this_should_never_happen_either = 0; @@ -526,7 +547,10 @@ bool analyze_bc1(const char * inputFileName) { compress_dxt1_fast_geld(rgba_block, (BlockDXT1*)dxt_block); float mse_nvtt_geld = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); - compress_dxt1(input_colors, input_weights, color_weights, false, (BlockDXT1*)dxt_block); + compress_dxt1(input_colors, input_weights, color_weights, true, false, (BlockDXT1*)dxt_block); + float mse_nvtt = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); + + compress_dxt1(input_colors, input_weights, color_weights, true, true, (BlockDXT1*)dxt_block); float mse_nvtt_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); squish::Compress(rgba_block, dxt_block, squish::kDxt1); @@ -538,9 +562,15 @@ bool analyze_bc1(const char * inputFileName) { if (mse_stb < mse_nvtt_fast) { stb_better_than_nvtt_fast++; } + if (mse_stb < mse_nvtt) { + stb_better_than_nvtt++; + } if (mse_stb < mse_nvtt_hq) { stb_better_than_nvtt_hq++; } + if (mse_nvtt_hq < mse_nvtt) { + nvtt_hq_wins++; + } if (mse_squish < mse_nvtt_hq) { squish_better_than_nvtt_hq++; } @@ -641,8 +671,8 @@ int main(int argc, char *argv[]) //const char * inputFileName = "testsuite/kodak/kodim18.png"; //const char * inputFileName = "testsuite/kodak/kodim15.png"; //const char * inputFileName = "testsuite/waterloo/frymire.png"; - // test_bc1(inputFileName, 0); - + + //test_bc1(inputFileName, 0); analyze_bc1(inputFileName); //const char ** set = roblox_set; @@ -651,9 +681,10 @@ int main(int argc, char *argv[]) const char ** set = image_set; int count = sizeof(image_set) / sizeof(char*); - Stats stats[COMPRESSOR_COUNT]; + const int MAX_COMPRESSOR_COUNT = 16; + Stats stats[MAX_COMPRESSOR_COUNT]; - for (int i = 0; i < COMPRESSOR_COUNT; i++) { + for (int i = 0; i < MAX_COMPRESSOR_COUNT; i++) { stats[i].compressorName = nullptr; stats[i].mseArray.resize(count, 0.0f); stats[i].timeArray.resize(count, 0.0f); @@ -664,16 +695,16 @@ int main(int argc, char *argv[]) test_bc1(set[i], i, stats); - for (int c = 0; c < COMPRESSOR_COUNT; c++) { + for (int c = 0; c < MAX_COMPRESSOR_COUNT; c++) { if (stats[c].compressorName) { - printf("%-16s %f\t%f\n", stats[c].compressorName, sqrtf(stats[c].mseArray[i]), stats[c].timeArray[i]); + printf("%-16s %f\t%f\t%f\n", stats[c].compressorName, sqrtf(stats[c].mseArray[i]), mse_to_psnr(stats[c].mseArray[i]), stats[c].timeArray[i]); } } } // Print stats. printf("\nAverage Results:\n"); - for (int c = 0; c < COMPRESSOR_COUNT; c++) { + for (int c = 0; c < MAX_COMPRESSOR_COUNT; c++) { if (stats[c].compressorName) { float sum = 0.0f; for (float it : stats[c].mseArray) { @@ -686,7 +717,7 @@ int main(int argc, char *argv[]) time += it; } - printf("%-16s %f\t%f\n", stats[c].compressorName, sqrtf(sum), time); + printf("%-16s %f\t%f\t%f\n", stats[c].compressorName, sqrtf(sum), mse_to_psnr(sum), time); } }