More cleanups!

This commit is contained in:
Ignacio 2020-03-30 10:12:29 -07:00
parent adce1a00da
commit ca3871a28c
5 changed files with 210 additions and 234 deletions

View File

@ -1,82 +1,13 @@
/* ----------------------------------------------------------------------------- // MIT license see full LICENSE text at end of file
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "ClusterFit.h" #include "ClusterFit.h"
#include "nvmath/Fitting.h" #include "nvmath/Fitting.h"
#include "nvmath/Vector.inl" #include "nvmath/Vector.inl"
#include "nvmath/ftoi.h"
#include "nvimage/ColorBlock.h"
#include <float.h> // FLT_MAX #include <float.h> // FLT_MAX
using namespace nv; using namespace nv;
ClusterFit::ClusterFit()
{
}
/*
// find minimum and maximum colors based on bounding box in color space
inline static void fit_colors_bbox(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
*c0 = Vector3(0);
*c1 = Vector3(1);
for (int i = 0; i < count; i++) {
*c0 = max(*c0, colors[i]);
*c1 = min(*c1, colors[i]);
}
}
inline static void select_diagonal(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
Vector3 center = (*c0 + *c1) * 0.5f;
Vector2 covariance = Vector2(0);
for (int i = 0; i < count; i++) {
Vector3 t = colors[i] - center;
covariance += t.xy() * t.z;
}
float x0 = c0->x;
float y0 = c0->y;
float x1 = c1->x;
float y1 = c1->y;
if (covariance.x < 0) {
swap(x0, x1);
}
if (covariance.y < 0) {
swap(y0, y1);
}
c0->set(x0, y0, c0->z);
c1->set(x1, y1, c1->z);
}
*/
void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count) void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count)
{ {
@ -91,17 +22,10 @@ void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int
m_count = count; m_count = count;
// I've tried using a lower-quality approximation of the principal direction, but the best-fit line seems to produce the best results.
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(count, colors, weights, metric); Vector3 principal = Fit::computePrincipalComponent_PowerMethod(count, colors, weights, metric);
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(count, colors, weights, metric); //Vector3 principal = Fit::computePrincipalComponent_EigenSolver(count, colors, weights, metric);
/*// This approximation produces slightly lower quality:
Vector3 c0, c1;
fit_colors_bbox(colors, count, &c0, &c1);
select_diagonal(colors, count, &c0, &c1);
if (c0 != c1) {
principal = normalize(c1 - c0);
}*/
// build the list of values // build the list of values
int order[16]; int order[16];
float dps[16]; float dps[16];
@ -194,11 +118,11 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
SimdVector x0 = zero; SimdVector x0 = zero;
// check all possible clusters for this total order // check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++) for (int c0 = 0; c0 <= count; c0++)
{ {
SimdVector x1 = zero; SimdVector x1 = zero;
for( int c1 = 0; c1 <= count-c0; c1++) for (int c1 = 0; c1 <= count-c0; c1++)
{ {
const SimdVector x2 = m_xsum - x1 - x0; const SimdVector x2 = m_xsum - x1 - x0;
@ -238,7 +162,7 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
// keep the solution if it wins // keep the solution if it wins
if( compareAnyLessThan( error, besterror ) ) if (compareAnyLessThan(error, besterror))
{ {
besterror = error; besterror = error;
beststart = a; beststart = a;
@ -252,9 +176,8 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
} }
// save the block if necessary // save the block if necessary
if( compareAnyLessThan( besterror, m_besterror ) ) if (compareAnyLessThan(besterror, m_besterror))
{ {
*start = beststart.toVector3(); *start = beststart.toVector3();
*end = bestend.toVector3(); *end = bestend.toVector3();
@ -288,15 +211,15 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
SimdVector x0 = zero; SimdVector x0 = zero;
// check all possible clusters for this total order // check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++) for (int c0 = 0; c0 <= count; c0++)
{ {
SimdVector x1 = zero; SimdVector x1 = zero;
for( int c1 = 0; c1 <= count-c0; c1++) for (int c1 = 0; c1 <= count-c0; c1++)
{ {
SimdVector x2 = zero; SimdVector x2 = zero;
for( int c2 = 0; c2 <= count-c0-c1; c2++) for (int c2 = 0; c2 <= count-c0-c1; c2++)
{ {
const SimdVector x3 = m_xsum - x2 - x1 - x0; const SimdVector x3 = m_xsum - x2 - x1 - x0;
@ -469,7 +392,7 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
} }
// save the block if necessary // save the block if necessary
if( besterror < m_besterror ) if (besterror < m_besterror)
{ {
*start = beststart; *start = beststart;
@ -582,3 +505,25 @@ bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
} }
#endif // NVTT_USE_SIMD #endif // NVTT_USE_SIMD
// Copyright (c) 2006-2020 Ignacio Castano icastano@nvidia.com
// Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1,31 +1,5 @@
/* ----------------------------------------------------------------------------- // MIT license see full LICENSE text at end of file
#pragma once
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef NVTT_CLUSTERFIT_H
#define NVTT_CLUSTERFIT_H
#include "nvmath/SimdVector.h" #include "nvmath/SimdVector.h"
#include "nvmath/Vector.h" #include "nvmath/Vector.h"
@ -42,9 +16,8 @@ namespace nv {
class ClusterFit class ClusterFit
{ {
public: public:
ClusterFit(); ClusterFit() {}
//void setColorSet(const ColorSet * set);
void setColorSet(const Vector3 * colors, const float * weights, int count); void setColorSet(const Vector3 * colors, const float * weights, int count);
void setColorWeights(const Vector4 & w); void setColorWeights(const Vector4 & w);
@ -80,4 +53,24 @@ namespace nv {
} // nv namespace } // nv namespace
#endif // NVTT_CLUSTERFIT_H // Copyright (c) 2006-2020 Ignacio Castano icastano@nvidia.com
// Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -303,7 +303,10 @@ const Surface & CubeSurface::face(int f) const
bool CubeSurface::load(const char * fileName, int mipmap) bool CubeSurface::load(const char * fileName, int mipmap)
{ {
if (strEqual(Path::extension(fileName), ".dds")) { if (strEqual(Path::extension(fileName), ".dds")) {
nv::DirectDrawSurface dds(fileName); nv::DirectDrawSurface dds;
if (!dds.load(fileName)) {
return false;
}
if (!dds.isValid()/* || !dds.isSupported()*/) { if (!dds.isValid()/* || !dds.isSupported()*/) {
return false; return false;

View File

@ -44,6 +44,7 @@
#include "nvthread/ParallelFor.h" #include "nvthread/ParallelFor.h"
#include "nvcore/Array.inl" #include "nvcore/Array.inl"
#include "nvcore/StrLib.h"
#include <float.h> #include <float.h>
#include <string.h> // memset, memcpy #include <string.h> // memset, memcpy
@ -569,77 +570,80 @@ bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
AutoPtr<FloatImage> img(ImageIO::loadFloat(fileName)); AutoPtr<FloatImage> img(ImageIO::loadFloat(fileName));
if (img == NULL) { if (img == NULL) {
// Try loading as DDS. // Try loading as DDS.
if (nv::strEqual(nv::Path::extension(fileName), ".dds")) { if (!nv::strEqual(nv::Path::extension(fileName), ".dds")) {
nv::DirectDrawSurface dds; return false;
if (dds.load(fileName)) {
if (dds.header.isBlockFormat()) {
int w = dds.surfaceWidth(0);
int h = dds.surfaceHeight(0);
uint size = dds.surfaceSize(0);
void * data = malloc(size);
dds.readSurface(0, 0, data, size);
// @@ Handle all formats! @@ Get nvtt format from dds.surfaceFormat() ?
if (dds.header.hasDX10Header()) {
if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM_SRGB) {
this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data);
}
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM_SRGB) {
this->setImage2D(nvtt::Format_BC2, nvtt::Decoder_D3D10, w, h, data);
}
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM_SRGB) {
this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data);
}
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16) {
this->setImage2D(nvtt::Format_BC6, nvtt::Decoder_D3D10, w, h, data);
}
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM_SRGB) {
this->setImage2D(nvtt::Format_BC7, nvtt::Decoder_D3D10, w, h, data);
}
else {
// @@
nvCheck(false && "Format not handled with DDS10 header.");
}
}
else {
uint fourcc = dds.header.pf.fourcc;
if (fourcc == FOURCC_DXT1) {
this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data);
}
else if (fourcc == FOURCC_DXT3) {
this->setImage2D(nvtt::Format_BC2, nvtt::Decoder_D3D10, w, h, data);
}
else if (fourcc == FOURCC_DXT5) {
this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data);
}
else {
// @@
nvCheck(false && "Format not handled with DDS9 header.");
}
}
free(data);
}
else {
Image img;
dds.mipmap(&img, /*face=*/0, /*mipmap=*/0);
int w = img.width();
int h = img.height();
int d = img.depth();
// @@ Add support for all pixel formats.
this->setImage(nvtt::InputFormat_BGRA_8UB, w, h, d, img.pixels());
}
return true;
}
} }
return false; nv::DirectDrawSurface dds;
if (!dds.load(fileName)) {
return false;
}
if (dds.header.isBlockFormat()) {
int w = dds.surfaceWidth(0);
int h = dds.surfaceHeight(0);
uint size = dds.surfaceSize(0);
void * data = malloc(size);
dds.readSurface(0, 0, data, size);
// @@ Handle all formats! @@ Get nvtt format from dds.surfaceFormat() ?
if (dds.header.hasDX10Header()) {
if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM_SRGB) {
this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data);
}
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM_SRGB) {
this->setImage2D(nvtt::Format_BC2, nvtt::Decoder_D3D10, w, h, data);
}
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM_SRGB) {
this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data);
}
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16) {
this->setImage2D(nvtt::Format_BC6, nvtt::Decoder_D3D10, w, h, data);
}
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM || dds.header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM_SRGB) {
this->setImage2D(nvtt::Format_BC7, nvtt::Decoder_D3D10, w, h, data);
}
else {
// @@
nvCheck(false && "Format not handled with DDS10 header.");
}
}
else {
uint fourcc = dds.header.pf.fourcc;
if (fourcc == FOURCC_DXT1) {
this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data);
}
else if (fourcc == FOURCC_DXT3) {
this->setImage2D(nvtt::Format_BC2, nvtt::Decoder_D3D10, w, h, data);
}
else if (fourcc == FOURCC_DXT5) {
this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data);
}
else {
// @@
nvCheck(false && "Format not handled with DDS9 header.");
}
}
free(data);
}
else {
// @@ Separate image decoder from dds reader.
Image img;
imageFromDDS(&img, dds, /*face=*/0, /*mipmap=*/0);
int w = img.width;
int h = img.height;
int d = img.depth;
// @@ Add support for all pixel formats.
this->setImage(nvtt::InputFormat_BGRA_8UB, w, h, d, img.pixels());
}
return true;
} }
detach(); detach();
@ -672,7 +676,7 @@ bool Surface::save(const char * fileName, bool hasAlpha/*=0*/, bool hdr/*=0*/) c
nvCheck(image != NULL); nvCheck(image != NULL);
if (hasAlpha) { if (hasAlpha) {
image->setFormat(Image::Format_ARGB); image->format = Image::Format_ARGB;
} }
return ImageIO::save(fileName, image.ptr()); return ImageIO::save(fileName, image.ptr());

View File

@ -28,32 +28,19 @@ typedef unsigned char u8;
typedef unsigned int u32; typedef unsigned int u32;
// Defer statement: #define TEST_STB 1
#define CONCAT_INTERNAL(x, y) x##y #define TEST_STB_HQ 1
#define CONCAT(x, y) CONCAT_INTERNAL(x, y)
template<typename T> #define TEST_NVTT_FAST 1
struct ExitScope #define TEST_NVTT_GELD 0
{ #define TEST_NVTT 1
T lambda; #define TEST_NVTT_HQ 1
ExitScope(T lambda)
: lambda(lambda)
{
}
~ExitScope() { lambda(); }
private: #define TEST_SQUISH 0
ExitScope& operator=(const ExitScope&); #define TEST_SQUISH_HQ 0
};
class ExitScopeHelp #define TEST_AMD_CMP 0
{
public:
template<typename T>
ExitScope<T> operator+(T t) { return t; }
};
#define defer const auto& __attribute__((unused)) CONCAT(defer__, __LINE__) = ExitScopeHelp() + [&]()
static float mse_to_psnr(float mse) { static float mse_to_psnr(float mse) {
@ -109,14 +96,14 @@ void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, ui
*/ */
// Returns mse. // Returns mse.
float evaluate_dxt1_mse(uint8 * rgba, uint8 * block, int block_count, int decoder = 2) { float evaluate_dxt1_mse(uint8 * rgba, uint8 * block, int block_count, int decoder = 0) {
double total = 0.0f; double total = 0.0f;
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
total += nv::evaluate_dxt1_error(rgba, (BlockDXT1 *)block, decoder) / 255.0; total += nv::evaluate_dxt1_error(rgba, (BlockDXT1 *)block, decoder);
rgba += 4 * 4 * 4; rgba += 4 * 4 * 4;
block += 8; block += 8;
} }
return float(total / (3 * 16 * block_count)); return float(total / (16 * block_count));
} }
#define MAKEFOURCC(str) (uint(str[0]) | (uint(str[1]) << 8) | (uint(str[2]) << 16) | (uint(str[3]) << 24 )) #define MAKEFOURCC(str) (uint(str[0]) | (uint(str[1]) << 8) | (uint(str[2]) << 16) | (uint(str[3]) << 24 ))
@ -180,7 +167,6 @@ bool output_dxt_dds (u32 w, u32 h, const u8* data, const char * filename) {
return true; return true;
} }
const int COMPRESSOR_COUNT = 7;
struct Stats { struct Stats {
const char * compressorName; const char * compressorName;
Array<float> mseArray; Array<float> mseArray;
@ -244,7 +230,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
const int repeat_count = 1; // 8 const int repeat_count = 1; // 8
#endif #endif
{ if (TEST_STB) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
timer.start(); timer.start();
@ -265,7 +251,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
stats++; stats++;
} }
{ if (TEST_STB_HQ) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
timer.start(); timer.start();
@ -286,7 +272,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
stats++; stats++;
} }
{ if (TEST_NVTT_FAST) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
Vector3 color_weights(1); Vector3 color_weights(1);
@ -318,7 +304,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
stats++; stats++;
} }
{ if (TEST_NVTT_GELD) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
timer.start(); timer.start();
@ -340,7 +326,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
stats++; stats++;
} }
{ if (TEST_NVTT) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
Vector3 color_weights(1); Vector3 color_weights(1);
@ -357,7 +343,39 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
input_weights[j] = 1.0f; input_weights[j] = 1.0f;
} }
compress_dxt1(input_colors, input_weights, color_weights, false, (BlockDXT1*)(block_data + b * 8)); compress_dxt1(input_colors, input_weights, color_weights, false, false, (BlockDXT1*)(block_data + b * 8));
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
//printf("nvtt hq \t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse));
//output_dxt_dds(bw, bh, block_data, "nvtt_hq.dds");
stats->compressorName = "nvtt";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed();
stats++;
}
if (TEST_NVTT_HQ) {
memset(block_data, 0, block_count * 8);
Vector3 color_weights(1);
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
Vector4 input_colors[16];
float input_weights[16];
for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[j].w = 1.0f;
input_weights[j] = 1.0f;
}
compress_dxt1(input_colors, input_weights, color_weights, true, true, (BlockDXT1*)(block_data + b * 8));
} }
} }
timer.stop(); timer.stop();
@ -372,7 +390,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
stats++; stats++;
} }
{ if (TEST_SQUISH) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
timer.start(); timer.start();
@ -393,7 +411,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
stats++; stats++;
} }
/*{ if (TEST_SQUISH_HQ) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
timer.start(); timer.start();
@ -412,9 +430,9 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed();
stats++; stats++;
}*/ }
{ if (TEST_AMD_CMP) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
timer.start(); timer.start();
@ -486,9 +504,12 @@ bool analyze_bc1(const char * inputFileName) {
Timer timer; Timer timer;
int stb_better_than_nvtt_fast = 0; int stb_better_than_nvtt_fast = 0;
int stb_better_than_nvtt = 0;
int stb_better_than_nvtt_hq = 0; int stb_better_than_nvtt_hq = 0;
int squish_better_than_nvtt_hq = 0; int squish_better_than_nvtt_hq = 0;
int nvtt_hq_wins = 0;
int this_should_never_happen = 0; int this_should_never_happen = 0;
int this_should_never_happen_either = 0; int this_should_never_happen_either = 0;
@ -526,7 +547,10 @@ bool analyze_bc1(const char * inputFileName) {
compress_dxt1_fast_geld(rgba_block, (BlockDXT1*)dxt_block); compress_dxt1_fast_geld(rgba_block, (BlockDXT1*)dxt_block);
float mse_nvtt_geld = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_geld = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
compress_dxt1(input_colors, input_weights, color_weights, false, (BlockDXT1*)dxt_block); compress_dxt1(input_colors, input_weights, color_weights, true, false, (BlockDXT1*)dxt_block);
float mse_nvtt = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
compress_dxt1(input_colors, input_weights, color_weights, true, true, (BlockDXT1*)dxt_block);
float mse_nvtt_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
squish::Compress(rgba_block, dxt_block, squish::kDxt1); squish::Compress(rgba_block, dxt_block, squish::kDxt1);
@ -538,9 +562,15 @@ bool analyze_bc1(const char * inputFileName) {
if (mse_stb < mse_nvtt_fast) { if (mse_stb < mse_nvtt_fast) {
stb_better_than_nvtt_fast++; stb_better_than_nvtt_fast++;
} }
if (mse_stb < mse_nvtt) {
stb_better_than_nvtt++;
}
if (mse_stb < mse_nvtt_hq) { if (mse_stb < mse_nvtt_hq) {
stb_better_than_nvtt_hq++; stb_better_than_nvtt_hq++;
} }
if (mse_nvtt_hq < mse_nvtt) {
nvtt_hq_wins++;
}
if (mse_squish < mse_nvtt_hq) { if (mse_squish < mse_nvtt_hq) {
squish_better_than_nvtt_hq++; squish_better_than_nvtt_hq++;
} }
@ -641,8 +671,8 @@ int main(int argc, char *argv[])
//const char * inputFileName = "testsuite/kodak/kodim18.png"; //const char * inputFileName = "testsuite/kodak/kodim18.png";
//const char * inputFileName = "testsuite/kodak/kodim15.png"; //const char * inputFileName = "testsuite/kodak/kodim15.png";
//const char * inputFileName = "testsuite/waterloo/frymire.png"; //const char * inputFileName = "testsuite/waterloo/frymire.png";
// test_bc1(inputFileName, 0);
//test_bc1(inputFileName, 0);
analyze_bc1(inputFileName); analyze_bc1(inputFileName);
//const char ** set = roblox_set; //const char ** set = roblox_set;
@ -651,9 +681,10 @@ int main(int argc, char *argv[])
const char ** set = image_set; const char ** set = image_set;
int count = sizeof(image_set) / sizeof(char*); int count = sizeof(image_set) / sizeof(char*);
Stats stats[COMPRESSOR_COUNT]; const int MAX_COMPRESSOR_COUNT = 16;
Stats stats[MAX_COMPRESSOR_COUNT];
for (int i = 0; i < COMPRESSOR_COUNT; i++) { for (int i = 0; i < MAX_COMPRESSOR_COUNT; i++) {
stats[i].compressorName = nullptr; stats[i].compressorName = nullptr;
stats[i].mseArray.resize(count, 0.0f); stats[i].mseArray.resize(count, 0.0f);
stats[i].timeArray.resize(count, 0.0f); stats[i].timeArray.resize(count, 0.0f);
@ -664,16 +695,16 @@ int main(int argc, char *argv[])
test_bc1(set[i], i, stats); test_bc1(set[i], i, stats);
for (int c = 0; c < COMPRESSOR_COUNT; c++) { for (int c = 0; c < MAX_COMPRESSOR_COUNT; c++) {
if (stats[c].compressorName) { if (stats[c].compressorName) {
printf("%-16s %f\t%f\n", stats[c].compressorName, sqrtf(stats[c].mseArray[i]), stats[c].timeArray[i]); printf("%-16s %f\t%f\t%f\n", stats[c].compressorName, sqrtf(stats[c].mseArray[i]), mse_to_psnr(stats[c].mseArray[i]), stats[c].timeArray[i]);
} }
} }
} }
// Print stats. // Print stats.
printf("\nAverage Results:\n"); printf("\nAverage Results:\n");
for (int c = 0; c < COMPRESSOR_COUNT; c++) { for (int c = 0; c < MAX_COMPRESSOR_COUNT; c++) {
if (stats[c].compressorName) { if (stats[c].compressorName) {
float sum = 0.0f; float sum = 0.0f;
for (float it : stats[c].mseArray) { for (float it : stats[c].mseArray) {
@ -686,7 +717,7 @@ int main(int argc, char *argv[])
time += it; time += it;
} }
printf("%-16s %f\t%f\n", stats[c].compressorName, sqrtf(sum), time); printf("%-16s %f\t%f\t%f\n", stats[c].compressorName, sqrtf(sum), mse_to_psnr(sum), time);
} }
} }