Browse Source

Add baboon image to waterloo image set.

pull/310/head
Ignacio 1 year ago
parent
commit
4a33d1ac75
3 changed files with 1003 additions and 0 deletions
  1. BIN
      data/testsuite/waterloo/baboon.png
  2. +309
    -0
      extern/libsquish-1.15/squish.h
  3. +694
    -0
      src/nvtt/tests/bc1enc.cpp

BIN
data/testsuite/waterloo/baboon.png View File

Before After
Width: 512  |  Height: 512  |  Size: 622 KiB

+ 309
- 0
extern/libsquish-1.15/squish.h View File

@ -0,0 +1,309 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_H
#define SQUISH_H
//! All squish API functions live in this namespace.
namespace squish {
// -----------------------------------------------------------------------------
//! Typedef a quantity that is a single unsigned byte.
typedef unsigned char u8;
// -----------------------------------------------------------------------------
//! Flags accepted by the squish compression and decompression functions.
//! Every value occupies its own bit, so a format flag (kDxt1 ... kBc5) can be
//! combined with compressor and option flags using bitwise OR.
enum
{
//! Use DXT1 compression.
kDxt1 = ( 1 << 0 ),
//! Use DXT3 compression.
kDxt3 = ( 1 << 1 ),
//! Use DXT5 compression.
kDxt5 = ( 1 << 2 ),
//! Use BC4 compression.
kBc4 = ( 1 << 3 ),
//! Use BC5 compression.
kBc5 = ( 1 << 4 ),
//! Use a slow but high quality colour compressor (the default).
kColourClusterFit = ( 1 << 5 ),
//! Use a fast but low quality colour compressor.
kColourRangeFit = ( 1 << 6 ),
//! Weight the colour by alpha during cluster fit (disabled by default).
kWeightColourByAlpha = ( 1 << 7 ),
//! Use a very slow but very high quality colour compressor.
kColourIterativeClusterFit = ( 1 << 8 ),
//! Source is BGRA rather than RGBA.
kSourceBGRA = ( 1 << 9 )
};
// -----------------------------------------------------------------------------
/*! @brief Compresses a 4x4 block of pixels.
@param rgba The rgba values of the 16 source pixels.
@param mask The valid pixel mask.
@param block Storage for the compressed DXT block.
@param flags Compression flags.
@param metric An optional perceptual metric.
The source pixels should be presented as a contiguous array of 16 rgba
values, with each component as 1 byte each. In memory this should be:
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
The mask parameter enables only certain pixels within the block. The lowest
bit enables the first pixel and so on up to the 16th bit. Bits beyond the
16th bit are ignored. Pixels that are not enabled are allowed to take
arbitrary colours in the output block. An example of how this can be used
is in the CompressImage function to disable pixels outside the bounds of
the image when the width or height is not divisible by 4.
The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
however, DXT1 will be used by default if none is specified. When using DXT1
compression, 8 bytes of storage are required for the compressed DXT block.
DXT3 and DXT5 compression require 16 bytes of storage per block.
The flags parameter can also specify a preferred colour compressor to use
when fitting the RGB components of the data. Possible colour compressors
are: kColourClusterFit (the default), kColourRangeFit (very fast, low
quality) or kColourIterativeClusterFit (slowest, best quality).
When using kColourClusterFit or kColourIterativeClusterFit, an additional
flag can be specified to weight the importance of each pixel by its alpha
value. For images that are rendered using alpha blending, this can
significantly increase the perceived quality.
The metric parameter can be used to weight the relative importance of each
colour channel, or pass NULL to use the default uniform weight of
{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
allowed either uniform or "perceptual" weights with the fixed values
{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
contiguous array of 3 floats.
*/
void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 );
// -----------------------------------------------------------------------------
/*! @brief Compresses a 4x4 block of pixels.
@param rgba The rgba values of the 16 source pixels.
@param block Storage for the compressed DXT block.
@param flags Compression flags.
@param metric An optional perceptual metric.
The source pixels should be presented as a contiguous array of 16 rgba
values, with each component as 1 byte each. In memory this should be:
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
however, DXT1 will be used by default if none is specified. When using DXT1
compression, 8 bytes of storage are required for the compressed DXT block.
DXT3 and DXT5 compression require 16 bytes of storage per block.
The flags parameter can also specify a preferred colour compressor to use
when fitting the RGB components of the data. Possible colour compressors
are: kColourClusterFit (the default), kColourRangeFit (very fast, low
quality) or kColourIterativeClusterFit (slowest, best quality).
When using kColourClusterFit or kColourIterativeClusterFit, an additional
flag can be specified to weight the importance of each pixel by its alpha
value. For images that are rendered using alpha blending, this can
significantly increase the perceived quality.
The metric parameter can be used to weight the relative importance of each
colour channel, or pass NULL to use the default uniform weight of
{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
allowed either uniform or "perceptual" weights with the fixed values
{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
contiguous array of 3 floats.
This method is an inline that calls CompressMasked with a mask of 0xffff,
provided for compatibility with older versions of squish.
*/
inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 )
{
    // All 16 pixels of the block are valid, so every one of the low 16 mask bits is set.
    int const allPixelsMask = 0xffff;
    CompressMasked( rgba, allPixelsMask, block, flags, metric );
}
// -----------------------------------------------------------------------------
/*! @brief Decompresses a 4x4 block of pixels.
@param rgba Storage for the 16 decompressed pixels.
@param block The compressed DXT block.
@param flags Compression flags.
The decompressed pixels will be written as a contiguous array of 16 rgba
values, with each component as 1 byte each. In memory this is:
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
*/
void Decompress( u8* rgba, void const* block, int flags );
// -----------------------------------------------------------------------------
/*! @brief Computes the amount of compressed storage required.
@param width The width of the image.
@param height The height of the image.
@param flags Compression flags.
The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
Most DXT images will be a multiple of 4 in each dimension, but this
function supports arbitrary size images by allowing the outer blocks to
be only partially used.
*/
int GetStorageRequirements( int width, int height, int flags );
// -----------------------------------------------------------------------------
/*! @brief Compresses an image in memory.
@param rgba The pixels of the source.
@param width The width of the source image.
@param height The height of the source image.
@param pitch The pitch of the source image.
@param blocks Storage for the compressed output.
@param flags Compression flags.
@param metric An optional perceptual metric.
The source pixels should be presented as a contiguous array of width*height
rgba values, with each component as 1 byte each. In memory this should be:
{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
however, DXT1 will be used by default if none is specified. When using DXT1
compression, 8 bytes of storage are required for each compressed DXT block.
DXT3 and DXT5 compression require 16 bytes of storage per block.
The flags parameter can also specify a preferred colour compressor to use
when fitting the RGB components of the data. Possible colour compressors
are: kColourClusterFit (the default), kColourRangeFit (very fast, low
quality) or kColourIterativeClusterFit (slowest, best quality).
When using kColourClusterFit or kColourIterativeClusterFit, an additional
flag can be specified to weight the importance of each pixel by its alpha
value. For images that are rendered using alpha blending, this can
significantly increase the perceived quality.
The metric parameter can be used to weight the relative importance of each
colour channel, or pass NULL to use the default uniform weight of
{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
allowed either uniform or "perceptual" weights with the fixed values
{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
contiguous array of 3 floats.
Internally this function calls squish::CompressMasked for each block, which
allows for pixels outside the image to take arbitrary values. The function
squish::GetStorageRequirements can be called to compute the amount of memory
to allocate for the compressed output.
Note on compression quality: When compressing textures with
libsquish it is recommended to apply a gamma-correction
beforehand. This will reduce the blockiness in dark areas. The
level of necessary gamma-correction is platform dependent. For
example, a gamma correction with gamma = 0.5 before compression
and gamma = 2.0 after decompression yields good results on the
Windows platform but for other platforms like MacOS X a different
gamma value may be more suitable.
*/
void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric = 0 );
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );
// -----------------------------------------------------------------------------
/*! @brief Decompresses an image in memory.
@param rgba Storage for the decompressed pixels.
@param width The width of the source image.
@param height The height of the source image.
@param pitch The pitch of the decompressed pixels.
@param blocks The compressed DXT blocks.
@param flags Compression flags.
The decompressed pixels will be written as a contiguous array of width*height
rgba values, with each component as 1 byte each. In memory this is:
{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
Internally this function calls squish::Decompress for each block.
*/
void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags );
void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags );
// -----------------------------------------------------------------------------
/*! @brief Computes MSE of a compressed image in memory.
@param rgba The original image pixels.
@param width The width of the source image.
@param height The height of the source image.
@param pitch The pitch of the source image.
@param dxt The compressed dxt blocks
@param flags Compression flags.
@param colourMSE The MSE of the colour values.
@param alphaMSE The MSE of the alpha values.
The colour MSE and alpha MSE are computed across all pixels. The colour MSE is
averaged across all rgb values (i.e. colourMSE = sum sum_k ||dxt.k - rgba.k||/3)
The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
Internally this function calls squish::Decompress for each block.
*/
void ComputeMSE(u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE);
void ComputeMSE(u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE);
// -----------------------------------------------------------------------------
} // namespace squish
#endif // ndef SQUISH_H

+ 694
- 0
src/nvtt/tests/bc1enc.cpp View File

@ -0,0 +1,694 @@
#define _CRT_SECURE_NO_WARNINGS
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#define STBI_ASSERT(x)
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_DXT_IMPLEMENTATION
#include "stb_dxt.h"
#include "../extern/libsquish-1.15/squish.h"
#include "../extern/CMP_Core/source/CMP_Core.h"
#include "nvtt/CompressorDXT1.h"
#include "nvmath/Vector.h"
#include "nvmath/Color.h"
#include "nvcore/Timer.h"
#include "nvcore/Array.inl"
using namespace nv;
typedef unsigned char u8;
typedef unsigned int u32;
// Scope-exit ("defer") facility.
// Usage: `defer { cleanup(); };` -- the braces run when the enclosing scope ends.
#define CONCAT_INTERNAL(x, y) x##y
#define CONCAT(x, y) CONCAT_INTERNAL(x, y)

// Stores a callable and invokes it from the destructor.
template<typename T>
struct ExitScope
{
    T lambda;

    ExitScope(T callable) : lambda(callable) {}
    ~ExitScope() { lambda(); }

private:
    // Declared but never defined, so assignment is unavailable.
    ExitScope& operator=(const ExitScope&);
};

// Helper whose operator+ turns the callable on its right-hand side into an ExitScope.
class ExitScopeHelp
{
public:
    template<typename T>
    ExitScope<T> operator+(T callable) { return callable; }
};

// Binds the ExitScope to a uniquely named (per source line) const reference so that it
// lives until the end of the current scope; the lambda captures everything by reference.
#define defer const auto& __attribute__((unused)) CONCAT(defer__, __LINE__) = ExitScopeHelp() + [&]()
// Converts a mean squared error (on a 0..255 scale) to PSNR in dB, clamped to [0, 300].
// A zero RMS error maps to the sentinel value 1e+10.
static float mse_to_psnr(float mse) {
    const float root_mean_square = sqrtf(mse);
    if (!root_mean_square) {
        return 1e+10f;
    }
    return (float)clamp(log10(255.0 / root_mean_square) * 20.0, 0.0, 300.0);
}
/*
void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma)
{
//assert((first_chan < 4U) && (first_chan + total_chans <= 4U));
const uint32_t width = std::min(a.get_width(), b.get_width());
const uint32_t height = std::min(a.get_height(), b.get_height());
double hist[256];
memset(hist, 0, sizeof(hist));
for (uint32_t y = 0; y < height; y++)
{
for (uint32_t x = 0; x < width; x++)
{
const color_rgba &ca = a(x, y), &cb = b(x, y);
for (uint32_t c = 0; c < 3; c++)
hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++;
}
}
m_max = 0;
double sum = 0.0f, sum2 = 0.0f;
for (uint32_t i = 0; i < 256; i++)
{
if (hist[i])
{
m_max = std::max<float>(m_max, (float)i);
double v = i * hist[i];
sum += v;
sum2 += i * v;
}
}
double total_values = (double)width * (double)height;
if (avg_comp_error)
total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);
m_mean = (float)clamp<double>(sum / total_values, 0.0f, 255.0);
m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0 * 255.0);
m_rms = (float)sqrt(m_mean_squared);
m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0, 0.0f, 300.0f) : 1e+10f;
}
*/
// Computes the mean squared error of a run of DXT1 blocks against their source pixels.
//
//   rgba        - source pixels in block layout, 4 * 4 * 4 bytes per block.
//   block       - encoded blocks, 8 bytes each, reinterpreted as BlockDXT1.
//   block_count - number of blocks in both arrays.
//   decoder     - forwarded unchanged to nv::evaluate_dxt1_error.
//
// Returns the summed per-block error (each scaled by 1/255) divided by the number of
// colour samples, 3 * 16 * block_count. An empty run (block_count <= 0) yields 0 instead
// of the NaN that a 0/0 division would produce.
float evaluate_dxt1_mse(uint8 * rgba, uint8 * block, int block_count, int decoder = 2) {
    if (block_count <= 0) {
        return 0.0f;
    }

    double total = 0.0;
    for (int b = 0; b < block_count; b++) {
        total += nv::evaluate_dxt1_error(rgba, (BlockDXT1 *)block, decoder) / 255.0;
        rgba += 4 * 4 * 4;
        block += 8;
    }

    // The sample count is formed in double so it cannot overflow int for huge block counts.
    return float(total / (3.0 * 16.0 * block_count));
}
// Packs the first four characters of `str` into a 32-bit code, with str[0] in the least
// significant byte and str[3] in the most significant byte.
#define MAKEFOURCC(str) (uint(str[0]) | (uint(str[1]) << 8) | (uint(str[2]) << 16) | (uint(str[3]) << 24 ))
// Writes DXT1-compressed data to `filename` as a DDS file (128-byte header + block data).
//
//   w, h     - image dimensions in pixels; the payload size is derived from them.
//   data     - compressed blocks, 8 bytes per 4x4 block, ceil(w/4) * ceil(h/4) blocks.
//   filename - destination path, opened with mode "wb".
//
// Returns true only if the file could be opened and both the header and the block data
// were written and flushed successfully.
bool output_dxt_dds (u32 w, u32 h, const u8* data, const char * filename) {
    const u32 DDSD_CAPS = 0x00000001;
    const u32 DDSD_PIXELFORMAT = 0x00001000;
    const u32 DDSD_WIDTH = 0x00000004;
    const u32 DDSD_HEIGHT = 0x00000002;
    const u32 DDSD_LINEARSIZE = 0x00080000;
    const u32 DDPF_FOURCC = 0x00000004;
    const u32 DDSCAPS_TEXTURE = 0x00001000;

    struct DDS {
        u32 fourcc = MAKEFOURCC("DDS ");
        u32 size = 124;
        u32 flags = DDSD_CAPS|DDSD_PIXELFORMAT|DDSD_WIDTH|DDSD_HEIGHT|DDSD_LINEARSIZE;
        u32 height;
        u32 width;
        u32 pitch;
        u32 depth;
        u32 mipmapcount;
        u32 reserved [11];
        struct {
            u32 size = 32;
            u32 flags = DDPF_FOURCC;
            u32 fourcc = MAKEFOURCC("DXT1");
            u32 bitcount;
            u32 rmask;
            u32 gmask;
            u32 bmask;
            u32 amask;
        } pf;
        struct {
            u32 caps1 = DDSCAPS_TEXTURE;
            u32 caps2;
            u32 caps3;
            u32 caps4;
        } caps;
        u32 notused;
    };
    static_assert(sizeof(DDS) == 128, "DDS size must be 128");

    // Value-initialize: every field without a default initializer must be written as zero,
    // not as whatever happened to be on the stack.
    DDS dds{};
    dds.width = w;
    dds.height = h;
    // Each dimension is rounded up to whole blocks separately; without the inner
    // parentheses the expression parses as (((w+3)/4) * (h+3)) / 4.
    dds.pitch = 8 * (((w + 3) / 4) * ((h + 3) / 4)); // linear size

    FILE * fp = fopen(filename, "wb");
    if (fp == nullptr) return false;

    // Write header:
    bool ok = fwrite(&dds, sizeof(dds), 1, fp) == 1;

    // Write dxt data. A zero-sized payload is skipped because fwrite reports 0 items for it.
    if (ok && dds.pitch != 0) {
        ok = fwrite(data, dds.pitch, 1, fp) == 1;
    }

    // fclose flushes buffered output, so its result is part of the write's success.
    if (fclose(fp) != 0) {
        ok = false;
    }
    return ok;
}
// Number of Stats entries main() allocates. test_bc1 fills one entry per compressor it
// runs (seven at present), so this must not be smaller than that number.
const int COMPRESSOR_COUNT = 7;
// Results collected for one compressor across the whole image set.
struct Stats {
// Display name; main() resets it to nullptr and test_bc1 assigns the real name.
const char * compressorName;
// MSE per image, indexed by the image's position in the set.
Array<float> mseArray;
// Value of Timer::elapsed() per image, indexed by the image's position in the set.
Array<float> timeArray;
};
bool test_bc1(const char * inputFileName, int index, Stats * stats) {
int w, h, n;
unsigned char *input_data = stbi_load(inputFileName, &w, &h, &n, 4);
defer { stbi_image_free(input_data); };
if (input_data == nullptr) {
printf("Failed to load input image '%s'.\n", inputFileName);
return false;
}
int block_count = (w / 4) * (h / 4);
u8 * rgba_block_data = (u8 *)malloc(block_count * 4 * 4 * 4);
defer { free(rgba_block_data); };
int bw = 4 * (w / 4); // Round down.
int bh = 4 * (h / 4);
// Convert to block layout.
for (int y = 0, b = 0; y < bh; y += 4) {
for (int x = 0; x < bw; x += 4, b++) {
for (int yy = 0; yy < 4; yy++) {
for (int xx = 0; xx < 4; xx++) {
if (x + xx < w && y + yy < h) {
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 0] = input_data[((y + yy) * w + x + xx) * 4 + 0];
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 1] = input_data[((y + yy) * w + x + xx) * 4 + 1];
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 2] = input_data[((y + yy) * w + x + xx) * 4 + 2];
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 3] = input_data[((y + yy) * w + x + xx) * 4 + 3];
}
else {
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 0] = 0;
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 1] = 0;
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 2] = 0;
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 3] = 0;
}
}
}
}
}
u8 * block_data = (u8 *)malloc(block_count * 8);
Timer timer;
// Warmup.
for (int b = 0; b < block_count; b++) {
stb_compress_dxt_block(block_data + b * 8, rgba_block_data + b * 4 * 4 * 4, 0, STB_DXT_NORMAL);
}
#if _DEBUG
const int repeat_count = 1;
#else
const int repeat_count = 1; // 8
#endif
{
memset(block_data, 0, block_count * 8);
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
stb_compress_dxt_block(block_data + b * 8, rgba_block_data + b * 4 * 4 * 4, 0, STB_DXT_NORMAL);
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
//printf("stb_dxt \t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse));
//output_dxt_dds(bw, bh, block_data, "stb_dxt.dds");
stats->compressorName = "stb";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed();
stats++;
}
{
memset(block_data, 0, block_count * 8);
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
stb_compress_dxt_block(block_data + b * 8, rgba_block_data + b * 4 * 4 * 4, 0, STB_DXT_HIGHQUAL);
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
//printf("stb_dxt hq \t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse));
//output_dxt_dds(bw, bh, block_data, "stb_dxt_hq.dds");
stats->compressorName = "stb-hq";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed();
stats++;
}
{
memset(block_data, 0, block_count * 8);
Vector3 color_weights(1);
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
Vector4 input_colors[16];
float input_weights[16];
for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[j].w = 255.0f;
input_weights[j] = 1.0f;
}
compress_dxt1_fast(input_colors, input_weights, color_weights, (BlockDXT1*)(block_data + b * 8));
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
//printf("nvtt fast \t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse));
//output_dxt_dds(bw, bh, block_data, "nvtt_fast.dds");
stats->compressorName = "nvtt-fast";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed();
stats++;
}
{
memset(block_data, 0, block_count * 8);
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
//compress_dxt1_fast2(rgba_block_data + b * 4 * 4 * 4, (BlockDXT1*)(block_data + b * 8));
compress_dxt1_fast_geld(rgba_block_data + b * 4 * 4 * 4, (BlockDXT1*)(block_data + b * 8));
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
//printf("nvtt fast2 \t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse));
//output_dxt_dds(bw, bh, block_data, "nvtt_fast2.dds");
stats->compressorName = "nvtt-geld";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed();
stats++;
}
{
memset(block_data, 0, block_count * 8);
Vector3 color_weights(1);
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
Vector4 input_colors[16];
float input_weights[16];
for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[j].w = 1.0f;
input_weights[j] = 1.0f;
}
compress_dxt1(input_colors, input_weights, color_weights, false, (BlockDXT1*)(block_data + b * 8));
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
//printf("nvtt hq \t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse));
//output_dxt_dds(bw, bh, block_data, "nvtt_hq.dds");
stats->compressorName = "nvtt-hq";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed();
stats++;
}
{
memset(block_data, 0, block_count * 8);
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
squish::Compress(rgba_block_data + b * 4 * 4 * 4, block_data + b * 8, squish::kDxt1);
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
//printf("squish \t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse));
//output_dxt_dds(bw, bh, block_data, "squish.dds");
stats->compressorName = "squish";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed();
stats++;
}
/*{
memset(block_data, 0, block_count * 8);
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
squish::Compress(rgba_block_data + b * 4 * 4 * 4, block_data + b * 8, squish::kDxt1 | squish::kColourIterativeClusterFit);
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
//printf("squish hq\t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse));
//output_dxt_dds(bw, bh, block_data, "squish_hq.dds");
stats->compressorName = "squish-hq";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed();
stats++;
}*/
{
memset(block_data, 0, block_count * 8);
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
CompressBlockBC1(rgba_block_data + b * 4 * 4 * 4, 16, block_data + b * 8, nullptr);
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
//printf("squish \t%f\t-> %f %f\n", timer.elapsed(), sqrt(mse), mse_to_psnr(mse));
//output_dxt_dds(bw, bh, block_data, "squish.dds");
stats->compressorName = "cmp";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed();
stats++;
}
return false;
}
bool analyze_bc1(const char * inputFileName) {
int w, h, n;
unsigned char *input_data = stbi_load(inputFileName, &w, &h, &n, 4);
defer { stbi_image_free(input_data); };
if (input_data == nullptr) {
printf("Failed to load input image '%s'.\n", inputFileName);
return false;
}
int block_count = (w / 4) * (h / 4);
u8 * rgba_block_data = (u8 *)malloc(block_count * 4 * 4 * 4);
defer { free(rgba_block_data); };
int bw = 4 * (w / 4); // Round down.
int bh = 4 * (h / 4);
// Convert to block layout.
for (int y = 0, b = 0; y < bh; y += 4) {
for (int x = 0; x < bw; x += 4, b++) {
for (int yy = 0; yy < 4; yy++) {
for (int xx = 0; xx < 4; xx++) {
if (x + xx < w && y + yy < h) {
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 0] = input_data[((y + yy) * w + x + xx) * 4 + 0];
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 1] = input_data[((y + yy) * w + x + xx) * 4 + 1];
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 2] = input_data[((y + yy) * w + x + xx) * 4 + 2];
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 3] = input_data[((y + yy) * w + x + xx) * 4 + 3];
}
else {
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 0] = 0;
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 1] = 0;
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 2] = 0;
rgba_block_data[b * 4 * 4 * 4 + (yy * 4 + xx) * 4 + 3] = 0;
}
}
}
}
}
u8 * block_data = (u8 *)malloc(block_count * 8);
memset(block_data, 0, block_count * 8);
Timer timer;
int stb_better_than_nvtt_fast = 0;
int stb_better_than_nvtt_hq = 0;
int squish_better_than_nvtt_hq = 0;
int this_should_never_happen = 0;
int this_should_never_happen_either = 0;
Vector3 color_weights(1);
for (int b = 0; b < block_count; b++) {
uint8 * rgba_block = rgba_block_data + b * 4 * 4 * 4;
uint8 * dxt_block = block_data + b * 8;
Vector4 input_colors[16];
float input_weights[16];
for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block[j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block[j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block[j * 4 + 2] / 255.0f;
input_colors[j].w = 255.0f;
input_weights[j] = 1.0f;
}
// Compare all the different modes on the same block:
stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_NORMAL);
float mse_stb = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_HIGHQUAL);
float mse_stb_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
compress_dxt1_fast(input_colors, input_weights, color_weights, (BlockDXT1*)dxt_block);
float mse_nvtt_fast = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
compress_dxt1_fast2(rgba_block, (BlockDXT1*)dxt_block);
float mse_nvtt_fast2 = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
compress_dxt1_fast_geld(rgba_block, (BlockDXT1*)dxt_block);
float mse_nvtt_geld = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
compress_dxt1(input_colors, input_weights, color_weights, false, (BlockDXT1*)dxt_block);
float mse_nvtt_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
squish::Compress(rgba_block, dxt_block, squish::kDxt1);
float mse_squish = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
squish::Compress(rgba_block, dxt_block, squish::kDxt1 | squish::kColourIterativeClusterFit);
float mse_squish_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block);
if (mse_stb < mse_nvtt_fast) {
stb_better_than_nvtt_fast++;
}
if (mse_stb < mse_nvtt_hq) {
stb_better_than_nvtt_hq++;
}
if (mse_squish < mse_nvtt_hq) {
squish_better_than_nvtt_hq++;
}
if (mse_nvtt_fast < mse_nvtt_hq) {
this_should_never_happen++;
}
if (mse_nvtt_fast2 < mse_nvtt_fast) {
this_should_never_happen_either++;
}
}
return true;
}
// Image set that main() runs the benchmark over: 24 files under testsuite/kodak and 8
// under testsuite/waterloo. The paths are relative, so they resolve against the working
// directory of the process.
// NOTE(review): testsuite/waterloo/baboon.png is not listed here -- confirm whether that
// is intentional.
const char * image_set[] = {
"testsuite/kodak/kodim01.png",
"testsuite/kodak/kodim02.png",
"testsuite/kodak/kodim03.png",
"testsuite/kodak/kodim04.png",
"testsuite/kodak/kodim05.png",
"testsuite/kodak/kodim06.png",
"testsuite/kodak/kodim07.png",
"testsuite/kodak/kodim08.png",
"testsuite/kodak/kodim09.png",
"testsuite/kodak/kodim10.png",
"testsuite/kodak/kodim11.png",
"testsuite/kodak/kodim12.png",
"testsuite/kodak/kodim13.png",
"testsuite/kodak/kodim14.png",
"testsuite/kodak/kodim15.png",
"testsuite/kodak/kodim16.png",
"testsuite/kodak/kodim17.png",
"testsuite/kodak/kodim18.png",
"testsuite/kodak/kodim19.png",
"testsuite/kodak/kodim20.png",
"testsuite/kodak/kodim21.png",
"testsuite/kodak/kodim22.png",
"testsuite/kodak/kodim23.png",
"testsuite/kodak/kodim24.png",
"testsuite/waterloo/clegg.png",
"testsuite/waterloo/frymire.png",
"testsuite/waterloo/lena.png",
"testsuite/waterloo/monarch.png",
"testsuite/waterloo/peppers.png",
"testsuite/waterloo/sail.png",
"testsuite/waterloo/serrano.png",
"testsuite/waterloo/tulips.png",
};
// Alternative image set made of diffuse.tga textures under Roblox/. main() currently
// benchmarks image_set; the two lines that would select this set are commented out there.
const char * roblox_set[] = {
"Roblox/asphalt_side/diffuse.tga",
"Roblox/asphalt_top/diffuse.tga",
"Roblox/basalt/diffuse.tga",
"Roblox/brick/diffuse.tga",
"Roblox/cobblestone_side/diffuse.tga",
"Roblox/cobblestone_top/diffuse.tga",
"Roblox/concrete_side/diffuse.tga",
"Roblox/concrete_top/diffuse.tga",
"Roblox/crackedlava/diffuse.tga",
"Roblox/glacier_bottom/diffuse.tga",
"Roblox/glacier_side/diffuse.tga",
"Roblox/glacier_top/diffuse.tga",
"Roblox/grass_bottom/diffuse.tga",
"Roblox/grass_side/diffuse.tga",
"Roblox/grass_top/diffuse.tga",
"Roblox/ground/diffuse.tga",
"Roblox/ice_side/diffuse.tga",
"Roblox/ice_top/diffuse.tga",
"Roblox/leafygrass_side/diffuse.tga",
"Roblox/leafygrass_top/diffuse.tga",
"Roblox/limestone_side/diffuse.tga",
"Roblox/limestone_top/diffuse.tga",
"Roblox/mud/diffuse.tga",
"Roblox/pavement_side/diffuse.tga",
"Roblox/pavement_top/diffuse.tga",
"Roblox/rock/diffuse.tga",
"Roblox/salt_side/diffuse.tga",
"Roblox/salt_top/diffuse.tga",
"Roblox/sand_side/diffuse.tga",
"Roblox/sand_top/diffuse.tga",
"Roblox/sandstone_bottom/diffuse.tga",
"Roblox/sandstone_side/diffuse.tga",
"Roblox/sandstone_top/diffuse.tga",
"Roblox/slate/diffuse.tga",
"Roblox/snow/diffuse.tga",
"Roblox/woodplanks/diffuse.tga",
};
int main(int argc, char *argv[])
{
const char * inputFileName = "testsuite/kodak/kodim14.png";
//const char * inputFileName = "testsuite/kodak/kodim18.png";
//const char * inputFileName = "testsuite/kodak/kodim15.png";
//const char * inputFileName = "testsuite/waterloo/frymire.png";
// test_bc1(inputFileName, 0);
analyze_bc1(inputFileName);
//const char ** set = roblox_set;
//int count = sizeof(roblox_set) / sizeof(char*);
const char ** set = image_set;
int count = sizeof(image_set) / sizeof(char*);
Stats stats[COMPRESSOR_COUNT];
for (int i = 0; i < COMPRESSOR_COUNT; i++) {
stats[i].compressorName = nullptr;
stats[i].mseArray.resize(count, 0.0f);
stats[i].timeArray.resize(count, 0.0f);
}
for (int i = 0; i < count; i++) {
printf("\nImage '%s'\n", set[i]);
test_bc1(set[i], i, stats);
for (int c = 0; c < COMPRESSOR_COUNT; c++) {
if (stats[c].compressorName) {
printf("%-16s %f\t%f\n", stats[c].compressorName, sqrtf(stats[c].mseArray[i]), stats[c].timeArray[i]);
}
}
}
// Print stats.
printf("\nAverage Results:\n");
for (int c = 0; c < COMPRESSOR_COUNT; c++) {
if (stats[c].compressorName) {
float sum = 0.0f;
for (float it : stats[c].mseArray) {
sum += it;
}
sum /= count;
float time = 0.0f;
for (float it : stats[c].timeArray) {
time += it;
}
printf("%-16s %f\t%f\n", stats[c].compressorName, sqrtf(sum), time);
}
}
return EXIT_SUCCESS;
}

Loading…
Cancel
Save