Update nvtt to use icbc library.

This commit is contained in:
Ignacio 2020-04-13 18:01:33 -07:00
parent daff42781d
commit a671567596
14 changed files with 4128 additions and 1931 deletions

View File

@ -13,16 +13,15 @@ set (CMAKE_CXX_STANDARD 11)
IF(WIN32) IF(WIN32)
# gnuwin32 paths: # gnuwin32 paths:
SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32") #SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32")
SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include") #SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include")
SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib") #SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib")
# Set GLUT path: # Set GLUT path:
SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut") #SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut")
# Set FreeImage path: # Set FreeImage path:
SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage") #SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage")
ENDIF(WIN32) ENDIF(WIN32)
INCLUDE(${NV_CMAKE_DIR}/OptimalOptions.cmake) INCLUDE(${NV_CMAKE_DIR}/OptimalOptions.cmake)
@ -36,11 +35,11 @@ IF(CMAKE_BUILD_TYPE MATCHES "debug")
ENDIF() ENDIF()
IF(NVTT_SHARED) #IF(NVTT_SHARED)
SET(NVCORE_SHARED TRUE) # SET(NVCORE_SHARED TRUE)
SET(NVMATH_SHARED TRUE) # SET(NVMATH_SHARED TRUE)
SET(NVIMAGE_SHARED TRUE) # SET(NVIMAGE_SHARED TRUE)
ENDIF(NVTT_SHARED) #ENDIF(NVTT_SHARED)
ADD_SUBDIRECTORY(extern) ADD_SUBDIRECTORY(extern)

View File

@ -1,4 +1,4 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com> // This code is in the public domain -- Ignacio Castano <castano@gmail.com>
#pragma once #pragma once
#ifndef NV_CORE_H #ifndef NV_CORE_H

View File

@ -1,4 +1,4 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com> // This code is in the public domain -- Ignacio Castano <castano@gmail.com>
#pragma once #pragma once
#include "nvmath.h" #include "nvmath.h"

View File

@ -206,15 +206,16 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
// BC1 // BC1
#include "CompressorDXT1.h" #include "icbc.h"
void FastCompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) void FastCompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{ {
compress_dxt1_fast(colors, weights, compressionOptions.colorWeight.xyz(), (BlockDXT1 *)output); icbc::compress_dxt1_fast((float*)colors, weights, compressionOptions.colorWeight.component, output);
} }
void CompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) void CompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{ {
compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, false, (BlockDXT1 *)output); bool hq = compressionOptions.quality > Quality_Normal;
icbc::compress_dxt1((float*)colors, weights, compressionOptions.colorWeight.component, /*three_color_mode*/true, hq, output);
} }

View File

@ -11,7 +11,7 @@ SET(NVTT_SRCS
CompressorDX9.h CompressorDX9.cpp CompressorDX9.h CompressorDX9.cpp
CompressorDX10.h CompressorDX10.cpp CompressorDX10.h CompressorDX10.cpp
CompressorDX11.h CompressorDX11.cpp CompressorDX11.h CompressorDX11.cpp
CompressorDXT1.h CompressorDXT1.cpp icbc.h icbc.cpp
CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp
CompressorETC.h CompressorETC.cpp CompressorETC.h CompressorETC.cpp
CompressorRGB.h CompressorRGB.cpp CompressorRGB.h CompressorRGB.cpp

File diff suppressed because it is too large Load Diff

View File

@ -1,29 +0,0 @@
// Declarations of the BC1 (DXT1) block compression routines.
namespace nv {
// Forward declarations: only pointers/references to these types appear below.
struct BlockDXT1;
class Vector3;
class Vector4;
// NOTE(review): presumably one-time setup that must run before the compressors below are used -- confirm against the implementation.
void init_dxt1();
// All these functions return MSE.
// (Exception visible in the signatures: compress_dxt1_cluster_fit returns void.)
// Each takes `count` colors with matching per-color `weights`, per-channel `color_weights`, and writes the result to `output`.
float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
//float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output);
void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Cluster fit end point selection.
// Operates on a full block of 16 input colors; `hq` presumably enables a slower, higher quality search -- confirm.
float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, bool hq, BlockDXT1 * output);
// Quick end point selection followed by least squares refinement.
float compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output);
// @@ Change these interfaces to take a pitch argument instead of assuming (4*4), just like CMP_Core.
// These variants take the block as 16*4 unsigned bytes instead of float vectors and return nothing.
void compress_dxt1_fast2(const unsigned char input_colors[16*4], BlockDXT1 * output);
void compress_dxt1_fast_geld(const unsigned char input_colors[16 * 4], BlockDXT1 * output);
// Computes the error of an encoded `block` against the 16*4 byte source block.
// NOTE(review): `decoder` (default 0) presumably selects which decoding variant to emulate -- confirm.
float evaluate_dxt1_error(const unsigned char rgba_block[16 * 4], const BlockDXT1 * block, int decoder = 0);
}

View File

@ -1,5 +1,5 @@
#include "CompressorDXT5_RGBM.h" #include "CompressorDXT5_RGBM.h"
#include "CompressorDXT1.h" #include "icbc.h"
#include "OptimalCompressDXT.h" #include "OptimalCompressDXT.h"
#include "QuickCompressDXT.h" #include "QuickCompressDXT.h"
@ -58,8 +58,10 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
float rgb_weights[16]; float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, input_colors_rgbm, rgb_weights); convert_to_rgbm(input_colors, input_weights, min_m, input_colors_rgbm, rgb_weights);
float color_weights[3] = { 1.0f,1.0f,1.0f };
// Compress RGB. // Compress RGB.
compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, /*hq=*/false, &output->color); icbc::compress_dxt1((float *)input_colors_rgbm, rgb_weights, color_weights, /*three_color_mode=*/false, /*hq=*/false, &output->color);
// Decompress RGB/M block. // Decompress RGB/M block.
nv::ColorBlock RGB; nv::ColorBlock RGB;

View File

@ -6,7 +6,7 @@
#include "nvmath/Color.inl" #include "nvmath/Color.inl"
#include "nvcore/Utils.h" // clamp #include "nvcore/Utils.h" // clamp
#define HAVE_RGETC NV_OS_OSX #define HAVE_RGETC 1
#define HAVE_ETCPACK 0 // Only enable in OSX for debugging. #define HAVE_ETCPACK 0 // Only enable in OSX for debugging.
#if HAVE_RGETC #if HAVE_RGETC
@ -190,7 +190,7 @@ static const float midpoints5[32] = {
// ETC2 Modes: // ETC2 Modes:
// - ETC1: // - ETC1:
// - two partitions (flip modes): 2*(4x2, 2x4) // - two partitions (flip modes): 2*(4x2, 2x4)
// - two base colors sotred as 444+444 or 555+333 // - two base colors stored as 444+444 or 555+333
// - two 3 bit intensity modifiers // - two 3 bit intensity modifiers
// - T Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices. // - T Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices.
// - H Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices. // - H Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices.

View File

@ -30,6 +30,7 @@
#include "CompressionOptions.h" #include "CompressionOptions.h"
#include "OutputOptions.h" #include "OutputOptions.h"
#include "Surface.h" #include "Surface.h"
#include "icbc.h"
#include "CompressorDX9.h" #include "CompressorDX9.h"
#include "CompressorDX10.h" #include "CompressorDX10.h"
@ -67,6 +68,8 @@ Compressor::Compressor() : m(*new Compressor::Private())
enableCudaAcceleration(m.cudaSupported); enableCudaAcceleration(m.cudaSupported);
m.dispatcher = &m.defaultDispatcher; m.dispatcher = &m.defaultDispatcher;
icbc::init();
} }
Compressor::~Compressor() Compressor::~Compressor()

2
src/nvtt/icbc.cpp Normal file
View File

@ -0,0 +1,2 @@
// This translation unit contains nothing but the icbc.h include.
// ICBC_IMPLEMENTATION is defined first; presumably (single-header library convention) this makes
// icbc.h emit its function definitions here rather than only declarations -- confirm in icbc.h.
#define ICBC_IMPLEMENTATION
#include "icbc.h"

3922
src/nvtt/icbc.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -29,7 +29,7 @@ ADD_EXECUTABLE(nvhdrtest hdrtest.cpp)
TARGET_LINK_LIBRARIES(nvhdrtest nvcore nvimage nvtt bc6h nvmath) TARGET_LINK_LIBRARIES(nvhdrtest nvcore nvimage nvtt bc6h nvmath)
ADD_EXECUTABLE(bc1enc bc1enc.cpp) ADD_EXECUTABLE(bc1enc bc1enc.cpp)
TARGET_LINK_LIBRARIES(bc1enc nvcore nvimage nvmath nvtt squish CMP_Core) TARGET_LINK_LIBRARIES(bc1enc nvcore nvimage nvmath squish CMP_Core)
INSTALL(TARGETS nvtestsuite nvhdrtest DESTINATION bin) INSTALL(TARGETS nvtestsuite nvhdrtest DESTINATION bin)

View File

@ -1,6 +1,5 @@
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#include <assert.h>
#include <stdlib.h> #include <stdlib.h>
//#define STBI_ASSERT(x) //#define STBI_ASSERT(x)
@ -13,12 +12,13 @@
#define RGBCX_IMPLEMENTATION #define RGBCX_IMPLEMENTATION
#include "../extern/rg/rgbcx.h" #include "../extern/rg/rgbcx.h"
#define ICBC_IMPLEMENTATION
#include "nvtt/icbc.h"
#include "../extern/libsquish-1.15/squish.h" #include "../extern/libsquish-1.15/squish.h"
#include "../extern/CMP_Core/source/CMP_Core.h" #include "../extern/CMP_Core/source/CMP_Core.h"
#include "nvtt/CompressorDXT1.h"
#include "nvmath/Vector.h" #include "nvmath/Vector.h"
#include "nvmath/Color.h" #include "nvmath/Color.h"
@ -37,73 +37,23 @@ typedef unsigned int u32;
#define TEST_RGBCX 1 #define TEST_RGBCX 1
#define TEST_NVTT_FAST 1 #define TEST_NVTT_FAST 1
#define TEST_NVTT_TEST 1
#define TEST_NVTT 1 #define TEST_NVTT 1
#define TEST_NVTT_HQ 1 #define TEST_NVTT_HQ 0
#define TEST_SQUISH 1 #define TEST_SQUISH 0
#define TEST_SQUISH_HQ 1 #define TEST_SQUISH_HQ 0
#define TEST_AMD_CMP 1 #define TEST_AMD_CMP 0
// Converts a mean squared error into a peak signal-to-noise ratio in dB,
// using 255 as the peak signal value. The result is clamped to [0, 300];
// a zero error maps to the sentinel 1e+10f instead of dividing by zero.
static float mse_to_psnr(float mse) {
    const float root_mean_square = sqrtf(mse);
    if (root_mean_square == 0.0f) {
        // Identical images: report an effectively infinite PSNR.
        return 1e+10f;
    }
    const double decibels = 20.0 * log10(255.0 / root_mean_square);
    return (float)clamp(decibels, 0.0, 300.0);
}
/*
void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma)
{
//assert((first_chan < 4U) && (first_chan + total_chans <= 4U));
const uint32_t width = std::min(a.get_width(), b.get_width());
const uint32_t height = std::min(a.get_height(), b.get_height());
double hist[256];
memset(hist, 0, sizeof(hist));
for (uint32_t y = 0; y < height; y++)
{
for (uint32_t x = 0; x < width; x++)
{
const color_rgba &ca = a(x, y), &cb = b(x, y);
for (uint32_t c = 0; c < 3; c++)
hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++;
}
}
m_max = 0;
double sum = 0.0f, sum2 = 0.0f;
for (uint32_t i = 0; i < 256; i++)
{
if (hist[i])
{
m_max = std::max<float>(m_max, (float)i);
double v = i * hist[i];
sum += v;
sum2 += i * v;
}
}
double total_values = (double)width * (double)height;
if (avg_comp_error)
total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);
m_mean = (float)clamp<double>(sum / total_values, 0.0f, 255.0);
m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0 * 255.0);
m_rms = (float)sqrt(m_mean_squared);
m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0, 0.0f, 300.0f) : 1e+10f;
}
*/
// Returns mse. // Returns mse.
float evaluate_dxt1_mse(uint8 * rgba, uint8 * block, int block_count, int decoder = 0) { float evaluate_dxt1_mse(uint8 * rgba, uint8 * block, int block_count, icbc::Decoder decoder = icbc::Decoder_D3D10) {
double total = 0.0f; double total = 0.0f;
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
total += nv::evaluate_dxt1_error(rgba, (BlockDXT1 *)block, decoder); total += icbc::evaluate_dxt1_error(rgba, block, decoder);
rgba += 4 * 4 * 4; rgba += 4 * 4 * 4;
block += 8; block += 8;
} }
@ -250,7 +200,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "stb"; stats->compressorName = "stb";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -274,7 +224,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "stb-hq"; stats->compressorName = "stb-hq";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -300,7 +250,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "rgbcx"; stats->compressorName = "rgbcx";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -310,22 +260,22 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (TEST_NVTT_FAST) { if (TEST_NVTT_FAST) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
Vector3 color_weights(1); float color_weights[3] = { 1, 1, 1 };
timer.start(); timer.start();
for (int i = 0; i < repeat_count; i++) { for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
Vector4 input_colors[16]; float input_colors[16*4];
float input_weights[16]; float input_weights[16];
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f; input_colors[4*j+0] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f; input_colors[4*j+1] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f; input_colors[4*j+2] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[j].w = 255.0f; input_colors[4*j+3] = 1.0f;
input_weights[j] = 1.0f; input_weights[j] = 1.0f;
} }
compress_dxt1_fast(input_colors, input_weights, color_weights, (BlockDXT1*)(block_data + b * 8)); icbc::compress_dxt1_fast(input_colors, input_weights, color_weights, (block_data + b * 8));
} }
} }
timer.stop(); timer.stop();
@ -335,7 +285,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "nvtt-fast"; stats->compressorName = "nvtt-fast";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -343,24 +293,59 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
} }
} }
if (TEST_NVTT) { if (TEST_NVTT_TEST) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
Vector3 color_weights(1); float color_weights[3] = { 1, 1, 1 };
timer.start(); timer.start();
for (int i = 0; i < repeat_count; i++) { for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
Vector4 input_colors[16]; float input_colors[16 * 4];
float input_weights[16]; float input_weights[16];
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f; input_colors[4 * j + 0] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f; input_colors[4 * j + 1] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f; input_colors[4 * j + 2] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[j].w = 1.0f; input_colors[4 * j + 3] = 1.0f;
input_weights[j] = 1.0f; input_weights[j] = 1.0f;
} }
compress_dxt1(input_colors, input_weights, color_weights, false, false, (BlockDXT1*)(block_data + b * 8)); icbc::compress_dxt1_test(input_colors, input_weights, color_weights, (block_data + b * 8));
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
if (stats) {
stats->compressorName = "nvtt-test";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++;
}
else {
output_dxt_dds(bw, bh, block_data, "nvtt_test.dds");
}
}
if (TEST_NVTT) {
memset(block_data, 0, block_count * 8);
float color_weights[3] = { 1, 1, 1 };
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
float input_colors[16*4];
float input_weights[16];
for (int j = 0; j < 16; j++) {
input_colors[4*j+0] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[4*j+1] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[4*j+2] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[4*j+3] = 1.0f;
input_weights[j] = 1.0f;
}
icbc::compress_dxt1(input_colors, input_weights, color_weights, false, false, (block_data + b * 8));
} }
} }
timer.stop(); timer.stop();
@ -370,7 +355,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "nvtt"; stats->compressorName = "nvtt";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -380,22 +365,22 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (TEST_NVTT_HQ) { if (TEST_NVTT_HQ) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
Vector3 color_weights(1); float color_weights[3] = { 1, 1, 1 };
timer.start(); timer.start();
for (int i = 0; i < repeat_count; i++) { for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
Vector4 input_colors[16]; float input_colors[16 * 4];
float input_weights[16]; float input_weights[16];
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f; input_colors[4 * j + 0] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f; input_colors[4 * j + 1] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f; input_colors[4 * j + 2] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[j].w = 1.0f; input_colors[4 * j + 3] = 1.0f;
input_weights[j] = 1.0f; input_weights[j] = 1.0f;
} }
compress_dxt1(input_colors, input_weights, color_weights, true, true, (BlockDXT1*)(block_data + b * 8)); icbc::compress_dxt1(input_colors, input_weights, color_weights, true, true, (block_data + b * 8));
} }
} }
timer.stop(); timer.stop();
@ -405,7 +390,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "nvtt-hq"; stats->compressorName = "nvtt-hq";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -429,7 +414,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "squish"; stats->compressorName = "squish";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -453,7 +438,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "squish-hq"; stats->compressorName = "squish-hq";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -477,7 +462,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "cmp"; stats->compressorName = "cmp";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -545,51 +530,51 @@ bool analyze_bc1(const char * inputFileName) {
int this_should_never_happen = 0; int this_should_never_happen = 0;
int this_should_never_happen_either = 0; int this_should_never_happen_either = 0;
Vector3 color_weights(1); float color_weights[3] = { 1, 1, 1 };
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
uint8 * rgba_block = rgba_block_data + b * 4 * 4 * 4; uint8 * rgba_block = rgba_block_data + b * 4 * 4 * 4;
uint8 * dxt_block = block_data + b * 8; uint8 * dxt_block = block_data + b * 8;
Vector4 input_colors[16]; float input_colors[16*4];
float input_weights[16]; float input_weights[16];
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block[j * 4 + 0] / 255.0f; input_colors[4*j+0] = rgba_block[j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block[j * 4 + 1] / 255.0f; input_colors[4*j+1] = rgba_block[j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block[j * 4 + 2] / 255.0f; input_colors[4*j+2] = rgba_block[j * 4 + 2] / 255.0f;
input_colors[j].w = 255.0f; input_colors[4*j+3] = 255.0f;
input_weights[j] = 1.0f; input_weights[j] = 1.0f;
} }
// Compare all the different modes on the same block: // Compare all the different modes on the same block:
stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_NORMAL); stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_NORMAL);
float mse_stb = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_stb = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_HIGHQUAL); stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_HIGHQUAL);
float mse_stb_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_stb_hq = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1_fast(input_colors, input_weights, color_weights, (BlockDXT1*)dxt_block); icbc::compress_dxt1_fast(input_colors, input_weights, color_weights, dxt_block);
float mse_nvtt_fast = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_fast = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1_fast2(rgba_block, (BlockDXT1*)dxt_block); icbc::compress_dxt1_fast(rgba_block, dxt_block);
float mse_nvtt_fast2 = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_fast2 = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1_fast_geld(rgba_block, (BlockDXT1*)dxt_block); icbc::compress_dxt1_test(input_colors, input_weights, color_weights, dxt_block);
float mse_nvtt_geld = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_test = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1(input_colors, input_weights, color_weights, true, false, (BlockDXT1*)dxt_block); icbc::compress_dxt1(input_colors, input_weights, color_weights, true, false, dxt_block);
float mse_nvtt = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1(input_colors, input_weights, color_weights, true, true, (BlockDXT1*)dxt_block); icbc::compress_dxt1(input_colors, input_weights, color_weights, true, true, dxt_block);
float mse_nvtt_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_hq = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
squish::Compress(rgba_block, dxt_block, squish::kDxt1); squish::Compress(rgba_block, dxt_block, squish::kDxt1);
float mse_squish = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_squish = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
squish::Compress(rgba_block, dxt_block, squish::kDxt1 | squish::kColourIterativeClusterFit); squish::Compress(rgba_block, dxt_block, squish::kDxt1 | squish::kColourIterativeClusterFit);
float mse_squish_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_squish_hq = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
if (mse_stb < mse_nvtt_fast) { if (mse_stb < mse_nvtt_fast) {
stb_better_than_nvtt_fast++; stb_better_than_nvtt_fast++;
@ -603,6 +588,9 @@ bool analyze_bc1(const char * inputFileName) {
if (mse_nvtt_hq < mse_nvtt) { if (mse_nvtt_hq < mse_nvtt) {
nvtt_hq_wins++; nvtt_hq_wins++;
} }
if (mse_nvtt < mse_nvtt_test) {
int k = 1;
}
if (mse_squish < mse_nvtt_hq) { if (mse_squish < mse_nvtt_hq) {
squish_better_than_nvtt_hq++; squish_better_than_nvtt_hq++;
} }
@ -619,6 +607,12 @@ bool analyze_bc1(const char * inputFileName) {
// Converts a mean squared error into a peak signal-to-noise ratio in dB,
// using 255 as the peak signal value. The result is clamped to [0, 300];
// a zero error maps to the sentinel 1e+10f instead of dividing by zero.
static float mse_to_psnr(float mse) {
    const float root_mean_square = sqrtf(mse);
    if (root_mean_square == 0.0f) {
        // Identical images: report an effectively infinite PSNR.
        return 1e+10f;
    }
    const double decibels = 20.0 * log10(255.0 / root_mean_square);
    return (float)clamp(decibels, 0.0, 300.0);
}
const char * image_set[] = { const char * image_set[] = {
"testsuite/kodak/kodim01.png", "testsuite/kodak/kodim01.png",
@ -696,25 +690,26 @@ const char * roblox_set[] = {
}; };
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
const char * inputFileName = "testsuite/artificial.png"; //const char * inputFileName = "testsuite/artificial.png";
//const char * inputFileName = "testsuite/kodak/kodim14.png"; const char * inputFileName = "testsuite/kodak/kodim14.png";
//const char * inputFileName = "testsuite/kodak/kodim18.png"; //const char * inputFileName = "testsuite/kodak/kodim18.png";
//const char * inputFileName = "testsuite/kodak/kodim15.png"; //const char * inputFileName = "testsuite/kodak/kodim15.png";
//const char * inputFileName = "testsuite/waterloo/frymire.png"; //const char * inputFileName = "testsuite/waterloo/frymire.png";
//const char * inputFileName = "Roblox/leafygrass_top/diffuse.tga"; //const char * inputFileName = "Roblox/leafygrass_top/diffuse.tga";
icbc::init();
rgbcx::encode_bc1_init();
test_bc1(inputFileName, 0, NULL); test_bc1(inputFileName, 0, NULL);
//analyze_bc1(inputFileName); //analyze_bc1(inputFileName);
const char ** set = roblox_set; //const char ** set = roblox_set;
int count = sizeof(roblox_set) / sizeof(char*); //int count = sizeof(roblox_set) / sizeof(char*);
//const char ** set = image_set; const char ** set = image_set;
//int count = sizeof(image_set) / sizeof(char*); int count = sizeof(image_set) / sizeof(char*);
const int MAX_COMPRESSOR_COUNT = 16; const int MAX_COMPRESSOR_COUNT = 16;
Stats stats[MAX_COMPRESSOR_COUNT]; Stats stats[MAX_COMPRESSOR_COUNT];