Update nvtt to use icbc library.

This commit is contained in:
Ignacio 2020-04-13 18:01:33 -07:00
parent daff42781d
commit a671567596
14 changed files with 4128 additions and 1931 deletions

View File

@ -13,16 +13,15 @@ set (CMAKE_CXX_STANDARD 11)
IF(WIN32) IF(WIN32)
# gnuwin32 paths: # gnuwin32 paths:
SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32") #SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32")
SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include") #SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include")
SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib") #SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib")
# Set GLUT path: # Set GLUT path:
SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut") #SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut")
# Set FreeImage path: # Set FreeImage path:
SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage") #SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage")
ENDIF(WIN32) ENDIF(WIN32)
INCLUDE(${NV_CMAKE_DIR}/OptimalOptions.cmake) INCLUDE(${NV_CMAKE_DIR}/OptimalOptions.cmake)
@ -36,11 +35,11 @@ IF(CMAKE_BUILD_TYPE MATCHES "debug")
ENDIF() ENDIF()
IF(NVTT_SHARED) #IF(NVTT_SHARED)
SET(NVCORE_SHARED TRUE) # SET(NVCORE_SHARED TRUE)
SET(NVMATH_SHARED TRUE) # SET(NVMATH_SHARED TRUE)
SET(NVIMAGE_SHARED TRUE) # SET(NVIMAGE_SHARED TRUE)
ENDIF(NVTT_SHARED) #ENDIF(NVTT_SHARED)
ADD_SUBDIRECTORY(extern) ADD_SUBDIRECTORY(extern)

View File

@ -1,4 +1,4 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com> // This code is in the public domain -- Ignacio Castano <castano@gmail.com>
#pragma once #pragma once
#ifndef NV_CORE_H #ifndef NV_CORE_H

View File

@ -1,4 +1,4 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com> // This code is in the public domain -- Ignacio Castano <castano@gmail.com>
#pragma once #pragma once
#include "nvmath.h" #include "nvmath.h"

View File

@ -206,15 +206,16 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
// BC1 // BC1
#include "CompressorDXT1.h" #include "icbc.h"
void FastCompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) void FastCompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{ {
compress_dxt1_fast(colors, weights, compressionOptions.colorWeight.xyz(), (BlockDXT1 *)output); icbc::compress_dxt1_fast((float*)colors, weights, compressionOptions.colorWeight.component, output);
} }
void CompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) void CompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{ {
compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, false, (BlockDXT1 *)output); bool hq = compressionOptions.quality > Quality_Normal;
icbc::compress_dxt1((float*)colors, weights, compressionOptions.colorWeight.component, /*three_color_mode*/true, hq, output);
} }

View File

@ -1,63 +1,63 @@
PROJECT(nvtt) PROJECT(nvtt)
ADD_SUBDIRECTORY(squish) ADD_SUBDIRECTORY(squish)
SET(NVTT_SRCS SET(NVTT_SRCS
nvtt.h nvtt.cpp nvtt.h nvtt.cpp
nvtt_wrapper.h nvtt_wrapper.cpp nvtt_wrapper.h nvtt_wrapper.cpp
ClusterFit.h ClusterFit.cpp ClusterFit.h ClusterFit.cpp
Compressor.h Compressor.h
BlockCompressor.h BlockCompressor.cpp BlockCompressor.h BlockCompressor.cpp
CompressorDX9.h CompressorDX9.cpp CompressorDX9.h CompressorDX9.cpp
CompressorDX10.h CompressorDX10.cpp CompressorDX10.h CompressorDX10.cpp
CompressorDX11.h CompressorDX11.cpp CompressorDX11.h CompressorDX11.cpp
CompressorDXT1.h CompressorDXT1.cpp icbc.h icbc.cpp
CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp
CompressorETC.h CompressorETC.cpp CompressorETC.h CompressorETC.cpp
CompressorRGB.h CompressorRGB.cpp CompressorRGB.h CompressorRGB.cpp
Context.h Context.cpp Context.h Context.cpp
QuickCompressDXT.h QuickCompressDXT.cpp QuickCompressDXT.h QuickCompressDXT.cpp
OptimalCompressDXT.h OptimalCompressDXT.cpp OptimalCompressDXT.h OptimalCompressDXT.cpp
SingleColorLookup.h SingleColorLookup.cpp SingleColorLookup.h SingleColorLookup.cpp
CompressionOptions.h CompressionOptions.cpp CompressionOptions.h CompressionOptions.cpp
InputOptions.h InputOptions.cpp InputOptions.h InputOptions.cpp
OutputOptions.h OutputOptions.cpp OutputOptions.h OutputOptions.cpp
TaskDispatcher.h #TaskDispatcher.cpp TaskDispatcher.h #TaskDispatcher.cpp
Surface.h Surface.cpp Surface.h Surface.cpp
CubeSurface.h CubeSurface.cpp CubeSurface.h CubeSurface.cpp
cuda/CudaUtils.h cuda/CudaUtils.cpp cuda/CudaUtils.h cuda/CudaUtils.cpp
cuda/CudaMath.h cuda/CudaMath.h
cuda/BitmapTable.h cuda/BitmapTable.h
cuda/CudaCompressorDXT.h cuda/CudaCompressorDXT.cpp) cuda/CudaCompressorDXT.h cuda/CudaCompressorDXT.cpp)
IF (CUDA_FOUND) IF (CUDA_FOUND)
ADD_DEFINITIONS(-DHAVE_CUDA) ADD_DEFINITIONS(-DHAVE_CUDA)
CUDA_COMPILE(CUDA_SRCS cuda/CompressKernel.cu) CUDA_COMPILE(CUDA_SRCS cuda/CompressKernel.cu)
SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS}) SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS})
SET(LIBS ${LIBS} ${CUDA_LIBRARIES}) SET(LIBS ${LIBS} ${CUDA_LIBRARIES})
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
ENDIF (CUDA_FOUND) ENDIF (CUDA_FOUND)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/rg_etc1_v104) INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/rg_etc1_v104)
ADD_DEFINITIONS(-DNVTT_EXPORTS) ADD_DEFINITIONS(-DNVTT_EXPORTS)
IF(NVTT_SHARED) IF(NVTT_SHARED)
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS}) ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED) ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS}) ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED) ENDIF(NVTT_SHARED)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread nvsquish bc6h bc7 nvmath rg_etc1) TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread nvsquish bc6h bc7 nvmath rg_etc1)
INSTALL(TARGETS nvtt INSTALL(TARGETS nvtt
RUNTIME DESTINATION bin RUNTIME DESTINATION bin
LIBRARY DESTINATION lib LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib/static) ARCHIVE DESTINATION lib/static)
INSTALL(FILES nvtt.h nvtt_wrapper.h DESTINATION include/nvtt) INSTALL(FILES nvtt.h nvtt_wrapper.h DESTINATION include/nvtt)
ADD_SUBDIRECTORY(tools) ADD_SUBDIRECTORY(tools)
ADD_SUBDIRECTORY(tests) ADD_SUBDIRECTORY(tests)

File diff suppressed because it is too large Load Diff

View File

@ -1,29 +0,0 @@
// Declarations of the DXT1 block-compression entry points in namespace nv.
// Only declarations are visible here; the behavior notes below come from the
// existing comments and the signatures, not from the implementations.
namespace nv {
// Forward declarations: these types are only used through pointers, references
// or array parameters in the declarations below.
struct BlockDXT1;
class Vector3;
class Vector4;
// NOTE(review): presumably one-time setup that must run before the compressors
// below are used -- confirm against the implementation.
void init_dxt1();
// All these functions return MSE.
// NOTE(review): that statement only holds for the functions declared `float`;
// compress_dxt1_cluster_fit, compress_dxt1_fast2 and compress_dxt1_fast_geld
// are declared `void` and return nothing.
float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
//float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output);
void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Cluster fit end point selection.
// Takes 16 input colors with one weight each, plus per-channel color weights;
// the encoded block is written through `output`.
float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, bool hq, BlockDXT1 * output);
// Quick end point selection followed by least squares refinement.
float compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output);
// @@ Change these interfaces to take a pitch argument instead of assuming (4*4), just like CMP_Core.
// The variants below take 16*4 unsigned chars instead of float vectors.
// NOTE(review): presumably 16 texels of 4 bytes each -- confirm channel order with callers.
void compress_dxt1_fast2(const unsigned char input_colors[16*4], BlockDXT1 * output);
void compress_dxt1_fast_geld(const unsigned char input_colors[16 * 4], BlockDXT1 * output);
// Returns the error of an encoded block against the original 16*4-byte block.
// `decoder` defaults to 0; the meaning of its values is not visible in this header.
float evaluate_dxt1_error(const unsigned char rgba_block[16 * 4], const BlockDXT1 * block, int decoder = 0);
}

View File

@ -1,5 +1,5 @@
#include "CompressorDXT5_RGBM.h" #include "CompressorDXT5_RGBM.h"
#include "CompressorDXT1.h" #include "icbc.h"
#include "OptimalCompressDXT.h" #include "OptimalCompressDXT.h"
#include "QuickCompressDXT.h" #include "QuickCompressDXT.h"
@ -58,8 +58,10 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
float rgb_weights[16]; float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, input_colors_rgbm, rgb_weights); convert_to_rgbm(input_colors, input_weights, min_m, input_colors_rgbm, rgb_weights);
float color_weights[3] = { 1.0f,1.0f,1.0f };
// Compress RGB. // Compress RGB.
compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, /*hq=*/false, &output->color); icbc::compress_dxt1((float *)input_colors_rgbm, rgb_weights, color_weights, /*three_color_mode=*/false, /*hq=*/false, &output->color);
// Decompress RGB/M block. // Decompress RGB/M block.
nv::ColorBlock RGB; nv::ColorBlock RGB;

View File

@ -6,7 +6,7 @@
#include "nvmath/Color.inl" #include "nvmath/Color.inl"
#include "nvcore/Utils.h" // clamp #include "nvcore/Utils.h" // clamp
#define HAVE_RGETC NV_OS_OSX #define HAVE_RGETC 1
#define HAVE_ETCPACK 0 // Only enable in OSX for debugging. #define HAVE_ETCPACK 0 // Only enable in OSX for debugging.
#if HAVE_RGETC #if HAVE_RGETC
@ -190,7 +190,7 @@ static const float midpoints5[32] = {
// ETC2 Modes: // ETC2 Modes:
// - ETC1: // - ETC1:
// - two partitions (flip modes): 2*(4x2, 2x4) // - two partitions (flip modes): 2*(4x2, 2x4)
// - two base colors sotred as 444+444 or 555+333 // - two base colors stored as 444+444 or 555+333
// - two 3 bit intensity modifiers // - two 3 bit intensity modifiers
// - T Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices. // - T Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices.
// - H Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices. // - H Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices.

View File

@ -30,6 +30,7 @@
#include "CompressionOptions.h" #include "CompressionOptions.h"
#include "OutputOptions.h" #include "OutputOptions.h"
#include "Surface.h" #include "Surface.h"
#include "icbc.h"
#include "CompressorDX9.h" #include "CompressorDX9.h"
#include "CompressorDX10.h" #include "CompressorDX10.h"
@ -67,6 +68,8 @@ Compressor::Compressor() : m(*new Compressor::Private())
enableCudaAcceleration(m.cudaSupported); enableCudaAcceleration(m.cudaSupported);
m.dispatcher = &m.defaultDispatcher; m.dispatcher = &m.defaultDispatcher;
icbc::init();
} }
Compressor::~Compressor() Compressor::~Compressor()

2
src/nvtt/icbc.cpp Normal file
View File

@ -0,0 +1,2 @@
#define ICBC_IMPLEMENTATION
#include "icbc.h"

3922
src/nvtt/icbc.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -29,7 +29,7 @@ ADD_EXECUTABLE(nvhdrtest hdrtest.cpp)
TARGET_LINK_LIBRARIES(nvhdrtest nvcore nvimage nvtt bc6h nvmath) TARGET_LINK_LIBRARIES(nvhdrtest nvcore nvimage nvtt bc6h nvmath)
ADD_EXECUTABLE(bc1enc bc1enc.cpp) ADD_EXECUTABLE(bc1enc bc1enc.cpp)
TARGET_LINK_LIBRARIES(bc1enc nvcore nvimage nvmath nvtt squish CMP_Core) TARGET_LINK_LIBRARIES(bc1enc nvcore nvimage nvmath squish CMP_Core)
INSTALL(TARGETS nvtestsuite nvhdrtest DESTINATION bin) INSTALL(TARGETS nvtestsuite nvhdrtest DESTINATION bin)

View File

@ -1,6 +1,5 @@
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#include <assert.h>
#include <stdlib.h> #include <stdlib.h>
//#define STBI_ASSERT(x) //#define STBI_ASSERT(x)
@ -13,12 +12,13 @@
#define RGBCX_IMPLEMENTATION #define RGBCX_IMPLEMENTATION
#include "../extern/rg/rgbcx.h" #include "../extern/rg/rgbcx.h"
#define ICBC_IMPLEMENTATION
#include "nvtt/icbc.h"
#include "../extern/libsquish-1.15/squish.h" #include "../extern/libsquish-1.15/squish.h"
#include "../extern/CMP_Core/source/CMP_Core.h" #include "../extern/CMP_Core/source/CMP_Core.h"
#include "nvtt/CompressorDXT1.h"
#include "nvmath/Vector.h" #include "nvmath/Vector.h"
#include "nvmath/Color.h" #include "nvmath/Color.h"
@ -37,73 +37,23 @@ typedef unsigned int u32;
#define TEST_RGBCX 1 #define TEST_RGBCX 1
#define TEST_NVTT_FAST 1 #define TEST_NVTT_FAST 1
#define TEST_NVTT_TEST 1
#define TEST_NVTT 1 #define TEST_NVTT 1
#define TEST_NVTT_HQ 1 #define TEST_NVTT_HQ 0
#define TEST_SQUISH 1 #define TEST_SQUISH 0
#define TEST_SQUISH_HQ 1 #define TEST_SQUISH_HQ 0
#define TEST_AMD_CMP 1 #define TEST_AMD_CMP 0
// Converts a mean squared error into a PSNR value in dB, using 255 as the peak.
// A root-mean-square of exactly zero yields the sentinel 1e+10f; any other
// result is clamped to the range [0, 300].
static float mse_to_psnr(float mse) {
    const float root_mean_square = sqrtf(mse);
    if (!root_mean_square) {
        // Identical data: avoid dividing by zero and report a very large PSNR.
        return 1e+10f;
    }
    const double decibels = log10(255.0 / root_mean_square) * 20.0;
    return (float)clamp(decibels, 0.0, 300.0);
}
/*
void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma)
{
//assert((first_chan < 4U) && (first_chan + total_chans <= 4U));
const uint32_t width = std::min(a.get_width(), b.get_width());
const uint32_t height = std::min(a.get_height(), b.get_height());
double hist[256];
memset(hist, 0, sizeof(hist));
for (uint32_t y = 0; y < height; y++)
{
for (uint32_t x = 0; x < width; x++)
{
const color_rgba &ca = a(x, y), &cb = b(x, y);
for (uint32_t c = 0; c < 3; c++)
hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++;
}
}
m_max = 0;
double sum = 0.0f, sum2 = 0.0f;
for (uint32_t i = 0; i < 256; i++)
{
if (hist[i])
{
m_max = std::max<float>(m_max, (float)i);
double v = i * hist[i];
sum += v;
sum2 += i * v;
}
}
double total_values = (double)width * (double)height;
if (avg_comp_error)
total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);
m_mean = (float)clamp<double>(sum / total_values, 0.0f, 255.0);
m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0 * 255.0);
m_rms = (float)sqrt(m_mean_squared);
m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0, 0.0f, 300.0f) : 1e+10f;
}
*/
// Returns mse. // Returns mse.
float evaluate_dxt1_mse(uint8 * rgba, uint8 * block, int block_count, int decoder = 0) { float evaluate_dxt1_mse(uint8 * rgba, uint8 * block, int block_count, icbc::Decoder decoder = icbc::Decoder_D3D10) {
double total = 0.0f; double total = 0.0f;
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
total += nv::evaluate_dxt1_error(rgba, (BlockDXT1 *)block, decoder); total += icbc::evaluate_dxt1_error(rgba, block, decoder);
rgba += 4 * 4 * 4; rgba += 4 * 4 * 4;
block += 8; block += 8;
} }
@ -250,7 +200,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "stb"; stats->compressorName = "stb";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -274,7 +224,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "stb-hq"; stats->compressorName = "stb-hq";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -300,7 +250,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "rgbcx"; stats->compressorName = "rgbcx";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -310,22 +260,22 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (TEST_NVTT_FAST) { if (TEST_NVTT_FAST) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
Vector3 color_weights(1); float color_weights[3] = { 1, 1, 1 };
timer.start(); timer.start();
for (int i = 0; i < repeat_count; i++) { for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
Vector4 input_colors[16]; float input_colors[16*4];
float input_weights[16]; float input_weights[16];
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f; input_colors[4*j+0] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f; input_colors[4*j+1] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f; input_colors[4*j+2] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[j].w = 255.0f; input_colors[4*j+3] = 1.0f;
input_weights[j] = 1.0f; input_weights[j] = 1.0f;
} }
compress_dxt1_fast(input_colors, input_weights, color_weights, (BlockDXT1*)(block_data + b * 8)); icbc::compress_dxt1_fast(input_colors, input_weights, color_weights, (block_data + b * 8));
} }
} }
timer.stop(); timer.stop();
@ -335,7 +285,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "nvtt-fast"; stats->compressorName = "nvtt-fast";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -343,24 +293,59 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
} }
} }
if (TEST_NVTT) { if (TEST_NVTT_TEST) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
Vector3 color_weights(1); float color_weights[3] = { 1, 1, 1 };
timer.start(); timer.start();
for (int i = 0; i < repeat_count; i++) { for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
Vector4 input_colors[16]; float input_colors[16 * 4];
float input_weights[16]; float input_weights[16];
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f; input_colors[4 * j + 0] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f; input_colors[4 * j + 1] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f; input_colors[4 * j + 2] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[j].w = 1.0f; input_colors[4 * j + 3] = 1.0f;
input_weights[j] = 1.0f; input_weights[j] = 1.0f;
} }
compress_dxt1(input_colors, input_weights, color_weights, false, false, (BlockDXT1*)(block_data + b * 8)); icbc::compress_dxt1_test(input_colors, input_weights, color_weights, (block_data + b * 8));
}
}
timer.stop();
float mse = evaluate_dxt1_mse(rgba_block_data, block_data, block_count);
if (stats) {
stats->compressorName = "nvtt-test";
stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++;
}
else {
output_dxt_dds(bw, bh, block_data, "nvtt_test.dds");
}
}
if (TEST_NVTT) {
memset(block_data, 0, block_count * 8);
float color_weights[3] = { 1, 1, 1 };
timer.start();
for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) {
float input_colors[16*4];
float input_weights[16];
for (int j = 0; j < 16; j++) {
input_colors[4*j+0] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[4*j+1] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[4*j+2] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[4*j+3] = 1.0f;
input_weights[j] = 1.0f;
}
icbc::compress_dxt1(input_colors, input_weights, color_weights, false, false, (block_data + b * 8));
} }
} }
timer.stop(); timer.stop();
@ -370,7 +355,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "nvtt"; stats->compressorName = "nvtt";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -380,22 +365,22 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (TEST_NVTT_HQ) { if (TEST_NVTT_HQ) {
memset(block_data, 0, block_count * 8); memset(block_data, 0, block_count * 8);
Vector3 color_weights(1); float color_weights[3] = { 1, 1, 1 };
timer.start(); timer.start();
for (int i = 0; i < repeat_count; i++) { for (int i = 0; i < repeat_count; i++) {
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
Vector4 input_colors[16]; float input_colors[16 * 4];
float input_weights[16]; float input_weights[16];
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f; input_colors[4 * j + 0] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f; input_colors[4 * j + 1] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f; input_colors[4 * j + 2] = rgba_block_data[b * 4 * 4 * 4 + j * 4 + 2] / 255.0f;
input_colors[j].w = 1.0f; input_colors[4 * j + 3] = 1.0f;
input_weights[j] = 1.0f; input_weights[j] = 1.0f;
} }
compress_dxt1(input_colors, input_weights, color_weights, true, true, (BlockDXT1*)(block_data + b * 8)); icbc::compress_dxt1(input_colors, input_weights, color_weights, true, true, (block_data + b * 8));
} }
} }
timer.stop(); timer.stop();
@ -405,7 +390,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "nvtt-hq"; stats->compressorName = "nvtt-hq";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -429,7 +414,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "squish"; stats->compressorName = "squish";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -453,7 +438,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "squish-hq"; stats->compressorName = "squish-hq";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -477,7 +462,7 @@ bool test_bc1(const char * inputFileName, int index, Stats * stats) {
if (stats) { if (stats) {
stats->compressorName = "cmp"; stats->compressorName = "cmp";
stats->mseArray[index] = mse; stats->mseArray[index] = mse;
stats->timeArray[index] = timer.elapsed(); stats->timeArray[index] = timer.elapsed() / repeat_count;
stats++; stats++;
} }
else { else {
@ -545,51 +530,51 @@ bool analyze_bc1(const char * inputFileName) {
int this_should_never_happen = 0; int this_should_never_happen = 0;
int this_should_never_happen_either = 0; int this_should_never_happen_either = 0;
Vector3 color_weights(1); float color_weights[3] = { 1, 1, 1 };
for (int b = 0; b < block_count; b++) { for (int b = 0; b < block_count; b++) {
uint8 * rgba_block = rgba_block_data + b * 4 * 4 * 4; uint8 * rgba_block = rgba_block_data + b * 4 * 4 * 4;
uint8 * dxt_block = block_data + b * 8; uint8 * dxt_block = block_data + b * 8;
Vector4 input_colors[16]; float input_colors[16*4];
float input_weights[16]; float input_weights[16];
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
input_colors[j].x = rgba_block[j * 4 + 0] / 255.0f; input_colors[4*j+0] = rgba_block[j * 4 + 0] / 255.0f;
input_colors[j].y = rgba_block[j * 4 + 1] / 255.0f; input_colors[4*j+1] = rgba_block[j * 4 + 1] / 255.0f;
input_colors[j].z = rgba_block[j * 4 + 2] / 255.0f; input_colors[4*j+2] = rgba_block[j * 4 + 2] / 255.0f;
input_colors[j].w = 255.0f; input_colors[4*j+3] = 255.0f;
input_weights[j] = 1.0f; input_weights[j] = 1.0f;
} }
// Compare all the different modes on the same block: // Compare all the different modes on the same block:
stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_NORMAL); stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_NORMAL);
float mse_stb = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_stb = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_HIGHQUAL); stb_compress_dxt_block(dxt_block, rgba_block, 0, STB_DXT_HIGHQUAL);
float mse_stb_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_stb_hq = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1_fast(input_colors, input_weights, color_weights, (BlockDXT1*)dxt_block); icbc::compress_dxt1_fast(input_colors, input_weights, color_weights, dxt_block);
float mse_nvtt_fast = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_fast = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1_fast2(rgba_block, (BlockDXT1*)dxt_block); icbc::compress_dxt1_fast(rgba_block, dxt_block);
float mse_nvtt_fast2 = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_fast2 = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1_fast_geld(rgba_block, (BlockDXT1*)dxt_block); icbc::compress_dxt1_test(input_colors, input_weights, color_weights, dxt_block);
float mse_nvtt_geld = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_test = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1(input_colors, input_weights, color_weights, true, false, (BlockDXT1*)dxt_block); icbc::compress_dxt1(input_colors, input_weights, color_weights, true, false, dxt_block);
float mse_nvtt = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
compress_dxt1(input_colors, input_weights, color_weights, true, true, (BlockDXT1*)dxt_block); icbc::compress_dxt1(input_colors, input_weights, color_weights, true, true, dxt_block);
float mse_nvtt_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_nvtt_hq = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
squish::Compress(rgba_block, dxt_block, squish::kDxt1); squish::Compress(rgba_block, dxt_block, squish::kDxt1);
float mse_squish = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_squish = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
squish::Compress(rgba_block, dxt_block, squish::kDxt1 | squish::kColourIterativeClusterFit); squish::Compress(rgba_block, dxt_block, squish::kDxt1 | squish::kColourIterativeClusterFit);
float mse_squish_hq = nv::evaluate_dxt1_error(rgba_block, (BlockDXT1 *)dxt_block); float mse_squish_hq = icbc::evaluate_dxt1_error(rgba_block, dxt_block);
if (mse_stb < mse_nvtt_fast) { if (mse_stb < mse_nvtt_fast) {
stb_better_than_nvtt_fast++; stb_better_than_nvtt_fast++;
@ -603,6 +588,9 @@ bool analyze_bc1(const char * inputFileName) {
if (mse_nvtt_hq < mse_nvtt) { if (mse_nvtt_hq < mse_nvtt) {
nvtt_hq_wins++; nvtt_hq_wins++;
} }
if (mse_nvtt < mse_nvtt_test) {
int k = 1;
}
if (mse_squish < mse_nvtt_hq) { if (mse_squish < mse_nvtt_hq) {
squish_better_than_nvtt_hq++; squish_better_than_nvtt_hq++;
} }
@ -619,6 +607,12 @@ bool analyze_bc1(const char * inputFileName) {
// Converts a mean squared error into a PSNR value in dB, using 255 as the peak.
// A root-mean-square of exactly zero yields the sentinel 1e+10f; any other
// result is clamped to the range [0, 300].
static float mse_to_psnr(float mse) {
    const float root_mean_square = sqrtf(mse);
    if (!root_mean_square) {
        // Identical data: avoid dividing by zero and report a very large PSNR.
        return 1e+10f;
    }
    const double decibels = log10(255.0 / root_mean_square) * 20.0;
    return (float)clamp(decibels, 0.0, 300.0);
}
const char * image_set[] = { const char * image_set[] = {
"testsuite/kodak/kodim01.png", "testsuite/kodak/kodim01.png",
@ -696,25 +690,26 @@ const char * roblox_set[] = {
}; };
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
const char * inputFileName = "testsuite/artificial.png"; //const char * inputFileName = "testsuite/artificial.png";
//const char * inputFileName = "testsuite/kodak/kodim14.png"; const char * inputFileName = "testsuite/kodak/kodim14.png";
//const char * inputFileName = "testsuite/kodak/kodim18.png"; //const char * inputFileName = "testsuite/kodak/kodim18.png";
//const char * inputFileName = "testsuite/kodak/kodim15.png"; //const char * inputFileName = "testsuite/kodak/kodim15.png";
//const char * inputFileName = "testsuite/waterloo/frymire.png"; //const char * inputFileName = "testsuite/waterloo/frymire.png";
//const char * inputFileName = "Roblox/leafygrass_top/diffuse.tga"; //const char * inputFileName = "Roblox/leafygrass_top/diffuse.tga";
icbc::init();
rgbcx::encode_bc1_init();
test_bc1(inputFileName, 0, NULL); test_bc1(inputFileName, 0, NULL);
//analyze_bc1(inputFileName); //analyze_bc1(inputFileName);
const char ** set = roblox_set; //const char ** set = roblox_set;
int count = sizeof(roblox_set) / sizeof(char*); //int count = sizeof(roblox_set) / sizeof(char*);
//const char ** set = image_set; const char ** set = image_set;
//int count = sizeof(image_set) / sizeof(char*); int count = sizeof(image_set) / sizeof(char*);
const int MAX_COMPRESSOR_COUNT = 16; const int MAX_COMPRESSOR_COUNT = 16;
Stats stats[MAX_COMPRESSOR_COUNT]; Stats stats[MAX_COMPRESSOR_COUNT];