Compare commits
10 Commits
Author | SHA1 | Date | |
---|---|---|---|
fa4a7b9af7 | |||
f111d23637 | |||
bce983f39e | |||
ff93ad41cb | |||
56c7771100 | |||
ccced843e3 | |||
dafe2b8841 | |||
e3e7fcb226 | |||
970395fba8 | |||
8a24a93e2f |
@ -1,3 +1,9 @@
|
||||
NVIDIA Texture Tools version 2.0.1
|
||||
* Fix memory leaks.
|
||||
* Pre-allocate device memory for CUDA compressor.
|
||||
* Add single color compressor. Thanks to Amir Ebrahimi.
|
||||
* Better CUDA error checking.
|
||||
|
||||
NVIDIA Texture Tools version 2.0.0
|
||||
* Fixed PSNR formula in nvimgdiff.
|
||||
* Added support for arbitrary RGB formats.
|
||||
|
@ -2,7 +2,7 @@
|
||||
--------------------------------------------------------------------------------
|
||||
NVIDIA Texture Tools
|
||||
README.txt
|
||||
Version 2.0.0
|
||||
Version 2.0
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
@ -327,6 +327,10 @@
|
||||
RelativePath="..\..\..\src\nvcore\nvcore.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvcore\Ptr.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvcore\StrLib.h"
|
||||
>
|
||||
|
@ -53,8 +53,8 @@ END
|
||||
//
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 2,0,0,0
|
||||
PRODUCTVERSION 2,0,0,0
|
||||
FILEVERSION 2,0,1,0
|
||||
PRODUCTVERSION 2,0,1,0
|
||||
FILEFLAGSMASK 0x17L
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
@ -71,12 +71,12 @@ BEGIN
|
||||
BEGIN
|
||||
VALUE "CompanyName", "NVIDIA Corporation"
|
||||
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
|
||||
VALUE "FileVersion", "2, 0, 0, 0"
|
||||
VALUE "FileVersion", "2, 0, 1, 0"
|
||||
VALUE "InternalName", "nvtt"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2007"
|
||||
VALUE "OriginalFilename", "nvtt.dll"
|
||||
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
|
||||
VALUE "ProductVersion", "2, 0, 0, 0"
|
||||
VALUE "ProductVersion", "2, 0, 1, 0"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
@ -96,6 +96,8 @@
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
Description="Copying header files..."
|
||||
CommandLine="xcopy /y /f /i $(SolutionDir)\..\..\src\nvtt\nvtt*.h $(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
@ -258,6 +260,8 @@
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
Description="Copying header files..."
|
||||
CommandLine="xcopy /y /f /i $(SolutionDir)\..\..\src\nvtt\nvtt*.h $(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
@ -420,6 +424,8 @@
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
Description="Copying header files..."
|
||||
CommandLine="xcopy /y /f /i $(SolutionDir)\..\..\src\nvtt\nvtt*.h $(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
@ -578,6 +584,8 @@
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
Description="Copying header files..."
|
||||
CommandLine="xcopy /y /f /i $(SolutionDir)\..\..\src\nvtt\nvtt*.h $(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
|
@ -110,6 +110,19 @@ void ColorBlock::splatY()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the block has a single color.
|
||||
bool ColorBlock::isSingleColor() const
|
||||
{
|
||||
for(int i = 1; i < 16; i++)
|
||||
{
|
||||
if (m_color[0] != m_color[i])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Count number of unique colors in this color block.
|
||||
uint ColorBlock::countUniqueColors() const
|
||||
|
@ -24,6 +24,7 @@ namespace nv
|
||||
void splatX();
|
||||
void splatY();
|
||||
|
||||
bool isSingleColor() const;
|
||||
uint countUniqueColors() const;
|
||||
Color32 averageColor() const;
|
||||
bool hasAlpha() const;
|
||||
|
@ -69,7 +69,14 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(image, x, y);
|
||||
|
||||
QuickCompress::compressDXT1(rgba, &block);
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
QuickCompress::compressDXT1(rgba.color(0), &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT1(rgba, &block);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
@ -214,10 +221,16 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
|
||||
|
||||
rgba.init(image, x, y);
|
||||
|
||||
// Compress color.
|
||||
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
|
||||
fit.SetColourSet(&colours, squish::kDxt1);
|
||||
fit.Compress(&block);
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
QuickCompress::compressDXT1(rgba.color(0), &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
|
||||
fit.SetColourSet(&colours, squish::kDxt1);
|
||||
fit.Compress(&block);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
|
@ -205,16 +205,25 @@ namespace nvtt
|
||||
|
||||
Compressor::Compressor() : m(*new Compressor::Private())
|
||||
{
|
||||
// CUDA initialization.
|
||||
m.cudaSupported = cuda::isHardwarePresent();
|
||||
m.cudaEnabled = m.cudaSupported;
|
||||
|
||||
// @@ Do CUDA initialization here.
|
||||
if (m.cudaEnabled)
|
||||
{
|
||||
m.cuda = new CudaCompressor();
|
||||
|
||||
if (!m.cuda->isValid())
|
||||
{
|
||||
m.cudaEnabled = false;
|
||||
m.cuda = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Compressor::~Compressor()
|
||||
{
|
||||
// @@ Free CUDA resources here.
|
||||
delete &m;
|
||||
}
|
||||
|
||||
|
||||
@ -225,6 +234,17 @@ void Compressor::enableCudaAcceleration(bool enable)
|
||||
{
|
||||
m.cudaEnabled = enable;
|
||||
}
|
||||
|
||||
if (m.cudaEnabled && m.cuda == NULL)
|
||||
{
|
||||
m.cuda = new CudaCompressor();
|
||||
|
||||
if (!m.cuda->isValid())
|
||||
{
|
||||
m.cudaEnabled = false;
|
||||
m.cuda = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if CUDA acceleration is enabled.
|
||||
@ -670,7 +690,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
|
||||
if (cudaEnabled)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cudaCompressDXT1(image, outputOptions, compressionOptions);
|
||||
cuda->compressDXT1(image, outputOptions, compressionOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -708,7 +728,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
|
||||
if (cudaEnabled)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cudaCompressDXT3(image, outputOptions, compressionOptions);
|
||||
cuda->compressDXT3(image, outputOptions, compressionOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -727,7 +747,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
|
||||
if (cudaEnabled)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cudaCompressDXT5(image, outputOptions, compressionOptions);
|
||||
cuda->compressDXT5(image, outputOptions, compressionOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -24,6 +24,10 @@
|
||||
#ifndef NV_TT_COMPRESSOR_H
|
||||
#define NV_TT_COMPRESSOR_H
|
||||
|
||||
#include <nvcore/Ptr.h>
|
||||
|
||||
#include <nvtt/cuda/CudaCompressDXT.h>
|
||||
|
||||
#include "nvtt.h"
|
||||
|
||||
namespace nv
|
||||
@ -63,6 +67,9 @@ namespace nvtt
|
||||
|
||||
bool cudaSupported;
|
||||
bool cudaEnabled;
|
||||
|
||||
nv::AutoPtr<nv::CudaCompressor> cuda;
|
||||
|
||||
};
|
||||
|
||||
} // nvtt namespace
|
||||
|
@ -353,12 +353,18 @@ static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
|
||||
void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
|
||||
{
|
||||
dxtBlock->col0.r = OMatch5[c.r][0];
|
||||
dxtBlock->col0.g = OMatch5[c.g][0];
|
||||
dxtBlock->col0.g = OMatch6[c.g][0];
|
||||
dxtBlock->col0.b = OMatch5[c.b][0];
|
||||
dxtBlock->col1.r = OMatch5[c.r][1];
|
||||
dxtBlock->col1.g = OMatch5[c.g][1];
|
||||
dxtBlock->col1.g = OMatch6[c.g][1];
|
||||
dxtBlock->col1.b = OMatch5[c.b][1];
|
||||
dxtBlock->indices = 0xaaaaaaaa;
|
||||
|
||||
if (dxtBlock->col0.u < dxtBlock->col1.u)
|
||||
{
|
||||
swap(dxtBlock->col0.u, dxtBlock->col1.u);
|
||||
dxtBlock->indices ^= 0x55555555;
|
||||
}
|
||||
}
|
||||
|
||||
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
|
||||
|
@ -48,7 +48,12 @@ void initTables()
|
||||
};
|
||||
*/
|
||||
|
||||
const static uint8 OMatch5[256][2] =
|
||||
#if __CUDACC__
|
||||
__constant__ unsigned short
|
||||
#else
|
||||
const static uint8
|
||||
#endif
|
||||
OMatch5[256][2] =
|
||||
{
|
||||
{0x00, 0x00},
|
||||
{0x00, 0x00},
|
||||
@ -308,7 +313,12 @@ const static uint8 OMatch5[256][2] =
|
||||
{0x1F, 0x1F},
|
||||
};
|
||||
|
||||
const static uint8 OMatch6[256][2] =
|
||||
#if __CUDACC__
|
||||
__constant__ unsigned short
|
||||
#else
|
||||
const static uint8
|
||||
#endif
|
||||
OMatch6[256][2] =
|
||||
{
|
||||
{0x00, 0x00},
|
||||
{0x00, 0x01},
|
||||
|
@ -122,7 +122,7 @@ static void doPrecomputation()
|
||||
*/
|
||||
|
||||
|
||||
const static uint bitmaps[992] =
|
||||
const static uint s_bitmapTable[992] =
|
||||
{
|
||||
0x80000000,
|
||||
0x40000000,
|
||||
|
@ -28,6 +28,8 @@
|
||||
|
||||
#include "CudaMath.h"
|
||||
|
||||
#include "../SingleColorLookup.h"
|
||||
|
||||
#define NUM_THREADS 64 // Number of threads per block.
|
||||
|
||||
#if __DEVICE_EMULATION__
|
||||
@ -60,6 +62,7 @@ __device__ void sortColors(const float * values, int * cmp)
|
||||
{
|
||||
int tid = threadIdx.x;
|
||||
|
||||
#if 1
|
||||
cmp[tid] = (values[0] < values[tid]);
|
||||
cmp[tid] += (values[1] < values[tid]);
|
||||
cmp[tid] += (values[2] < values[tid]);
|
||||
@ -93,13 +96,30 @@ __device__ void sortColors(const float * values, int * cmp)
|
||||
if (tid > 12 && cmp[tid] == cmp[12]) ++cmp[tid];
|
||||
if (tid > 13 && cmp[tid] == cmp[13]) ++cmp[tid];
|
||||
if (tid > 14 && cmp[tid] == cmp[14]) ++cmp[tid];
|
||||
#else
|
||||
|
||||
cmp[tid] = 0;
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
cmp[tid] += (values[i] < values[tid]);
|
||||
}
|
||||
|
||||
// Resolve elements with the same index.
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 15; i++)
|
||||
{
|
||||
if (tid > 0 && cmp[tid] == cmp[i]) ++cmp[tid];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Load color block to shared mem
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16])
|
||||
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
const int idx = threadIdx.x;
|
||||
@ -124,6 +144,8 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
colorSums(colors, sums);
|
||||
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
|
||||
|
||||
*sameColor = (axis == make_float3(0, 0, 0));
|
||||
|
||||
dps[idx] = dot(colors[idx], axis);
|
||||
|
||||
#if __DEVICE_EMULATION__
|
||||
@ -187,7 +209,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Round color to RGB565 and expand
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline __device__ float3 roundAndExpand(float3 v, ushort * w)
|
||||
inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
|
||||
{
|
||||
v.x = rintf(__saturatef(v.x) * 31.0f);
|
||||
v.y = rintf(__saturatef(v.y) * 63.0f);
|
||||
@ -234,8 +256,8 @@ __device__ float evalPermutation4(const float3 * colors, uint permutation, ushor
|
||||
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
||||
|
||||
// Round a, b to the closest 5-6-5 color and expand...
|
||||
a = roundAndExpand(a, start);
|
||||
b = roundAndExpand(b, end);
|
||||
a = roundAndExpand565(a, start);
|
||||
b = roundAndExpand565(b, end);
|
||||
|
||||
// compute the error
|
||||
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
|
||||
@ -274,8 +296,8 @@ __device__ float evalPermutation3(const float3 * colors, uint permutation, ushor
|
||||
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
||||
|
||||
// Round a, b to the closest 5-6-5 color and expand...
|
||||
a = roundAndExpand(a, start);
|
||||
b = roundAndExpand(b, end);
|
||||
a = roundAndExpand565(a, start);
|
||||
b = roundAndExpand565(b, end);
|
||||
|
||||
// compute the error
|
||||
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
|
||||
@ -315,8 +337,8 @@ __device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint
|
||||
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
||||
|
||||
// Round a, b to the closest 5-6-5 color and expand...
|
||||
a = roundAndExpand(a, start);
|
||||
b = roundAndExpand(b, end);
|
||||
a = roundAndExpand565(a, start);
|
||||
b = roundAndExpand565(b, end);
|
||||
|
||||
// compute the error
|
||||
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
|
||||
@ -351,8 +373,8 @@ __device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint
|
||||
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
||||
|
||||
// Round a, b to the closest 5-6-5 color and expand...
|
||||
a = roundAndExpand(a, start);
|
||||
b = roundAndExpand(b, end);
|
||||
a = roundAndExpand565(a, start);
|
||||
b = roundAndExpand565(b, end);
|
||||
|
||||
// compute the error
|
||||
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
|
||||
@ -391,8 +413,8 @@ __device__ float evalPermutation4(const float3 * colors, const float * weights,
|
||||
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
||||
|
||||
// Round a, b to the closest 5-6-5 color and expand...
|
||||
a = roundAndExpand(a, start);
|
||||
b = roundAndExpand(b, end);
|
||||
a = roundAndExpand565(a, start);
|
||||
b = roundAndExpand565(b, end);
|
||||
|
||||
// compute the error
|
||||
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
|
||||
@ -432,8 +454,8 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights,
|
||||
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
||||
|
||||
// Round a, b to the closest 5-6-5 color and expand...
|
||||
a = roundAndExpand(a, start);
|
||||
b = roundAndExpand(b, end);
|
||||
a = roundAndExpand565(a, start);
|
||||
b = roundAndExpand565(b, end);
|
||||
|
||||
// compute the error
|
||||
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
|
||||
@ -715,22 +737,50 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr
|
||||
result[bid].y = indices;
|
||||
}
|
||||
|
||||
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
|
||||
int r = color.x * 255;
|
||||
int g = color.y * 255;
|
||||
int b = color.z * 255;
|
||||
|
||||
ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0];
|
||||
ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1];
|
||||
|
||||
if (color0 < color1)
|
||||
{
|
||||
result[bid].x = (color0 << 16) | color1;
|
||||
result[bid].y = 0xffffffff;
|
||||
}
|
||||
else
|
||||
{
|
||||
result[bid].x = (color1 << 16) | color0;
|
||||
result[bid].y = 0xaaaaaaaa;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Compress color block
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
__global__ void compress(const uint * permutations, const uint * image, uint2 * result)
|
||||
__global__ void compressDXT1(const uint * permutations, const uint * image, uint2 * result)
|
||||
{
|
||||
__shared__ float3 colors[16];
|
||||
__shared__ float3 sums[16];
|
||||
__shared__ int xrefs[16];
|
||||
__shared__ int sameColor;
|
||||
|
||||
loadColorBlock(image, colors, sums, xrefs);
|
||||
loadColorBlock(image, colors, sums, xrefs, &sameColor);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (sameColor)
|
||||
{
|
||||
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
|
||||
return;
|
||||
}
|
||||
|
||||
ushort bestStart, bestEnd;
|
||||
uint bestPermutation;
|
||||
|
||||
@ -749,7 +799,7 @@ __global__ void compress(const uint * permutations, const uint * image, uint2 *
|
||||
}
|
||||
|
||||
|
||||
__global__ void compressWeighted(const uint * permutations, const uint * image, uint2 * result)
|
||||
__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result)
|
||||
{
|
||||
__shared__ float3 colors[16];
|
||||
__shared__ float3 sums[16];
|
||||
@ -845,8 +895,8 @@ __device__ void optimizeAlpha8(const float alphas[16], uchar & a0, uchar & a1)
|
||||
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
|
||||
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
||||
|
||||
a0 = roundAndExpand(a);
|
||||
a1 = roundAndExpand(b);
|
||||
a0 = roundAndExpand8(a);
|
||||
a1 = roundAndExpand8(b);
|
||||
}
|
||||
*/
|
||||
/*
|
||||
@ -978,12 +1028,12 @@ extern "C" void setupCompressKernel(const float weights[3])
|
||||
// Launch kernel
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern "C" void compressKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
|
||||
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
|
||||
{
|
||||
compress<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
|
||||
compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
|
||||
}
|
||||
|
||||
extern "C" void compressWeightedKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
|
||||
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
|
||||
{
|
||||
compressWeighted<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
|
||||
compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
|
||||
}
|
||||
|
@ -48,29 +48,14 @@ using namespace nvtt;
|
||||
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
#define MAX_BLOCKS 8192U // 32768, 65535
|
||||
|
||||
|
||||
extern "C" void setupCompressKernel(const float weights[3]);
|
||||
extern "C" void compressKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void compressWeightedKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
|
||||
#include "Bitmaps.h"
|
||||
|
||||
// @@ Store this pointer in CompressionOptions. Allocate in ctor, free in dtor.
|
||||
static uint * d_bitmaps = NULL;
|
||||
|
||||
static void doPrecomputation()
|
||||
{
|
||||
if (d_bitmaps != NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Upload bitmaps.
|
||||
cudaMalloc((void**) &d_bitmaps, 992 * sizeof(uint));
|
||||
cudaMemcpy(d_bitmaps, bitmaps, 992 * sizeof(uint), cudaMemcpyHostToDevice);
|
||||
|
||||
// @@ Check for errors.
|
||||
|
||||
// @@ Free allocated memory.
|
||||
}
|
||||
#include "Bitmaps.h" // @@ Rename to BitmapTable.h
|
||||
|
||||
// Convert linear image to block linear.
|
||||
static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
|
||||
@ -92,53 +77,80 @@ static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
|
||||
}
|
||||
}
|
||||
|
||||
#endif // defined HAVE_CUDA
|
||||
#endif
|
||||
|
||||
|
||||
CudaCompressor::CudaCompressor() : m_bitmapTable(NULL), m_data(NULL), m_result(NULL)
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
// Allocate and upload bitmaps.
|
||||
cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint));
|
||||
if (m_bitmapTable != NULL)
|
||||
{
|
||||
cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
// Allocate scratch buffers.
|
||||
cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U);
|
||||
cudaMalloc((void**) &m_result, MAX_BLOCKS * 8U);
|
||||
#endif
|
||||
}
|
||||
|
||||
CudaCompressor::~CudaCompressor()
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
// Free device mem allocations.
|
||||
cudaFree(m_data);
|
||||
cudaFree(m_result);
|
||||
cudaFree(m_bitmapTable);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool CudaCompressor::isValid() const
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
if (cudaGetLastError() != cudaSuccess)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return m_data != NULL && m_result != NULL && m_bitmapTable != NULL;
|
||||
}
|
||||
|
||||
// @@ This code is very repetitive and needs to be cleaned up.
|
||||
|
||||
|
||||
/// Compress image using CUDA.
|
||||
void nv::cudaCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void CudaCompressor::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
doPrecomputation();
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
|
||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||
convertToBlockLinear(image, blockLinearImage); // @@ Do this on the GPU!
|
||||
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
const uint blockMax = 32768; // 49152, 65535
|
||||
|
||||
clock_t start = clock();
|
||||
|
||||
// Allocate image in device memory.
|
||||
uint * d_data = NULL;
|
||||
cudaMalloc((void**) &d_data, min(imageSize, blockMax * 64U));
|
||||
|
||||
// Allocate result.
|
||||
uint * d_result = NULL;
|
||||
cudaMalloc((void**) &d_result, min(compressedSize, blockMax * 8U));
|
||||
|
||||
setupCompressKernel(compressionOptions.colorWeight.ptr());
|
||||
|
||||
// TODO: Add support for multiple GPUs.
|
||||
uint bn = 0;
|
||||
while(bn != blockNum)
|
||||
{
|
||||
uint count = min(blockNum - bn, blockMax);
|
||||
uint count = min(blockNum - bn, MAX_BLOCKS);
|
||||
|
||||
cudaMemcpy(d_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel.
|
||||
compressKernel(count, d_data, d_result, d_bitmaps);
|
||||
compressKernelDXT1(count, m_data, m_result, m_bitmapTable);
|
||||
|
||||
// Check for errors.
|
||||
cudaError_t err = cudaGetLastError();
|
||||
@ -153,7 +165,7 @@ void nv::cudaCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
cudaMemcpy(blockLinearImage, d_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Output result.
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
@ -168,8 +180,6 @@ void nv::cudaCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
||||
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
|
||||
|
||||
free(blockLinearImage);
|
||||
cudaFree(d_data);
|
||||
cudaFree(d_result);
|
||||
|
||||
#else
|
||||
if (outputOptions.errorHandler != NULL)
|
||||
@ -181,13 +191,11 @@ void nv::cudaCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
||||
|
||||
|
||||
/// Compress image using CUDA.
|
||||
void nv::cudaCompressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void CudaCompressor::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
doPrecomputation();
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
@ -198,18 +206,9 @@ void nv::cudaCompressDXT3(const Image * image, const OutputOptions::Private & ou
|
||||
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
const uint blockMax = 32768; // 49152, 65535
|
||||
|
||||
// Allocate image in device memory.
|
||||
uint * d_data = NULL;
|
||||
cudaMalloc((void**) &d_data, min(imageSize, blockMax * 64U));
|
||||
|
||||
// Allocate result.
|
||||
uint * d_result = NULL;
|
||||
cudaMalloc((void**) &d_result, min(compressedSize, blockMax * 8U));
|
||||
|
||||
AlphaBlockDXT3 * alphaBlocks = NULL;
|
||||
alphaBlocks = (AlphaBlockDXT3 *)malloc(min(compressedSize, blockMax * 8U));
|
||||
alphaBlocks = (AlphaBlockDXT3 *)malloc(min(compressedSize, MAX_BLOCKS * 8U));
|
||||
|
||||
setupCompressKernel(compressionOptions.colorWeight.ptr());
|
||||
|
||||
@ -218,12 +217,12 @@ void nv::cudaCompressDXT3(const Image * image, const OutputOptions::Private & ou
|
||||
uint bn = 0;
|
||||
while(bn != blockNum)
|
||||
{
|
||||
uint count = min(blockNum - bn, blockMax);
|
||||
uint count = min(blockNum - bn, MAX_BLOCKS);
|
||||
|
||||
cudaMemcpy(d_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel.
|
||||
compressWeightedKernel(count, d_data, d_result, d_bitmaps);
|
||||
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
|
||||
|
||||
// Compress alpha in parallel with the GPU.
|
||||
for (uint i = 0; i < count; i++)
|
||||
@ -245,7 +244,7 @@ void nv::cudaCompressDXT3(const Image * image, const OutputOptions::Private & ou
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
cudaMemcpy(blockLinearImage, d_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Output result.
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
@ -265,8 +264,6 @@ void nv::cudaCompressDXT3(const Image * image, const OutputOptions::Private & ou
|
||||
|
||||
free(alphaBlocks);
|
||||
free(blockLinearImage);
|
||||
cudaFree(d_data);
|
||||
cudaFree(d_result);
|
||||
|
||||
#else
|
||||
if (outputOptions.errorHandler != NULL)
|
||||
@ -278,13 +275,11 @@ void nv::cudaCompressDXT3(const Image * image, const OutputOptions::Private & ou
|
||||
|
||||
|
||||
/// Compress image using CUDA.
|
||||
void nv::cudaCompressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
void CudaCompressor::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
doPrecomputation();
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
@ -295,18 +290,9 @@ void nv::cudaCompressDXT5(const Image * image, const OutputOptions::Private & ou
|
||||
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
const uint blockMax = 32768; // 49152, 65535
|
||||
|
||||
// Allocate image in device memory.
|
||||
uint * d_data = NULL;
|
||||
cudaMalloc((void**) &d_data, min(imageSize, blockMax * 64U));
|
||||
|
||||
// Allocate result.
|
||||
uint * d_result = NULL;
|
||||
cudaMalloc((void**) &d_result, min(compressedSize, blockMax * 8U));
|
||||
|
||||
AlphaBlockDXT5 * alphaBlocks = NULL;
|
||||
alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, blockMax * 8U));
|
||||
alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U));
|
||||
|
||||
setupCompressKernel(compressionOptions.colorWeight.ptr());
|
||||
|
||||
@ -315,12 +301,12 @@ void nv::cudaCompressDXT5(const Image * image, const OutputOptions::Private & ou
|
||||
uint bn = 0;
|
||||
while(bn != blockNum)
|
||||
{
|
||||
uint count = min(blockNum - bn, blockMax);
|
||||
uint count = min(blockNum - bn, MAX_BLOCKS);
|
||||
|
||||
cudaMemcpy(d_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel.
|
||||
compressWeightedKernel(count, d_data, d_result, d_bitmaps);
|
||||
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
|
||||
|
||||
// Compress alpha in parallel with the GPU.
|
||||
for (uint i = 0; i < count; i++)
|
||||
@ -342,7 +328,7 @@ void nv::cudaCompressDXT5(const Image * image, const OutputOptions::Private & ou
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
cudaMemcpy(blockLinearImage, d_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Output result.
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
@ -362,8 +348,6 @@ void nv::cudaCompressDXT5(const Image * image, const OutputOptions::Private & ou
|
||||
|
||||
free(alphaBlocks);
|
||||
free(blockLinearImage);
|
||||
cudaFree(d_data);
|
||||
cudaFree(d_result);
|
||||
|
||||
#else
|
||||
if (outputOptions.errorHandler != NULL)
|
||||
@ -375,7 +359,7 @@ void nv::cudaCompressDXT5(const Image * image, const OutputOptions::Private & ou
|
||||
|
||||
|
||||
|
||||
#if defined HAVE_CUDA
|
||||
#if 0
|
||||
|
||||
class Task
|
||||
{
|
||||
@ -469,7 +453,7 @@ public:
|
||||
cudaMemcpy(d_blockLinearImage, blockLinearImage, blockCount * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel.
|
||||
compressKernel(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);
|
||||
compressKernelDXT1(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);
|
||||
|
||||
// Check for errors.
|
||||
cudaError_t err = cudaGetLastError();
|
||||
@ -511,8 +495,6 @@ private:
|
||||
|
||||
};
|
||||
|
||||
#endif // defined HAVE_CUDA
|
||||
|
||||
|
||||
void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||
{
|
||||
@ -523,9 +505,7 @@ void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private &
|
||||
const uint blockNum = ((w + 3) / 4) * ((h + 3) / 4);
|
||||
const uint blockMax = 32768; // 49152, 65535
|
||||
|
||||
doPrecomputation();
|
||||
|
||||
setupCompressKernel(compressionOptions.colorWeight.ptr());
|
||||
setupCompressKernelDXT1(compressionOptions.colorWeight.ptr());
|
||||
|
||||
ColorBlock rgba;
|
||||
Task task(min(blockNum, blockMax));
|
||||
@ -559,4 +539,4 @@ void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private &
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#endif // 0
|
||||
|
@ -31,11 +31,24 @@ namespace nv
|
||||
{
|
||||
class Image;
|
||||
|
||||
void cudaCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void cudaCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void cudaCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
class CudaCompressor
|
||||
{
|
||||
public:
|
||||
CudaCompressor();
|
||||
~CudaCompressor();
|
||||
|
||||
void cudaCompressDXT1_2(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
bool isValid() const;
|
||||
|
||||
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
|
||||
private:
|
||||
|
||||
uint * m_bitmapTable;
|
||||
uint * m_data;
|
||||
uint * m_result;
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
@ -82,6 +82,10 @@ inline __device__ __host__ void operator /=(float3 & b, float f)
|
||||
b.z *= inv;
|
||||
}
|
||||
|
||||
inline __device__ __host__ bool operator ==(float3 a, float3 b)
|
||||
{
|
||||
return a.x == b.x && a.y == b.y && a.z == b.z;
|
||||
}
|
||||
|
||||
inline __device__ __host__ float dot(float3 a, float3 b)
|
||||
{
|
||||
@ -131,15 +135,37 @@ inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
|
||||
float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
|
||||
float m = max(max(x, y), z);
|
||||
float iv = 1.0f / m;
|
||||
#if __DEVICE_EMULATION__
|
||||
if (m == 0.0f) iv = 0.0f;
|
||||
#endif
|
||||
v = make_float3(x*iv, y*iv, z*iv);
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
inline __device__ bool singleColor(const float3 * colors)
|
||||
{
|
||||
#if __DEVICE_EMULATION__
|
||||
bool sameColor = false;
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
sameColor &= (colors[idx] == colors[0]);
|
||||
}
|
||||
return sameColor;
|
||||
#else
|
||||
__shared__ int sameColor[16];
|
||||
|
||||
const int idx = threadIdx.x;
|
||||
|
||||
sameColor[idx] = (colors[idx] == colors[0]);
|
||||
sameColor[idx] &= sameColor[idx^8];
|
||||
sameColor[idx] &= sameColor[idx^4];
|
||||
sameColor[idx] &= sameColor[idx^2];
|
||||
sameColor[idx] &= sameColor[idx^1];
|
||||
|
||||
return sameColor[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
inline __device__ void colorSums(const float3 * colors, float3 * sums)
|
||||
{
|
||||
#if __DEVICE_EMULATION__
|
||||
|
@ -49,6 +49,14 @@
|
||||
|
||||
#define NVTT_VERSION 200
|
||||
|
||||
#define NVTT_DECLARE_PIMPL(Class) \
|
||||
private: \
|
||||
Class(const Class &); \
|
||||
void operator=(const Class &); \
|
||||
public: \
|
||||
struct Private; \
|
||||
Private & m
|
||||
|
||||
|
||||
// Public interface.
|
||||
namespace nvtt
|
||||
@ -89,6 +97,8 @@ namespace nvtt
|
||||
/// Compression options. This class describes the desired compression format and other compression settings.
|
||||
struct CompressionOptions
|
||||
{
|
||||
NVTT_DECLARE_PIMPL(CompressionOptions);
|
||||
|
||||
NVTT_API CompressionOptions();
|
||||
NVTT_API ~CompressionOptions();
|
||||
|
||||
@ -104,10 +114,6 @@ namespace nvtt
|
||||
NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
|
||||
|
||||
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
|
||||
|
||||
//private:
|
||||
struct Private;
|
||||
Private & m;
|
||||
};
|
||||
|
||||
|
||||
@ -170,6 +176,8 @@ namespace nvtt
|
||||
/// Input options. Specify format and layout of the input texture.
|
||||
struct InputOptions
|
||||
{
|
||||
NVTT_DECLARE_PIMPL(InputOptions);
|
||||
|
||||
NVTT_API InputOptions();
|
||||
NVTT_API ~InputOptions();
|
||||
|
||||
@ -214,10 +222,6 @@ namespace nvtt
|
||||
// Set resizing options.
|
||||
NVTT_API void setMaxExtents(int d);
|
||||
NVTT_API void setRoundMode(RoundMode mode);
|
||||
|
||||
//private:
|
||||
struct Private;
|
||||
Private & m;
|
||||
};
|
||||
|
||||
|
||||
@ -258,6 +262,8 @@ namespace nvtt
|
||||
/// the compressor to the user.
|
||||
struct OutputOptions
|
||||
{
|
||||
NVTT_DECLARE_PIMPL(OutputOptions);
|
||||
|
||||
NVTT_API OutputOptions();
|
||||
NVTT_API ~OutputOptions();
|
||||
|
||||
@ -269,16 +275,14 @@ namespace nvtt
|
||||
NVTT_API void setOutputHandler(OutputHandler * outputHandler);
|
||||
NVTT_API void setErrorHandler(ErrorHandler * errorHandler);
|
||||
NVTT_API void setOutputHeader(bool outputHeader);
|
||||
|
||||
//private:
|
||||
struct Private;
|
||||
Private & m;
|
||||
};
|
||||
|
||||
|
||||
/// Texture compressor.
|
||||
struct Compressor
|
||||
{
|
||||
NVTT_DECLARE_PIMPL(Compressor);
|
||||
|
||||
NVTT_API Compressor();
|
||||
NVTT_API ~Compressor();
|
||||
|
||||
@ -290,10 +294,6 @@ namespace nvtt
|
||||
|
||||
// Estimate the size of compressing the input with the given options.
|
||||
NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const;
|
||||
|
||||
//private:
|
||||
struct Private;
|
||||
Private & m;
|
||||
};
|
||||
|
||||
|
||||
|
@ -50,6 +50,16 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
Vec4( const float * v )
|
||||
{
|
||||
union { vector float v; float c[4]; } u;
|
||||
u.c[0] = v[0];
|
||||
u.c[1] = v[1];
|
||||
u.c[2] = v[2];
|
||||
u.c[3] = v[3];
|
||||
m_v = u.v;
|
||||
}
|
||||
|
||||
Vec4( float x, float y, float z, float w )
|
||||
{
|
||||
union { vector float v; float c[4]; } u;
|
||||
|
@ -130,10 +130,13 @@ struct NormalError
|
||||
|
||||
void done()
|
||||
{
|
||||
ade /= samples;
|
||||
mse /= samples * 3;
|
||||
rmse = sqrt(mse);
|
||||
psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
|
||||
if (samples)
|
||||
{
|
||||
ade /= samples;
|
||||
mse /= samples * 3;
|
||||
rmse = sqrt(mse);
|
||||
psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
|
||||
}
|
||||
}
|
||||
|
||||
void print()
|
||||
|
Reference in New Issue
Block a user