Merge changes from the witness.

This commit is contained in:
castano
2014-11-04 17:49:29 +00:00
parent 4cb60cc5ba
commit d019cd7080
86 changed files with 3534 additions and 882 deletions

View File

@ -165,6 +165,10 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
// Use a single thread to compress small textures.
if (context.bh < 4) dispatcher = &sequential;
#if _DEBUG
dispatcher = &sequential;
#endif
const uint count = context.bw * context.bh;
const uint size = context.bs * count;
context.mem = new uint8[size];
@ -231,6 +235,10 @@ void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, c
// Use a single thread to compress small textures.
if (context.bh < 4) dispatcher = &sequential;
#if _DEBUG
dispatcher = &sequential;
#endif
const uint count = context.bw * context.bh;
const uint size = context.bs * count;
context.mem = new uint8[size];

View File

@ -27,6 +27,7 @@
#include "ClusterFit.h"
#include "nvmath/Fitting.h"
#include "nvmath/Vector.inl"
#include "nvmath/ftoi.h"
#include "nvimage/ColorBlock.h"
#include <float.h> // FLT_MAX
@ -37,7 +38,8 @@ ClusterFit::ClusterFit()
{
}
void ClusterFit::setColourSet(const ColorSet * set)
// @@ Deprecate. Do not use color set directly.
void ClusterFit::setColorSet(const ColorSet * set)
{
// initialise the best error
#if NVTT_USE_SIMD
@ -58,6 +60,7 @@ void ClusterFit::setColourSet(const ColorSet * set)
}
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(m_count, values, set->weights, metric);
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(m_count, values, set->weights, metric);
// build the list of values
int order[16];
@ -107,7 +110,72 @@ void ClusterFit::setColourSet(const ColorSet * set)
}
void ClusterFit::setMetric(Vector4::Arg w)
// Prepares the fitter from raw arrays of colors and per-color weights.
//
// colors  - `count` input colors.
// weights - `count` weights, one per color.
// count   - number of entries; stored in m_count.
//
// Resets the best error to FLT_MAX, orders the colors along their principal axis and
// accumulates the weighted sums that are stored in the m_* members.
// NOTE(review): `order` and `dps` are fixed 16-entry arrays and `count` is not checked here,
// so callers presumably must pass count <= 16 -- confirm.
void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count)
{
// initialise the best error
#if NVTT_USE_SIMD
m_besterror = SimdVector( FLT_MAX );
Vector3 metric = m_metric.toVector3();
#else
m_besterror = FLT_MAX;
Vector3 metric = m_metric;
#endif
m_count = count;
// Principal axis of the weighted colors under the current metric.
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(count, colors, weights, metric);
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(count, colors, weights, metric);
// build the list of values
int order[16];
float dps[16];
for (uint i = 0; i < m_count; ++i)
{
// Projection of each color onto the principal axis; order[] starts as the identity.
dps[i] = dot(colors[i], principal);
order[i] = i;
}
// stable sort
// (insertion sort, ascending by projection; order[] is permuted alongside dps[])
for (uint i = 0; i < m_count; ++i)
{
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
{
swap(dps[j], dps[j - 1]);
swap(order[j], order[j - 1]);
}
}
// weight all the points
#if NVTT_USE_SIMD
m_xxsum = SimdVector( 0.0f );
m_xsum = SimdVector( 0.0f );
#else
m_xxsum = Vector3(0.0f);
m_xsum = Vector3(0.0f);
m_wsum = 0.0f;
#endif
for (uint i = 0; i < m_count; ++i)
{
// Visit the colors in sorted order.
int p = order[i];
#if NVTT_USE_SIMD
// The 4th component is set to 1, so after scaling it holds the weight itself.
NV_ALIGN_16 Vector4 tmp(colors[p], 1);
m_weighted[i] = SimdVector(tmp.component) * SimdVector(weights[p]);
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
#else
// Scalar path keeps the weights and their sum in separate members.
m_weighted[i] = colors[p] * weights[p];
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
m_weights[i] = weights[p];
m_wsum += m_weights[i];
#endif
}
}
void ClusterFit::setColorWeights(Vector4::Arg w)
{
#if NVTT_USE_SIMD
NV_ALIGN_16 Vector4 tmp(w.xyz(), 1);
@ -292,12 +360,21 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
SimdVector e4 = multiplyAdd( two, e3, e1 );
#if 1
// apply the metric to the error term
SimdVector e5 = e4 * m_metricSqr;
SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
#else
// @@ Is there a horizontal max SIMD instruction?
SimdVector error = e4.splatX() + e4.splatY() + e4.splatZ();
error *= two;
error += max(max(e4.splatX(), e4.splatY()), e4.splatZ());
error -= min(min(e4.splatX(), e4.splatY()), e4.splatZ());
#endif
// keep the solution if it wins
if( compareAnyLessThan( error, besterror ) )
if (compareAnyLessThan(error, besterror))
{
besterror = error;
beststart = a;
@ -317,7 +394,7 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
}
// save the block if necessary
if( compareAnyLessThan( besterror, m_besterror ) )
if (compareAnyLessThan(besterror, m_besterror))
{
*start = beststart.toVector3();
*end = bestend.toVector3();
@ -333,6 +410,29 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
#else
inline Vector3 round565(const Vector3 & v) {
uint r = ftoi_floor(v.x * 31.0f);
float r0 = float(((r+0) << 3) | ((r+0) >> 2));
float r1 = float(((r+1) << 3) | ((r+1) >> 2));
if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U);
r = (r << 3) | (r >> 2);
uint g = ftoi_floor(v.y * 63.0f);
float g0 = float(((g+0) << 2) | ((g+0) >> 4));
float g1 = float(((g+1) << 2) | ((g+1) >> 4));
if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U);
g = (g << 2) | (g >> 4);
uint b = ftoi_floor(v.z * 31.0f);
float b0 = float(((b+0) << 3) | ((b+0) >> 2));
float b1 = float(((b+1) << 3) | ((b+1) >> 2));
if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U);
b = (b << 3) | (b >> 2);
return Vector3(float(r)/255, float(g)/255, float(b)/255);
}
bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
{
const uint count = m_count;
@ -374,8 +474,29 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
// clamp to the grid
a = clamp(a, 0, 1);
b = clamp(b, 0, 1);
a = floor(grid * a + 0.5f) * gridrcp;
b = floor(grid * b + 0.5f) * gridrcp;
//a = floor(grid * a + 0.5f) * gridrcp;
//b = floor(grid * b + 0.5f) * gridrcp;
//int ar = ftoi_round(31 * a.x); ar = (ar << 3) | (ar >> 2); a.x = float(ar) / 255.0f;
//int ag = ftoi_round(63 * a.y); ar = (ag << 2) | (ag >> 4); a.y = float(ag) / 255.0f;
//int ab = ftoi_round(31 * a.z); ar = (ab << 3) | (ab >> 2); a.z = float(ab) / 255.0f;
//int br = ftoi_round(31 * b.x); br = (br << 3) | (br >> 2); b.x = float(br) / 255.0f;
//int bg = ftoi_round(63 * b.y); br = (bg << 2) | (bg >> 4); b.y = float(bg) / 255.0f;
//int bb = ftoi_round(31 * b.z); br = (bb << 3) | (bb >> 2); b.z = float(bb) / 255.0f;
/*a = floor(a * grid + 0.5f);
a.x = (a.x * 8 + floorf(a.x / 4)) / 255.0f;
a.y = (a.y * 4 + floorf(a.y / 16)) / 255.0f;
a.z = (a.z * 8 + floorf(a.z / 4)) / 255.0f;
b = floor(b * grid + 0.5f);
b.x = (b.x * 8 + floorf(b.x / 4)) / 255.0f;
b.y = (b.y * 4 + floorf(b.y / 16)) / 255.0f;
b.z = (b.z * 8 + floorf(b.z / 4)) / 255.0f;*/
a = round565(a);
b = round565(b);
// compute the error
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
@ -461,8 +582,30 @@ bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
// clamp to the grid
a = clamp(a, 0, 1);
b = clamp(b, 0, 1);
a = floor(a * grid + 0.5f) * gridrcp;
b = floor(b * grid + 0.5f) * gridrcp;
//a = floor(a * grid + 0.5f) * gridrcp;
//b = floor(b * grid + 0.5f) * gridrcp;
//int ar = ftoi_round(31 * a.x); ar = (ar << 3) | (ar >> 2); a.x = float(ar) / 255.0f;
//int ag = ftoi_round(63 * a.y); ar = (ag << 2) | (ag >> 4); a.y = float(ag) / 255.0f;
//int ab = ftoi_round(31 * a.z); ar = (ab << 3) | (ab >> 2); a.z = float(ab) / 255.0f;
//int br = ftoi_round(31 * b.x); br = (br << 3) | (br >> 2); b.x = float(br) / 255.0f;
//int bg = ftoi_round(63 * b.y); br = (bg << 2) | (bg >> 4); b.y = float(bg) / 255.0f;
//int bb = ftoi_round(31 * b.z); br = (bb << 3) | (bb >> 2); b.z = float(bb) / 255.0f;
/*
a = floor(a * grid + 0.5f);
a.x = (a.x * 8 + floorf(a.x / 4)) / 255.0f;
a.y = (a.y * 4 + floorf(a.y / 16)) / 255.0f;
a.z = (a.z * 8 + floorf(a.z / 4)) / 255.0f;
b = floor(b * grid + 0.5f);
b.x = (b.x * 8 + floorf(b.x / 4)) / 255.0f;
b.y = (b.y * 4 + floorf(b.y / 16)) / 255.0f;
b.z = (b.z * 8 + floorf(b.z / 4)) / 255.0f;
*/
a = round565(a);
b = round565(b);
// compute the error
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );

View File

@ -31,8 +31,8 @@
#include "nvmath/Vector.h"
// Use SIMD version if altivec or SSE are available.
#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE)
//#define NVTT_USE_SIMD 0
//#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE)
#define NVTT_USE_SIMD 0
namespace nv {
@ -43,9 +43,10 @@ namespace nv {
public:
ClusterFit();
void setColourSet(const ColorSet * set);
void setColorSet(const ColorSet * set);
void setColorSet(const Vector3 * colors, const float * weights, int count);
void setMetric(const Vector4 & w);
void setColorWeights(const Vector4 & w);
float bestError() const;
bool compress3(Vector3 * start, Vector3 * end);

View File

@ -246,11 +246,14 @@ unsigned int CompressionOptions::d3d9Format() const
FOURCC_ATI2, // Format_BC5
FOURCC_DXT1, // Format_DXT1n
0, // Format_CTX1
0, // Format_BC6
0, // Format_BC7
0, // Format_RGBE
MAKEFOURCC('B', 'C', '6', 'H'), // Format_BC6
MAKEFOURCC('B', 'C', '7', 'L'), // Format_BC7
FOURCC_ATI2, // Format_BC5_Luma
FOURCC_DXT5, // Format_BC3_RGBM
};
NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count);
return d3d9_formats[m.format];
}
}

View File

@ -31,49 +31,90 @@
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include "nvmath/ftoi.h"
#include <new> // placement new
using namespace nv;
using namespace nvtt;
void FastCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
void FastCompressorBC4::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
BlockATI1 * block = new(output) BlockATI1;
rgba.swizzle(0, 1, 2, 0); // Copy red to alpha
QuickCompress::compressDXT5A(rgba, &block->alpha);
AlphaBlock4x4 tmp;
tmp.init(src, 0); // Copy red to alpha
QuickCompress::compressDXT5A(tmp, &block->alpha);
}
void FastCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
void FastCompressorBC5::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
BlockATI2 * block = new(output) BlockATI2;
rgba.swizzle(0, 1, 2, 0); // Copy red to alpha
QuickCompress::compressDXT5A(rgba, &block->x);
AlphaBlock4x4 tmp;
tmp.init(src, 0); // Copy red to alpha
QuickCompress::compressDXT5A(tmp, &block->x);
rgba.swizzle(0, 1, 2, 1); // Copy green to alpha
QuickCompress::compressDXT5A(rgba, &block->y);
tmp.init(src, 1); // Copy green to alpha
QuickCompress::compressDXT5A(tmp, &block->y);
}
void ProductionCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
void ProductionCompressorBC4::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
BlockATI1 * block = new(output) BlockATI1;
rgba.swizzle(0, 1, 2, 0); // Copy red to alpha
OptimalCompress::compressDXT5A(rgba, &block->alpha);
AlphaBlock4x4 tmp;
tmp.init(src, 0); // Copy red to alpha
OptimalCompress::compressDXT5A(tmp, &block->alpha);
}
void ProductionCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
void ProductionCompressorBC5::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
BlockATI2 * block = new(output) BlockATI2;
AlphaBlock4x4 tmp;
tmp.init(src, 0); // Copy red to alpha
OptimalCompress::compressDXT5A(tmp, &block->x);
rgba.swizzle(0, 1, 2, 0); // Copy red to alpha
OptimalCompress::compressDXT5A(rgba, &block->x);
rgba.swizzle(0, 1, 2, 1); // Copy green to alpha
OptimalCompress::compressDXT5A(rgba, &block->y);
tmp.init(src, 1); // Copy green to alpha
OptimalCompress::compressDXT5A(tmp, &block->y);
}
// Encodes the x component of `set` into the first BC5 channel and the quantization
// residual of that encoding (scaled, biased and saturated) into the second channel.
void ProductionCompressorBC5_Luma::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Maximum residual that we can represent. @@ Tweak this.
    const float maxResidual = 1.0f / 256.0f;

    BlockATI2 * dst = new(output) BlockATI2;

    // First channel: compress the x component directly.
    AlphaBlock4x4 channel;
    channel.init(set, /*channel=*/0);
    OptimalCompress::compressDXT5A(channel, &dst->x);

    // Decode what was just written so the error of the first channel can be measured.
    AlphaBlock4x4 reconstructed;
    dst->x.decodeBlock(&reconstructed);

    // Second channel: per-texel residual, reusing the same alpha block.
    for (int texel = 0; texel < 16; ++texel) {
        const float source = set.color(texel).x;                           // [0,1]
        const float encoded = float(reconstructed.alpha[texel]) / 255.0f;  // [0,1]

        // Residual normalized by maxResidual, then packed from [-1,1] into [0,1].
        const float normalized = (encoded - source) / maxResidual;
        const float packed = normalized * 0.5f + 0.5f;

        channel.alpha[texel] = nv::ftoi_round(nv::saturate(packed) * 255.0f);
    }

    OptimalCompress::compressDXT5A(channel, &dst->y);
}

View File

@ -58,6 +58,13 @@ namespace nv
virtual uint blockSize() const { return 16; }
};
// ColorSet-based block compressor for the BC5 "luma" variant.
// Each compressed block occupies 16 bytes (see blockSize()).
struct ProductionCompressorBC5_Luma : public ColorSetCompressor
{
// Compresses one block of `set` into `output`.
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size in bytes of one compressed block.
virtual uint blockSize() const { return 16; }
};
} // nv namespace

View File

@ -24,7 +24,6 @@
#include "CompressorDX11.h"
#include <cstring>
#include "nvtt.h"
#include "CompressionOptions.h"
#include "nvimage/ColorBlock.h"
@ -34,16 +33,16 @@
#include "bc6h/zoh.h"
#include "bc7/avpcl.h"
#include <string.h> // memset
using namespace nv;
using namespace nvtt;
void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
// !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
NV_UNUSED(alphaMode); // ZOH does not support alpha.
// !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
if (compressionOptions.pixelType == PixelType_UnsignedFloat ||
compressionOptions.pixelType == PixelType_UnsignedNorm ||
@ -56,44 +55,60 @@ void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const Co
ZOH::Utils::FORMAT = ZOH::SIGNED_F16;
}
// Convert NVTT's tile struct to ZOH's, and convert float to half.
ZOH::Tile zohTile(tile.w, tile.h);
memset(zohTile.data, 0, sizeof(zohTile.data));
memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map));
for (uint y = 0; y < tile.h; ++y)
{
for (uint x = 0; x < tile.w; ++x)
{
Vector3 color = tile.color(x, y).xyz();
uint16 rHalf = to_half(color.x);
uint16 gHalf = to_half(color.y);
uint16 bHalf = to_half(color.z);
zohTile.data[y][x].x = ZOH::Tile::half2float(rHalf);
zohTile.data[y][x].y = ZOH::Tile::half2float(gHalf);
zohTile.data[y][x].z = ZOH::Tile::half2float(bHalf);
zohTile.importance_map[y][x] = 1.0f;
}
}
// Convert NVTT's tile struct to ZOH's, and convert float to half.
ZOH::Tile zohTile(tile.w, tile.h);
memset(zohTile.data, 0, sizeof(zohTile.data));
memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map));
for (uint y = 0; y < tile.h; ++y)
{
for (uint x = 0; x < tile.w; ++x)
{
Vector4 color = tile.color(x, y);
uint16 rHalf = to_half(color.x);
uint16 gHalf = to_half(color.y);
uint16 bHalf = to_half(color.z);
zohTile.data[y][x].x = ZOH::Tile::half2float(rHalf);
zohTile.data[y][x].y = ZOH::Tile::half2float(gHalf);
zohTile.data[y][x].z = ZOH::Tile::half2float(bHalf);
if (alphaMode == AlphaMode_Transparency) {
zohTile.importance_map[y][x] = color.w;
}
else {
zohTile.importance_map[y][x] = 1.0f;
}
}
}
ZOH::compress(zohTile, (char *)output);
}
void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
// !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
// !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
AVPCL::mode_rgb = false;
AVPCL::flag_premult = (alphaMode == AlphaMode_Premultiplied);
AVPCL::flag_nonuniform = false;
AVPCL::flag_nonuniform_ati = false;
AVPCL::mode_rgb = false;
AVPCL::flag_premult = (alphaMode == AlphaMode_Premultiplied);
AVPCL::flag_nonuniform = false;
AVPCL::flag_nonuniform_ati = false;
// Convert NVTT's tile struct to AVPCL's.
AVPCL::Tile avpclTile(tile.w, tile.h);
memset(avpclTile.data, 0, sizeof(avpclTile.data));
for (uint y = 0; y < tile.h; ++y) {
for (uint x = 0; x < tile.w; ++x) {
Vector4 color = tile.color(x, y);
avpclTile.data[y][x] = color * 255.0f;
/*if (alphaMode == AlphaMode_Transparency) {
avpclTile.importance_map[y][x] = color.w;
}
else*/ {
avpclTile.importance_map[y][x] = 1.0f;
}
}
}
// Convert NVTT's tile struct to AVPCL's.
AVPCL::Tile avpclTile(tile.w, tile.h);
memset(avpclTile.data, 0, sizeof(avpclTile.data));
for (uint y = 0; y < tile.h; ++y)
for (uint x = 0; x < tile.w; ++x)
avpclTile.data[y][x] = tile.color(x, y) * 255.0f;
AVPCL::compress(avpclTile, (char *)output);
AVPCL::compress(avpclTile, (char *)output);
}

View File

@ -112,7 +112,8 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
QuickCompress::compressDXT5(rgba, block);
}
#if 0
#if 1
void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
set.setUniformWeights();
@ -125,11 +126,14 @@ void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, co
Color32 c = toColor32(set.colors[0]);
OptimalCompress::compressDXT1(c, block);
}
/*else if (set.colorCount == 2) {
QuickCompress::compressDXT1(..., block);
}*/
else
{
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&set);
fit.setColorWeights(compressionOptions.colorWeight);
fit.setColorSet(&set);
Vector3 start, end;
fit.compress4(&start, &end);
@ -142,6 +146,37 @@ void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, co
}
}
}
#elif 1
extern void compress_dxt1_bounding_box_exhaustive(const ColorBlock & input, BlockDXT1 * output);
// DXT1 compression of a ColorBlock: single-color blocks use the optimal single-color
// encoder, everything else goes through an exhaustive bounding-box search.
void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT1 * dst = new(output) BlockDXT1;

    if (!rgba.isSingleColor())
    {
        // Do an exhaustive search inside the bounding box.
        compress_dxt1_bounding_box_exhaustive(rgba, dst);
        return;
    }

    OptimalCompress::compressDXT1(rgba.color(0), dst);
    //compress_dxt1_single_color_optimal(rgba.color(0), dst);

    // Previous general-case path, kept for reference:
    /*
    nvsquish::WeightedClusterFit fit;
    fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
    nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
    fit.SetColourSet(&colours, nvsquish::kDxt1);
    fit.Compress(output);
    */
}
#else
void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
@ -304,6 +339,309 @@ void CompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode
}
// Compresses one block of `src` as a DXT5 block holding an RGBM-style encoding:
//  - the color block stores each saturated RGB color divided by a multiplier
//    M = max(R, G, B, threshold);
//  - the alpha block stores a multiplier re-estimated from the decoded color block and
//    remapped from [threshold, 1] to [0, 1].
// The active path is the set of `#if 1` sections; the `#if 0` sections are disabled experiments.
// NOTE(review): the disabled sections further down still call ClusterFit::setMetric and
// setColourSet, which are named setColorWeights / setColorSet in the enabled code of this
// function -- they would need updating before being re-enabled.
void CompressorBC3_RGBM::compressBlock(ColorSet & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
BlockDXT5 * block = new(output)BlockDXT5;
// Select the per-texel weights stored in `src`.
if (alphaMode == AlphaMode_Transparency) {
src.setAlphaWeights();
}
else {
src.setUniformWeights();
}
// Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.
// Compress the resulting M values optimally.
// Repeat this several times until compression error does not improve?
//Vector3 rgb_block[16];
//float m_block[16];
// Init RGB/M block.
// Lower bound of the multiplier M; also the value that maps to 0 in the alpha block.
const float threshold = 0.15f; // @@ Use compression options.
#if 0
nvsquish::WeightedClusterFit fit;
ColorBlock rgba;
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, threshold));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgba.color(i) = toColor32(Vector4(r, g, b, a));
}
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}
#endif
#if 1
// Step 1: build the normalized RGB set (color / M) and compress it into block->color.
ColorSet rgb;
rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.
if (src.colorCount != 16) {
nvDebugBreak();
}
for (uint i = 0; i < src.colorCount; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
// M is at least `threshold`, so the divisions below never divide by zero.
float M = max(max(R, G), max(B, threshold));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setColorWeights(compressionOptions.colorWeight);
fit.setColorSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
// Decompress RGB/M block.
nv::ColorBlock RGB;
block->color.decodeBlock(&RGB);
#if 1
// Step 2: estimate the multiplier per texel from the decoded colors and compress it into block->alpha.
// NOTE(review): this loop always visits 16 texels and reads src.weights[i], while the only
// guard for src.colorCount != 16 above is nvDebugBreak() -- confirm smaller blocks cannot reach here.
AlphaBlock4x4 M;
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
// Average of the per-channel ratios between the input and the decoded color.
// NOTE(review): r, g or b can be 0 after decoding, which makes these ratios inf or NaN
// (0/0) -- confirm that saturate()/ftoi_round() below behave acceptably in that case.
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, threshold);
//float m = max(max(R / r, G / g), max(B / b, threshold));
//float m = max(max(R, G), max(B, threshold));
// Remap [threshold, 1] to [0, 1] before quantizing to 8 bits.
m = (m - threshold) / (1 - threshold);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = src.weights[i];
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#else
OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);
#endif
#if 0
// Decompress M.
block->alpha.decodeBlock(&M);
rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.
for (uint i = 0; i < src.colorCount; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
//float m = max(max(R, G), max(B, threshold));
float m = float(M.alpha[i]) / 255.0f * (1 - threshold) + threshold;
float r = R / m;
float g = G / m;
float b = B / m;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
#if 0
block->color.decodeBlock(&RGB);
//AlphaBlock4x4 M;
//M.initWeights(src);
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, threshold);
//float m = max(max(R / r, G / g), max(B / b, threshold));
//float m = max(max(R, G), max(B, threshold));
m = (m - threshold) / (1 - threshold);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = src.weights[i];
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#endif
#if 0
src.fromRGBM(M, threshold);
src.createMinimalSet(/*ignoreTransparent=*/true);
if (src.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(src.color(0), &block->color);
}
else {
// @@ Use our improved compressor.
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&src);
Vector3 start, end;
fit.compress4(&start, &end);
if (fit.compress3(&start, &end)) {
QuickCompress::outputBlock3(src, start, end, block->color);
}
else {
QuickCompress::outputBlock4(src, start, end, block->color);
}
}
#endif // 0
// @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?
// RGBM encoding.
// Maximize precision.
// - Number of possible grey levels:
// - Naive: 2^3 = 8
// - Better: 2^3 + 2^2 = 12
// - How to choose threshold?
// - Ideal = Adaptive per block, don't know where to store.
// - Adaptive per lightmap. How to compute optimal?
// - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.
// - Optimal compressor: Interpolation artifacts.
// - Color transform.
// - Measure error in post-tone-mapping color space.
// - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.
// - Guess based on average lightmap color? Use fixed exposure, in scaled lightmap space.
// - Enhanced DXT compressor.
// - Typical RGBM encoding as follows:
// rgb -> M = max(rgb), RGB=rgb/M -> RGBM
// - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1.
// - We could ensure that M' >= M during compression.
// - We could clamp RGB anyway.
// - We could add a fixed scale value to take into account compression errors and avoid clamping.
// Compress color.
/*if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}*/
}
#if defined(HAVE_ATITC)
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)

View File

@ -64,7 +64,7 @@ namespace nv
// Normal CPU compressors.
#if 0
#if 1
struct CompressorDXT1 : public ColorSetCompressor
{
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
@ -108,6 +108,12 @@ namespace nv
virtual uint blockSize() const { return 16; }
};
// ColorSet-based block compressor for the BC3 "RGBM" variant.
// Each compressed block occupies 16 bytes (see blockSize()).
struct CompressorBC3_RGBM : public ColorSetCompressor
{
// Compresses one block of `set` into `output`.
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size in bytes of one compressed block.
virtual uint blockSize() const { return 16; }
};
// External compressors.
#if defined(HAVE_ATITC)

461
src/nvtt/CompressorDXT1.cpp Normal file
View File

@ -0,0 +1,461 @@
#include "CompressorDXT1.h"
#include "SingleColorLookup.h"
#include "ClusterFit.h"
#include "QuickCompressDXT.h" // Deprecate.
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include "nvmath/Color.inl"
#include "nvmath/Vector.inl"
#include "nvmath/Fitting.h"
#include "nvmath/ftoi.h"
#include "nvcore/Utils.h" // swap
#include <string.h> // memset
using namespace nv;
inline static void color_block_to_vector_block(const ColorBlock & rgba, Vector3 block[16])
{
for (int i = 0; i < 16; i++)
{
const Color32 c = rgba.color(i);
block[i] = Vector3(c.r, c.g, c.b);
}
}
// Expands 5/6/5-bit channel values to 8 bits by bit replication and returns them as floats.
inline Vector3 r5g6b5_to_vector3(int r, int g, int b)
{
    const int red8   = (r << 3) | (r >> 2);
    const int green8 = (g << 2) | (g >> 4);
    const int blue8  = (b << 3) | (b >> 2);

    Vector3 expanded;
    expanded.x = float(red8);
    expanded.y = float(green8);
    expanded.z = float(blue8);
    return expanded;
}
// Converts the r, g, b channels of `c` to floats scaled by 1/255. Alpha is ignored.
inline Vector3 color_to_vector3(Color32 c)
{
    const float inv255 = 1.0f / 255.0f;

    const float x = c.r * inv255;
    const float y = c.g * inv255;
    const float z = c.b * inv255;

    return Vector3(x, y, z);
}
// Converts a color with components nominally in [0,1] to an opaque 8-bit color.
// Each component is saturated, scaled by 255 and rounded; alpha is set to 255.
inline Color32 vector3_to_color(Vector3 v)
{
    Color32 color;
    color.r = U8(ftoi_round(saturate(v.x) * 255));
    color.g = U8(ftoi_round(saturate(v.y) * 255));
    color.b = U8(ftoi_round(saturate(v.z) * 255));
    color.a = 255;
    // The original fell off the end without returning, which is undefined behaviour.
    return color;
}
// Stores the first color whose weight is positive in `*valid_color` and returns true.
// Returns false, leaving `*valid_color` untouched, when no weight is positive.
static bool find_valid_color_rgb(const Vector3 * colors, const float * weights, int count, Vector3 * valid_color)
{
    int i = 0;
    while (i < count && !(weights[i] > 0.0f)) {
        ++i;
    }

    if (i >= count) {
        // No valid colors.
        return false;
    }

    *valid_color = colors[i];
    return true;
}
// Returns true when every color with a positive weight equals `color`.
// Entries whose weight is not positive are ignored.
static bool is_single_color_rgb(const Vector3 * colors, const float * weights, int count, Vector3 color)
{
    for (int i = 0; i < count; ++i) {
        const bool counted = weights[i] > 0.0f;
        if (counted && colors[i] != color) {
            return false;
        }
    }
    return true;
}
// Find similar colors and combine them together.
static int reduce_colors(const Vector3 * input_colors, const float * input_weights, Vector3 * colors, float * weights)
{
int n = 0;
for (int i = 0; i < 16; i++)
{
Vector3 ci = input_colors[i];
float wi = input_weights[i];
if (wi > 0) {
// Find matching color.
int j;
for (j = 0; j < n; j++) {
if (equal(colors[j].x, ci.x) && equal(colors[j].y, ci.y) && equal(colors[j].z, ci.z)) {
weights[j] += wi;
break;
}
}
// No match found. Add new color.
if (j == n) {
colors[n] = ci;
weights[n] = wi;
n++;
}
}
}
nvDebugCheck(n <= 16);
return n;
}
// Different ways of estimating the error.
// Squared distance between `p` and `c` (sum of squared component differences).
static float evaluate_mse(const Vector3 & p, const Vector3 & c) {
    const float dx = p.x - c.x;
    const float dy = p.y - c.y;
    const float dz = p.z - c.z;
    return square(dx) + square(dy) + square(dz);
}
/*static float evaluate_mse(const Vector3 & p, const Vector3 & c, const Vector3 & w) {
return ww.x * square(p.x-c.x) + ww.y * square(p.y-c.y) + ww.z * square(p.z-c.z);
}*/
// Integer squared distance between the r, g, b channels of `p` and `c`. Alpha is ignored.
static int evaluate_mse_rgb(const Color32 & p, const Color32 & c) {
    const int dr = int(p.r) - c.r;
    const int dg = int(p.g) - c.g;
    const int db = int(p.b) - c.b;
    return square(dr) + square(dg) + square(db);
}
// Smallest squared distance from `c` to any of the four palette entries.
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c) {
    const float best01 = min(evaluate_mse(palette[0], c), evaluate_mse(palette[1], c));
    const float best23 = min(evaluate_mse(palette[2], c), evaluate_mse(palette[3], c));
    return min(best01, best23);
}
// Smallest integer squared RGB distance from `c` to any of the four palette entries.
static int evaluate_mse(const Color32 palette[4], const Color32 & c) {
    const int best01 = min(evaluate_mse_rgb(palette[0], c), evaluate_mse_rgb(palette[1], c));
    const int best23 = min(evaluate_mse_rgb(palette[2], c), evaluate_mse_rgb(palette[3], c));
    return min(best01, best23);
}
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, int index) {
return evaluate_mse(palette[index], c);
}
static int evaluate_mse(const Color32 palette[4], const Color32 & c, int index) {
return evaluate_mse_rgb(palette[index], c);
}
static float evaluate_mse(const BlockDXT1 * output, Vector3 colors[16]) {
Color32 palette[4];
output->evaluatePalette(palette, /*d3d9=*/false);
// convert palette to float.
Vector3 vector_palette[4];
for (int i = 0; i < 4; i++) {
vector_palette[i] = color_to_vector3(palette[i]);
}
// evaluate error for each index.
float error = 0.0f;
for (int i = 0; i < 16; i++) {
int index = (output->indices >> (2*i)) & 3; // @@ Is this the right order?
error += evaluate_mse(vector_palette, colors[i], index);
}
return error;
}
static int evaluate_mse(const BlockDXT1 * output, Color32 color, int index) {
Color32 palette[4];
output->evaluatePalette(palette, /*d3d9=*/false);
return evaluate_mse(palette, color, index);
}
/*void output_block3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block)
{
Vector3 minColor = start * 255.0f;
Vector3 maxColor = end * 255.0f;
uint16 color0 = roundAndExpand(&minColor);
uint16 color1 = roundAndExpand(&maxColor);
if (color0 > color1) {
swap(maxColor, minColor);
swap(color0, color1);
}
block->col0 = Color16(color0);
block->col1 = Color16(color1);
block->indices = compute_indices3(colors, weights, count, maxColor / 255.0f, minColor / 255.0f);
//optimizeEndPoints3(set, block);
}*/
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
//
// Encodes a block in which every texel has the color `c`. The two endpoints are looked up
// per channel in the OMatch5 / OMatch6 tables and every texel gets the same selector.
// Returns the rgb error of `c` against the palette entry that the selector picks.
float nv::compress_dxt1_single_color_optimal(Color32 c, BlockDXT1 * output)
{
    // Column 0 of the table row goes to col0, column 1 goes to col1.
    output->col0.r = OMatch5[c.r][0];
    output->col1.r = OMatch5[c.r][1];

    output->col0.g = OMatch6[c.g][0];
    output->col1.g = OMatch6[c.g][1];

    output->col0.b = OMatch5[c.b][0];
    output->col1.b = OMatch5[c.b][1];

    // Selector 2 (binary 10) for all 16 texels.
    output->indices = 0xaaaaaaaa;

    const bool wrong_order = output->col0.u < output->col1.u;
    if (wrong_order)
    {
        // Exchange the endpoints and turn every selector 2 into selector 3.
        swap(output->col0.u, output->col1.u);
        output->indices ^= 0x55555555;
    }

    // All texels share one selector, so the first one is representative.
    const int selector = output->indices & 3;
    return (float) evaluate_mse(output, c, selector);
}
float nv::compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output)
{
return compress_dxt1_single_color_optimal(vector3_to_color(color), output);
}
// Low quality baseline compressor.
// Not implemented yet: none of the arguments are read, `output` is not written, and the
// function always returns FLT_MAX.
float nv::compress_dxt1_least_squares_fit(const Vector3 * input_colors, const Vector3 * colors, const float * weights, int count, BlockDXT1 * output)
{
// @@ Iterative best end point fit.
return FLT_MAX;
}
// Expands a 5:6:5 color to 8 bits per channel by replicating the top bits of each channel
// into the low bits. Alpha is set to 0xFF.
static Color32 bitexpand_color16_to_color32(Color16 c16) {
    Color32 expanded;
    expanded.r = (c16.r << 3) | (c16.r >> 2);
    expanded.g = (c16.g << 2) | (c16.g >> 4);
    expanded.b = (c16.b << 3) | (c16.b >> 2);
    expanded.a = 0xFF;

    // Disabled alternative that works on the packed words:
    //expanded.u = ((c16.u << 3) & 0xf8) | ((c16.u << 5) & 0xfc00) | ((c16.u << 8) & 0xf80000);
    //expanded.u |= (expanded.u >> 5) & 0x070007;
    //expanded.u |= (expanded.u >> 6) & 0x000300;

    return expanded;
}
// Same expansion as the Color16 overload, taking the 5-bit red, 6-bit green and 5-bit blue
// values as separate integers. Alpha is set to 0xFF.
static Color32 bitexpand_color16_to_color32(int r, int g, int b) {
    Color32 expanded;
    expanded.r = (r << 3) | (r >> 2);
    expanded.g = (g << 2) | (g >> 4);
    expanded.b = (b << 3) | (b >> 2);
    expanded.a = 0xFF;
    return expanded;
}
// Converts an 8-bit per channel color to 5:6:5 by dropping the low bits of each channel
// (no rounding). Alpha is discarded.
static Color16 truncate_color32_to_color16(Color32 c32) {
    Color16 packed;
    packed.r = (c32.r >> 3);
    packed.g = (c32.g >> 2);
    packed.b = (c32.b >> 3);
    return packed;
}
static float evaluate_palette4(Color32 palette[4]) {
palette[2].r = (2 * palette[0].r + palette[1].r) / 3;
palette[2].g = (2 * palette[0].g + palette[1].g) / 3;
palette[2].b = (2 * palette[0].b + palette[1].b) / 3;
palette[3].r = (2 * palette[1].r + palette[0].r) / 3;
palette[3].g = (2 * palette[1].g + palette[0].g) / 3;
palette[3].b = (2 * palette[1].b + palette[0].b) / 3;
}
static float evaluate_palette3(Color32 palette[4]) {
palette[2].r = (palette[0].r + palette[1].r) / 2;
palette[2].g = (palette[0].g + palette[1].g) / 2;
palette[2].b = (palette[0].b + palette[1].b) / 2;
palette[3].r = 0;
palette[3].g = 0;
palette[3].b = 0;
}
// Weighted error of a color set against a palette. Each color contributes its error against
// the closest palette entry, scaled by the square of its weight.
static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) {
    float sum = 0.0f;
    for (int k = 0; k < count; k++) {
        const float squared_weight = weights[k] * weights[k];
        const int closest_error = evaluate_mse(palette, colors[k]);
        sum += squared_weight * closest_error;
    }
    return sum;
}
// Brute force search over 5:6:5 endpoint pairs inside an expanded bounding box of `colors`.
//
//   input_colors  - not used yet.
//   colors        - `count` distinct colors; the box is seeded with min = 1 and max = 0.
//   weights       - one weight per color.
//   count         - number of colors, must be in [1, 16].
//   max_volume    - the search is skipped when the expanded box holds more 5:6:5 values than this.
//   output        - receives the best endpoints found. The indices are NOT written yet.
//
// Work in progress: the function returns FLT_MAX on every path, so callers cannot tell from the
// return value whether `output` holds usable endpoints.
float nv::compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, int max_volume, BlockDXT1 * output)
{
    // With no colors the bounding box below is inverted, the search loops never execute and
    // best0/best1 would be read uninitialized. With more than 16 colors colors32[] overflows.
    if (count <= 0 || count > 16) {
        return FLT_MAX;
    }

    // Compute bounding box.
    Vector3 min_color(1.0f);
    Vector3 max_color(0.0f);
    for (int i = 0; i < count; i++) {
        min_color = min(min_color, colors[i]);
        max_color = max(max_color, colors[i]);
    }

    // Convert to 5:6:5
    int min_r = ftoi_floor(31 * min_color.x);
    int min_g = ftoi_floor(63 * min_color.y);
    int min_b = ftoi_floor(31 * min_color.z);
    int max_r = ftoi_ceil(31 * max_color.x);
    int max_g = ftoi_ceil(63 * max_color.y);
    int max_b = ftoi_ceil(31 * max_color.z);

    // Expand the box.
    // NOTE(review): the lower bound grows by the full range ("/ 1") while the upper bound grows
    // by half of it ("/ 2"). This looks like a typo, but it is kept as written - confirm intent.
    int range_r = max_r - min_r;
    int range_g = max_g - min_g;
    int range_b = max_b - min_b;
    min_r = max(0, min_r - (range_r + 1) / 1 - 1);
    min_g = max(0, min_g - (range_g + 1) / 1 - 1);
    min_b = max(0, min_b - (range_b + 1) / 1 - 1);
    max_r = min(31, max_r + (range_r + 1) / 2 + 1);
    max_g = min(63, max_g + (range_g + 1) / 2 + 1);
    max_b = min(31, max_b + (range_b + 1) / 2 + 1);

    // Estimate size of search space.
    int volume = (max_r-min_r+1) * (max_g-min_g+1) * (max_b-min_b+1);

    // If the size is under max_volume, then proceed. Note that max_volume is meant to be the
    // sqrt of the number of evaluations.
    if (volume > max_volume) {
        return FLT_MAX;
    }

    // 8-bit versions of the input colors for the integer error metric.
    Color32 colors32[16];
    for (int i = 0; i < count; i++) {
        colors32[i] = toColor32(Vector4(colors[i], 1));
    }

    float best_error = FLT_MAX;
    Color32 best0, best1;

    // Endpoint 1 iterates downwards and never goes below endpoint 0 in any channel.
    for(int r0 = min_r; r0 <= max_r; r0++)
    for(int r1 = max_r; r1 >= r0; r1--)
    for(int g0 = min_g; g0 <= max_g; g0++)
    for(int g1 = max_g; g1 >= g0; g1--)
    for(int b0 = min_b; b0 <= max_b; b0++)
    for(int b1 = max_b; b1 >= b0; b1--)
    {
        Color32 palette[4];
        palette[0] = bitexpand_color16_to_color32(r1, g1, b1);
        palette[1] = bitexpand_color16_to_color32(r0, g0, b0);

        // Evaluate error in 4 color mode.
        evaluate_palette4(palette);

        float error = evaluate_palette_error(palette, colors32, weights, count);

        if (error < best_error) {
            best_error = error;
            best0 = palette[0];
            best1 = palette[1];
        }

#if 0
        // Evaluate error in 3 color mode.
        evaluate_palette3(palette);

        float error = evaluate_palette_error(palette, colors, weights, count);

        if (error < best_error) {
            best_error = error;
            best0 = palette[1];
            best1 = palette[0];
        }
#endif
    }

    output->col0 = truncate_color32_to_color16(best0);
    output->col1 = truncate_color32_to_color16(best1);

    // @@ Index computation is still missing for both modes.
    if (output->col0.u <= output->col1.u) {
        //output->indices = computeIndices3(colors, best0, best1);
    }
    else {
        //output->indices = computeIndices4(colors, best0, best1);
    }

    return FLT_MAX;
}
// Cluster fit compressor.
//
// Work in progress: the fit is run, but the resulting endpoints are not written to `output`
// yet. The function previously fell off its end without returning a value, which is undefined
// behavior for a function returning float. Until the block is actually emitted it returns
// FLT_MAX, like the other unfinished compressors in this file.
float nv::compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, BlockDXT1 * output)
{
    ClusterFit fit;
    //fit.setColorWeights(compressionOptions.colorWeight);
    fit.setColorWeights(Vector4(1)); // @@ Set color weights.
    fit.setColorSet(colors, weights, count);

    // start & end are in [0, 1] range.
    Vector3 start, end;
    fit.compress4(&start, &end);

    // compress3 receives the same start/end pointers as compress4 above.
    if (fit.compress3(&start, &end)) {
        //output_block3(input_colors, start, end, block);
        // @@ Output block.
    }
    else {
        //output_block4(input_colors, start, end, block);
        // @@ Output block.
    }

    // @@ Return the real error once the block is written.
    return FLT_MAX;
}
// Compresses a block of 16 weighted colors to DXT1 and returns the error reported by the
// compressor that was selected.
float nv::compress_dxt1(const Vector3 input_colors[16], const float input_weights[16], BlockDXT1 * output)
{
    // Collapse the input into a reduced color set; reduce_colors reports how many entries it wrote.
    Vector3 colors[16];
    float weights[16];
    const int count = reduce_colors(input_colors, input_weights, colors, weights);

    // Nothing to encode: output trivial block.
    if (count == 0) {
        output->col0.u = 0;
        output->col1.u = 0;
        output->indices = 0;
        return 0;
    }

    // A single distinct color has a table driven optimal encoding.
    if (count == 1) {
        return compress_dxt1_single_color_optimal(colors[0], output);
    }

    // If high quality:
    //error = compress_dxt1_bounding_box_exhaustive(colors, weights, count, 3200, error, output);
    //if (error < FLT_MAX) return error;

    // This is pretty fast and in some cases can produce better quality than cluster fit.
    // error = compress_dxt1_least_squares_fit(colors, weights, error, output);

    // General case.
    return compress_dxt1_cluster_fit(input_colors, colors, weights, count, output);
}

38
src/nvtt/CompressorDXT1.h Normal file
View File

@ -0,0 +1,38 @@
#ifndef NV_TT_COMPRESSORDXT1_H
#define NV_TT_COMPRESSORDXT1_H

namespace nv {

    class Color32;
    struct ColorBlock;
    struct BlockDXT1;
    class Vector3;

    // All these functions return MSE.
    // Compressors that are not implemented yet, or that give up on a block, return FLT_MAX.

    // Planned ColorBlock based interface (not implemented):
    // Optimal compressors:
    /*float compress_dxt1_single_color_optimal(const Color32 & rgb, BlockDXT1 * output);
    float compress_dxt1_single_color_optimal(const ColorBlock & input, BlockDXT1 * output);
    float compress_dxt1_optimal(const ColorBlock & input, BlockDXT1 * output);

    // Brute force with restricted search space:
    float compress_dxt1_bounding_box_exhaustive(const ColorBlock & input, BlockDXT1 * output);
    float compress_dxt1_best_fit_line_exhaustive(const ColorBlock & input, BlockDXT1 * output);

    // Fast least squares fitting compressors:
    float compress_dxt1_least_squares_fit(const ColorBlock & input, BlockDXT1 * output);
    float compress_dxt1_least_squares_fit_iterative(const ColorBlock & input, BlockDXT1 * output);
    */

    // Encodes a block in which every texel has the same color.
    float compress_dxt1_single_color_optimal(Color32 c, BlockDXT1 * output);
    float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output);

    // The compressors below take the 16 original block colors in `input_colors` and a reduced
    // set of `count` colors with matching `weights`.
    float compress_dxt1_least_squares_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, BlockDXT1 * output);

    // `max_volume` limits the size of the endpoint search box; larger boxes are not searched.
    float compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, int max_volume, BlockDXT1 * output);

    float compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, BlockDXT1 * output);

    // Main entry point: reduces the 16 input colors and dispatches to one of the compressors above.
    float compress_dxt1(const Vector3 colors[16], const float weights[16], BlockDXT1 * output);

}

#endif // NV_TT_COMPRESSORDXT1_H

View File

@ -32,6 +32,7 @@
#include "nvmath/Color.h"
#include "nvmath/Half.h"
#include "nvmath/ftoi.h"
#include "nvcore/Debug.h"
@ -360,7 +361,19 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
ib = iround(clamp(b * 65535.0f, 0.0f, 65535.0f));
ia = iround(clamp(a * 65535.0f, 0.0f, 65535.0f));
}
else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) {
// @@
}
else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) {
ir = iround(clamp(r, 0.0f, 65535.0f));
ig = iround(clamp(g, 0.0f, 65535.0f));
ib = iround(clamp(b, 0.0f, 65535.0f));
ia = iround(clamp(a, 0.0f, 65535.0f));
}
else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) {
// @@
}
uint p = 0;
p |= PixelFormat::convert(ir, 16, rsize) << rshift;
p |= PixelFormat::convert(ig, 16, gsize) << gshift;

View File

@ -268,9 +268,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
else {
img.expandNormals();
}
// Resize input.
img.resize(w, h, d, ResizeFilter_Box);
@ -279,9 +276,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (!img.isNormalMap()) {
tmp.toGamma(inputOptions.outputGamma);
}
else {
tmp.packNormals();
}
quantize(tmp, compressionOptions);
compress(tmp, f, 0, compressionOptions, outputOptions);
@ -310,9 +304,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
else {
img.expandNormals();
}
}
else {
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
@ -332,7 +323,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
img.normalizeNormalMap();
}
tmp = img;
tmp.packNormals();
}
else {
tmp = img;
@ -485,34 +475,38 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
else
{
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
header.setDX10Format(DXGI_FORMAT_BC1_UNORM);
header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC1_UNORM_SRGB : DXGI_FORMAT_BC1_UNORM);
if (compressionOptions.format == Format_DXT1a) header.setHasAlphaFlag(true);
if (isNormalMap) header.setNormalFlag(true);
}
else if (compressionOptions.format == Format_DXT3) {
header.setDX10Format(DXGI_FORMAT_BC2_UNORM);
header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC2_UNORM_SRGB : DXGI_FORMAT_BC2_UNORM);
}
else if (compressionOptions.format == Format_DXT5) {
header.setDX10Format(DXGI_FORMAT_BC3_UNORM);
else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC3_UNORM_SRGB : DXGI_FORMAT_BC3_UNORM);
}
else if (compressionOptions.format == Format_DXT5n) {
header.setDX10Format(DXGI_FORMAT_BC3_UNORM);
if (isNormalMap) header.setNormalFlag(true);
}
else if (compressionOptions.format == Format_BC4) {
header.setDX10Format(DXGI_FORMAT_BC4_UNORM);
header.setDX10Format(DXGI_FORMAT_BC4_UNORM); // DXGI_FORMAT_BC4_SNORM ?
}
else if (compressionOptions.format == Format_BC5) {
header.setDX10Format(DXGI_FORMAT_BC5_UNORM);
else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) {
header.setDX10Format(DXGI_FORMAT_BC5_UNORM); // DXGI_FORMAT_BC5_SNORM ?
if (isNormalMap) header.setNormalFlag(true);
}
else if (compressionOptions.format == Format_BC6) {
header.setDX10Format(DXGI_FORMAT_BC6H_UF16);
if (compressionOptions.pixelType == PixelType_Float) header.setDX10Format(DXGI_FORMAT_BC6H_SF16);
/*if (compressionOptions.pixelType == PixelType_UnsignedFloat)*/ header.setDX10Format(DXGI_FORMAT_BC6H_UF16); // By default we assume unsigned.
}
else if (compressionOptions.format == Format_BC7) {
header.setDX10Format(DXGI_FORMAT_BC7_UNORM);
header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM);
if (isNormalMap) header.setNormalFlag(true);
}
else if (compressionOptions.format == Format_CTX1) {
supported = false;
}
else {
supported = false;
}
@ -597,7 +591,7 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
else if (compressionOptions.format == Format_DXT3) {
header.setFourCC('D', 'X', 'T', '3');
}
else if (compressionOptions.format == Format_DXT5) {
else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
header.setFourCC('D', 'X', 'T', '5');
}
else if (compressionOptions.format == Format_DXT5n) {
@ -611,19 +605,21 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
else if (compressionOptions.format == Format_BC4) {
header.setFourCC('A', 'T', 'I', '1');
}
else if (compressionOptions.format == Format_BC5) {
else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) {
header.setFourCC('A', 'T', 'I', '2');
if (isNormalMap) {
header.setNormalFlag(true);
header.setSwizzleCode('A', '2', 'X', 'Y');
}
}
else if (compressionOptions.format == Format_BC6) { // @@ This is not supported by D3DX. Always use DX10 header with BC6-7 formats.
header.setFourCC('Z', 'O', 'H', ' ');
else if (compressionOptions.format == Format_BC6) {
header.setFourCC('Z', 'O', 'H', ' '); // This is not supported by D3DX. Always use DX10 header with BC6-7 formats.
supported = false;
}
else if (compressionOptions.format == Format_BC7) {
header.setFourCC('Z', 'O', 'L', 'A');
header.setFourCC('Z', 'O', 'L', 'A'); // This is not supported by D3DX. Always use DX10 header with BC6-7 formats.
if (isNormalMap) header.setNormalFlag(true);
supported = false;
}
else if (compressionOptions.format == Format_CTX1) {
header.setFourCC('C', 'T', 'X', '1');
@ -777,6 +773,14 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression
{
return new CompressorBC7;
}
else if (compressionOptions.format == Format_BC5_Luma)
{
return new ProductionCompressorBC5_Luma;
}
else if (compressionOptions.format == Format_BC3_RGBM)
{
return new CompressorBC3_RGBM;
}
return NULL;
}

View File

@ -320,7 +320,7 @@ bool CubeSurface::load(const char * fileName, int mipmap)
if (mipmap < 0) {
mipmap = dds.mipmapCount() - 1 - mipmap;
}
if (mipmap < 0 || mipmap > toI32(dds.mipmapCount())) return false;
if (mipmap < 0 || mipmap > I32(dds.mipmapCount())) return false;
nvtt::InputFormat inputFormat = nvtt::InputFormat_RGBA_16F;
@ -328,12 +328,14 @@ bool CubeSurface::load(const char * fileName, int mipmap)
if (dds.header.hasDX10Header()) {
if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R16G16B16A16_FLOAT) inputFormat = nvtt::InputFormat_RGBA_16F;
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32G32B32A32_FLOAT) inputFormat = nvtt::InputFormat_RGBA_32F;
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32_FLOAT) inputFormat = nvtt::InputFormat_R_32F;
else return false;
}
else {
if ((dds.header.pf.flags & DDPF_FOURCC) != 0) {
if (dds.header.pf.fourcc == D3DFMT_A16B16G16R16F) inputFormat = nvtt::InputFormat_RGBA_16F;
else if (dds.header.pf.fourcc == D3DFMT_A32B32G32R32F) inputFormat = nvtt::InputFormat_RGBA_32F;
else if (dds.header.pf.fourcc == D3DFMT_R32F) inputFormat = nvtt::InputFormat_R_32F;
else return false;
}
else {
@ -594,7 +596,7 @@ Vector3 CubeSurface::Private::applyAngularFilter(const Vector3 & filterDir, floa
continue;
}
const int L = toI32(edgeLength-1);
const int L = I32(edgeLength-1);
int x0 = 0, x1 = L;
int y0 = 0, y1 = L;
@ -715,7 +717,7 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,
continue;
}
const int L = toI32(edgeLength-1);
const int L = I32(edgeLength-1);
int x0 = 0, x1 = L;
int y0 = 0, y1 = L;

View File

@ -202,18 +202,22 @@ bool InputOptions::setMipmapData(const void * data, int width, int height, int d
return false;
}
int imageSize = width * height * depth * 4;
int imageSize = width * height * depth;
if (m.inputFormat == InputFormat_BGRA_8UB)
{
imageSize *= sizeof(uint8);
imageSize *= 4 * sizeof(uint8);
}
else if (m.inputFormat == InputFormat_RGBA_16F)
{
imageSize *= sizeof(uint16);
imageSize *= 4 * sizeof(uint16);
}
else if (m.inputFormat == InputFormat_RGBA_32F)
{
imageSize *= sizeof(float);
imageSize *= 4 * sizeof(float);
}
else if (m.inputFormat == InputFormat_R_32F)
{
imageSize *= 1 * sizeof(float);
}
else
{

View File

@ -32,7 +32,8 @@
#include <nvcore/Utils.h> // swap
#include <limits.h>
#include <limits.h> // INT_MAX
#include <float.h> // FLT_MAX
using namespace nv;
using namespace OptimalCompress;
@ -185,16 +186,16 @@ namespace
return totalError;
}*/
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
static float computeAlphaError(const AlphaBlock4x4 & src, const AlphaBlockDXT5 * dst, float bestError = FLT_MAX)
{
uint8 alphas[8];
block->evaluatePalette(alphas, false); // @@ Use target decoder.
dst->evaluatePalette(alphas, false); // @@ Use target decoder.
int totalError = 0;
float totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint8 alpha = src.alpha[i];
int minDist = INT_MAX;
for (uint p = 0; p < 8; p++)
@ -203,7 +204,7 @@ namespace
minDist = min(dist, minDist);
}
totalError += minDist;
totalError += minDist * src.weights[i];
if (totalError > bestError)
{
@ -215,14 +216,14 @@ namespace
return totalError;
}
static void computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
static void computeAlphaIndices(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst)
{
uint8 alphas[8];
block->evaluatePalette(alphas, false); // @@ Use target decoder.
dst->evaluatePalette(alphas, /*d3d9=*/false); // @@ Use target decoder.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint8 alpha = src.alpha[i];
int minDist = INT_MAX;
int bestIndex = 8;
@ -238,7 +239,7 @@ namespace
}
nvDebugCheck(bestIndex < 8);
block->setIndex(i, bestIndex);
dst->setIndex(i, bestIndex);
}
}
@ -252,19 +253,19 @@ namespace
// https://mollyrocket.com/forums/viewtopic.php?t=392
void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
void OptimalCompress::compressDXT1a(Color32 c, uint alphaMask, BlockDXT1 * dxtBlock)
@ -481,46 +482,68 @@ void OptimalCompress::compressDXT1_Luma(const ColorBlock & rgba, BlockDXT1 * blo
}
void OptimalCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
void OptimalCompress::compressDXT3A(const AlphaBlock4x4 & src, AlphaBlockDXT3 * dst)
{
dxtBlock->alpha0 = quantize4(rgba.color(0).a);
dxtBlock->alpha1 = quantize4(rgba.color(1).a);
dxtBlock->alpha2 = quantize4(rgba.color(2).a);
dxtBlock->alpha3 = quantize4(rgba.color(3).a);
dxtBlock->alpha4 = quantize4(rgba.color(4).a);
dxtBlock->alpha5 = quantize4(rgba.color(5).a);
dxtBlock->alpha6 = quantize4(rgba.color(6).a);
dxtBlock->alpha7 = quantize4(rgba.color(7).a);
dxtBlock->alpha8 = quantize4(rgba.color(8).a);
dxtBlock->alpha9 = quantize4(rgba.color(9).a);
dxtBlock->alphaA = quantize4(rgba.color(10).a);
dxtBlock->alphaB = quantize4(rgba.color(11).a);
dxtBlock->alphaC = quantize4(rgba.color(12).a);
dxtBlock->alphaD = quantize4(rgba.color(13).a);
dxtBlock->alphaE = quantize4(rgba.color(14).a);
dxtBlock->alphaF = quantize4(rgba.color(15).a);
dst->alpha0 = quantize4(src.alpha[0]);
dst->alpha1 = quantize4(src.alpha[1]);
dst->alpha2 = quantize4(src.alpha[2]);
dst->alpha3 = quantize4(src.alpha[3]);
dst->alpha4 = quantize4(src.alpha[4]);
dst->alpha5 = quantize4(src.alpha[5]);
dst->alpha6 = quantize4(src.alpha[6]);
dst->alpha7 = quantize4(src.alpha[7]);
dst->alpha8 = quantize4(src.alpha[8]);
dst->alpha9 = quantize4(src.alpha[9]);
dst->alphaA = quantize4(src.alpha[10]);
dst->alphaB = quantize4(src.alpha[11]);
dst->alphaC = quantize4(src.alpha[12]);
dst->alphaD = quantize4(src.alpha[13]);
dst->alphaE = quantize4(src.alpha[14]);
dst->alphaF = quantize4(src.alpha[15]);
}
void OptimalCompress::compressDXT3A(const ColorBlock & src, AlphaBlockDXT3 * dst)
{
AlphaBlock4x4 tmp;
tmp.init(src, 3);
compressDXT3A(tmp, dst);
}
void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
void OptimalCompress::compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst)
{
uint8 mina = 255;
uint8 maxa = 0;
uint8 mina_no01 = 255;
uint8 maxa_no01 = 0;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint8 alpha = src.alpha[i];
mina = min(mina, alpha);
maxa = max(maxa, alpha);
if (alpha != 0 && alpha != 255) {
mina_no01 = min(mina_no01, alpha);
maxa_no01 = max(maxa_no01, alpha);
}
}
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
if (maxa - mina < 8) {
dst->alpha0 = maxa;
dst->alpha1 = mina;
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
nvDebugCheck(computeAlphaError(src, dst) == 0);
}
else if (maxa_no01 - mina_no01 < 6) {
dst->alpha0 = mina_no01;
dst->alpha1 = maxa_no01;
nvDebugCheck(computeAlphaError(src, dst) == 0);
}
else {
float besterror = computeAlphaError(src, dst);
int besta0 = maxa;
int besta1 = mina;
@ -535,9 +558,9 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx
{
nvDebugCheck(a0 - a1 > 8);
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock, besterror);
dst->alpha0 = a0;
dst->alpha1 = a1;
float error = computeAlphaError(src, dst, besterror);
if (error < besterror)
{
@ -548,10 +571,241 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx
}
}
dxtBlock->alpha0 = besta0;
dxtBlock->alpha1 = besta1;
// Try using the 6 step encoding.
/*if (mina == 0 || maxa == 255)*/ {
// Expand search space a bit.
const int alphaExpand = 6;
mina_no01 = (mina_no01 <= alphaExpand) ? 0 : mina_no01 - alphaExpand;
maxa_no01 = (maxa_no01 >= 255 - alphaExpand) ? 255 : maxa_no01 + alphaExpand;
for (int a0 = mina_no01 + 9; a0 < maxa_no01; a0++)
{
for (int a1 = mina_no01; a1 < a0 - 8; a1++)
{
nvDebugCheck(a0 - a1 > 8);
dst->alpha0 = a1;
dst->alpha1 = a0;
float error = computeAlphaError(src, dst, besterror);
if (error < besterror)
{
besterror = error;
besta0 = a1;
besta1 = a0;
}
}
}
}
dst->alpha0 = besta0;
dst->alpha1 = besta1;
}
computeAlphaIndices(rgba, dxtBlock);
computeAlphaIndices(src, dst);
}
void OptimalCompress::compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst)
{
AlphaBlock4x4 tmp;
tmp.init(src, 3);
compressDXT5A(tmp, dst);
}
#include "nvmath/Vector.inl"
#include "nvmath/ftoi.h"
// Smallest RGBM multiplier. The RGBM functions below decode an 8-bit alpha value `a` as
// M = a / 255 * (1 - threshold) + threshold, so M covers the range [threshold, 1].
const float threshold = 0.15f;
static float computeAlphaError_RGBM(const ColorSet & src, const ColorBlock & RGB, const AlphaBlockDXT5 * dst, float bestError = FLT_MAX)
{
uint8 alphas[8];
dst->evaluatePalette(alphas, /*d3d9=*/false); // @@ Use target decoder.
float totalError = 0;
for (uint i = 0; i < 16; i++)
{
float R = src.color(i).x;
float G = src.color(i).y;
float B = src.color(i).z;
float r = float(RGB.color(i).r) / 255.0f;
float g = float(RGB.color(i).g) / 255.0f;
float b = float(RGB.color(i).b) / 255.0f;
float minDist = FLT_MAX;
for (uint p = 0; p < 8; p++)
{
// Compute M.
float M = float(alphas[p]) / 255.0f * (1 - threshold) + threshold;
// Decode color.
float fr = r * M;
float fg = g * M;
float fb = b * M;
// Measure error.
float error = square(R - fr) + square(G - fg) + square(B - fb);
minDist = min(error, minDist);
}
totalError += minDist * src.weights[i];
if (totalError > bestError)
{
// early out
return totalError;
}
}
return totalError;
}
static void computeAlphaIndices_RGBM(const ColorSet & src, const ColorBlock & RGB, AlphaBlockDXT5 * dst)
{
uint8 alphas[8];
dst->evaluatePalette(alphas, /*d3d9=*/false); // @@ Use target decoder.
for (uint i = 0; i < 16; i++)
{
float R = src.color(i).x;
float G = src.color(i).y;
float B = src.color(i).z;
float r = float(RGB.color(i).r) / 255.0f;
float g = float(RGB.color(i).g) / 255.0f;
float b = float(RGB.color(i).b) / 255.0f;
float minDist = FLT_MAX;
int bestIndex = 8;
for (uint p = 0; p < 8; p++)
{
// Compute M.
float M = float(alphas[p]) / 255.0f * (1 - threshold) + threshold;
// Decode color.
float fr = r * M;
float fg = g * M;
float fb = b * M;
// Measure error.
float error = square(R - fr) + square(G - fg) + square(B - fb);
if (error < minDist)
{
minDist = error;
bestIndex = p;
}
}
nvDebugCheck(bestIndex < 8);
dst->setIndex(i, bestIndex);
}
}
// Brute force search of the DXT5 alpha endpoints that encode the RGBM multiplier.
//
//   src - reference colors and per texel weights.
//   RGB - rgb values that are stored next to the multiplier.
//   dst - receives the best endpoints and the per texel indices.
//
// The multiplier range is not derived from the input yet, so the whole [0, 255] range is
// searched with both endpoint orders (alpha0 > alpha1 and alpha0 < alpha1).
void OptimalCompress::compressDXT5A_RGBM(const ColorSet & src, const ColorBlock & RGB, AlphaBlockDXT5 * dst)
{
    uint8 mina = 255;
    uint8 maxa = 0;

    uint8 mina_no01 = 255;
    uint8 maxa_no01 = 0;

    // Get min/max alpha.
    /*for (uint i = 0; i < 16; i++)
    {
        uint8 alpha = src.alpha[i];
        mina = min(mina, alpha);
        maxa = max(maxa, alpha);

        if (alpha != 0 && alpha != 255) {
            mina_no01 = min(mina_no01, alpha);
            maxa_no01 = max(maxa_no01, alpha);
        }
    }*/
    mina = 0;
    maxa = 255;
    mina_no01 = 0;
    maxa_no01 = 255;

    /*if (maxa - mina < 8) {
        dst->alpha0 = maxa;
        dst->alpha1 = mina;

        nvDebugCheck(computeAlphaError(src, dst) == 0);
    }
    else if (maxa_no01 - mina_no01 < 6) {
        dst->alpha0 = mina_no01;
        dst->alpha1 = maxa_no01;

        nvDebugCheck(computeAlphaError(src, dst) == 0);
    }
    else*/
    {
        // Baseline candidate. The endpoints have to be stored before the error is measured,
        // otherwise besterror describes whatever the caller left in dst and does not match
        // besta0/besta1. The loops below never visit this pair, because a0 stops before maxa.
        dst->alpha0 = maxa;
        dst->alpha1 = mina;

        float besterror = computeAlphaError_RGBM(src, RGB, dst);
        int besta0 = maxa;
        int besta1 = mina;

        // Expand search space a bit.
        const int alphaExpand = 8;
        mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand;
        maxa = (maxa >= 255 - alphaExpand) ? 255 : maxa + alphaExpand;

        for (int a0 = mina + 9; a0 < maxa; a0++)
        {
            for (int a1 = mina; a1 < a0 - 8; a1++)
            {
                nvDebugCheck(a0 - a1 > 8);

                dst->alpha0 = a0;
                dst->alpha1 = a1;
                float error = computeAlphaError_RGBM(src, RGB, dst, besterror);

                if (error < besterror)
                {
                    besterror = error;
                    besta0 = a0;
                    besta1 = a1;
                }
            }
        }

        // Try using the 6 step encoding.
        /*if (mina == 0 || maxa == 255)*/ {

            // Expand search space a bit.
            const int alphaExpand = 6;
            mina_no01 = (mina_no01 <= alphaExpand) ? 0 : mina_no01 - alphaExpand;
            maxa_no01 = (maxa_no01 >= 255 - alphaExpand) ? 255 : maxa_no01 + alphaExpand;

            for (int a0 = mina_no01 + 9; a0 < maxa_no01; a0++)
            {
                for (int a1 = mina_no01; a1 < a0 - 8; a1++)
                {
                    nvDebugCheck(a0 - a1 > 8);

                    // Endpoints are stored in increasing order to select the 6 step mode.
                    dst->alpha0 = a1;
                    dst->alpha1 = a0;
                    float error = computeAlphaError_RGBM(src, RGB, dst, besterror);

                    if (error < besterror)
                    {
                        besterror = error;
                        besta0 = a1;
                        besta1 = a0;
                    }
                }
            }
        }

        dst->alpha0 = besta0;
        dst->alpha1 = besta1;
    }

    computeAlphaIndices_RGBM(src, RGB, dst);
}

View File

@ -25,31 +25,38 @@
#ifndef NV_TT_OPTIMALCOMPRESSDXT_H
#define NV_TT_OPTIMALCOMPRESSDXT_H
#include <nvimage/nvimage.h>
//#include "nvimage/nvimage.h"
#include <nvmath/Color.h>
#include "nvmath/Color.h"
namespace nv
{
struct ColorSet;
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
struct AlphaBlock4x4;
namespace OptimalCompress
{
// Single color compressors:
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, uint alphaMask, BlockDXT1 * dxtBlock);
void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
void compressDXT3A(const AlphaBlock4x4 & src, AlphaBlockDXT3 * dst);
void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst);
void compressDXT1_Luma(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT1G(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT3A(const ColorBlock & src, AlphaBlockDXT3 * dst);
void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst);
void compressDXT1_Luma(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT5A_RGBM(const ColorSet & src, const ColorBlock & RGB, AlphaBlockDXT5 * dst);
}
} // nv namespace

View File

@ -28,13 +28,13 @@
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include "nvmath/Color.h"
#include "nvmath/Color.inl"
#include "nvmath/Vector.inl"
#include "nvmath/Fitting.h"
#include "nvcore/Utils.h" // swap
#include <string.h> // memset
using namespace nv;
using namespace QuickCompress;
@ -115,13 +115,28 @@ inline static void insetBBox(Vector3 * restrict maxColor, Vector3 * restrict min
*minColor = clamp(*minColor + inset, 0.0f, 255.0f);
}
#include "nvmath/ftoi.h"
// Takes a color in [0, 255] range, quantizes it to 5:6:5 and returns the packed 16 bit color.
inline static uint16 roundAndExpand(Vector3 * restrict v)
{
uint r = uint(clamp(v->x * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);
uint g = uint(clamp(v->y * (63.0f / 255.0f), 0.0f, 63.0f) + 0.5f);
uint b = uint(clamp(v->z * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);
uint r = ftoi_floor(clamp(v->x * (31.0f / 255.0f), 0.0f, 31.0f));
uint g = ftoi_floor(clamp(v->y * (63.0f / 255.0f), 0.0f, 63.0f));
uint b = ftoi_floor(clamp(v->z * (31.0f / 255.0f), 0.0f, 31.0f));
float r0 = float(((r+0) << 3) | ((r+0) >> 2));
float r1 = float(((r+1) << 3) | ((r+1) >> 2));
if (fabs(v->x - r1) < fabs(v->x - r0)) r = min(r+1, 31U);
float g0 = float(((g+0) << 2) | ((g+0) >> 4));
float g1 = float(((g+1) << 2) | ((g+1) >> 4));
if (fabs(v->y - g1) < fabs(v->y - g0)) g = min(g+1, 63U);
float b0 = float(((b+0) << 3) | ((b+0) >> 2));
float b1 = float(((b+1) << 3) | ((b+1) >> 2));
if (fabs(v->z - b1) < fabs(v->z - b0)) b = min(b+1, 31U);
uint16 w = (r << 11) | (g << 5) | b;
r = (r << 3) | (r >> 2);
@ -132,16 +147,57 @@ inline static uint16 roundAndExpand(Vector3 * restrict v)
return w;
}
// Takes a normalized color in [0, 1] range, quantizes it to 5:6:5 and returns the packed
// 16 bit color. On return *v holds the quantized color expanded back to 8 bits per channel
// and normalized to [0, 1].
inline static uint16 roundAndExpand01(Vector3 * restrict v)
{
    uint r = ftoi_floor(clamp(v->x * 31.0f, 0.0f, 31.0f));
    uint g = ftoi_floor(clamp(v->y * 63.0f, 0.0f, 63.0f));
    uint b = ftoi_floor(clamp(v->z * 31.0f, 0.0f, 31.0f));

    // The candidates below are expanded 8 bit values, so the input has to be brought to the
    // [0, 255] range before comparing. Comparing the [0, 1] input directly always picked the
    // lower candidate.
    const float x = v->x * 255.0f;
    const float y = v->y * 255.0f;
    const float z = v->z * 255.0f;

    // For each channel, move to the next quantization step when its expansion is closer.
    float r0 = float(((r+0) << 3) | ((r+0) >> 2));
    float r1 = float(((r+1) << 3) | ((r+1) >> 2));
    if (fabs(x - r1) < fabs(x - r0)) r = min(r+1, 31U);

    float g0 = float(((g+0) << 2) | ((g+0) >> 4));
    float g1 = float(((g+1) << 2) | ((g+1) >> 4));
    if (fabs(y - g1) < fabs(y - g0)) g = min(g+1, 63U);

    float b0 = float(((b+0) << 3) | ((b+0) >> 2));
    float b1 = float(((b+1) << 3) | ((b+1) >> 2));
    if (fabs(z - b1) < fabs(z - b0)) b = min(b+1, 31U);

    uint16 w = (r << 11) | (g << 5) | b;

    // Expand back to 8 bits per channel by bit replication.
    r = (r << 3) | (r >> 2);
    g = (g << 2) | (g >> 4);
    b = (b << 3) | (b >> 2);
    *v = Vector3(float(r) / 255.0f, float(g) / 255.0f, float(b) / 255.0f);

    return w;
}
// Squared length of the difference between two colors.
inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
{
    const Vector3 delta = c0 - c1;
    return dot(delta, delta);
}
// Currently an identity function: returns `v` unchanged. The two rounding variants below are
// disabled.
// NOTE(review): unlike the neighbouring helpers this function is not declared static - confirm
// whether external linkage is intended.
Vector3 round255(const Vector3 & v) {
//return Vector3(ftoi_round(255 * v.x), ftoi_round(255 * v.y), ftoi_round(255 * v.z)) * (1.0f / 255);
//return Vector3(floorf(v.x + 0.5f), floorf(v.y + 0.5f), floorf(v.z + 0.5f));
return v;
}
inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
Vector3 palette[4];
palette[0] = maxColor;
palette[1] = minColor;
//palette[2] = round255((2 * palette[0] + palette[1]) / 3.0f);
//palette[3] = round255((2 * palette[1] + palette[0]) / 3.0f);
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
@ -178,32 +234,58 @@ inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor,
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
Vector3 mem[(4+2)*2];
memset(mem, 0, sizeof(mem));
Vector3 * row0 = mem;
Vector3 * row1 = mem + (4+2);
uint indices = 0;
for(int i = 0; i < 16; i++)
{
if (!set.isValidIndex(i)) {
// Skip masked pixels and out of bounds.
continue;
//for(int i = 0; i < 16; i++)
for (uint y = 0; y < 4; y++) {
for (uint x = 0; x < 4; x++) {
int i = y*4+x;
if (!set.isValidIndex(i)) {
// Skip masked pixels and out of bounds.
continue;
}
Vector3 color = set.color(i).xyz();
// Add error.
color += row0[1+x];
float d0 = colorDistance(palette[0], color);
float d1 = colorDistance(palette[1], color);
float d2 = colorDistance(palette[2], color);
float d3 = colorDistance(palette[3], color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
int index = x2 | ((x0 | x1) << 1);
indices |= index << (2 * i);
// Compute new error.
Vector3 diff = color - palette[index];
// Propagate new error.
//row0[1+x+1] += 7.0f / 16.0f * diff;
//row1[1+x-1] += 3.0f / 16.0f * diff;
//row1[1+x+0] += 5.0f / 16.0f * diff;
//row1[1+x+1] += 1.0f / 16.0f * diff;
}
Vector3 color = set.color(i).xyz();
float d0 = colorDistance(palette[0], color);
float d1 = colorDistance(palette[1], color);
float d2 = colorDistance(palette[2], color);
float d3 = colorDistance(palette[3], color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
swap(row0, row1);
memset(row1, 0, sizeof(row1));
}
return indices;
@ -214,6 +296,8 @@ inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg
Vector3 palette[4];
palette[0] = maxColor;
palette[1] = minColor;
//palette[2] = round255((2 * palette[0] + palette[1]) / 3.0f);
//palette[3] = round255((2 * palette[1] + palette[0]) / 3.0f);
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
@ -231,6 +315,30 @@ inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg
return total;
}
inline static float evaluatePaletteError3(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
Vector3 palette[4];
palette[0] = minColor;
palette[1] = maxColor;
palette[2] = (palette[0] + palette[1]) * 0.5f;
palette[3] = Vector3(0);
float total = 0.0f;
for (int i = 0; i < 16; i++)
{
float d0 = colorDistance(palette[0], block[i]);
float d1 = colorDistance(palette[1], block[i]);
float d2 = colorDistance(palette[2], block[i]);
//float d3 = colorDistance(palette[3], block[i]);
//total += min(min(d0, d1), min(d2, d3));
total += min(min(d0, d1), d2);
}
return total;
}
// maxColor and minColor are expected to be in the same range as the color set.
inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor)
{
@ -392,7 +500,7 @@ static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock)
namespace
{
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
static uint computeAlphaIndices(const AlphaBlock4x4 & src, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas, false); // @@ Use target decoder.
@ -401,7 +509,7 @@ namespace
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint8 alpha = src.alpha[i];
uint besterror = 256*256;
uint best = 8;
@ -425,7 +533,7 @@ namespace
return totalError;
}
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
static void optimizeAlpha8(const AlphaBlock4x4 & src, AlphaBlockDXT5 * block)
{
float alpha2_sum = 0;
float beta2_sum = 0;
@ -445,8 +553,8 @@ namespace
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * rgba.color(i).a;
betax_sum += beta * rgba.color(i).a;
alphax_sum += alpha * src.alpha[i];
betax_sum += beta * src.alpha[i];
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
@ -653,14 +761,20 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
}
void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
void QuickCompress::compressDXT3(const ColorBlock & src, BlockDXT3 * dxtBlock)
{
compressDXT1(rgba, &dxtBlock->color);
OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha);
compressDXT1(src, &dxtBlock->color);
OptimalCompress::compressDXT3A(src, &dxtBlock->alpha);
}
void QuickCompress::compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount/*=8*/)
{
AlphaBlock4x4 tmp;
tmp.init(src, 3);
compressDXT5A(tmp, dst, iterationCount);
}
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/)
void QuickCompress::compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount/*=8*/)
{
uint8 alpha0 = 0;
uint8 alpha1 = 255;
@ -668,7 +782,7 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint8 alpha = src.alpha[i];
alpha0 = max(alpha0, alpha);
alpha1 = min(alpha1, alpha);
}
@ -676,14 +790,14 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
AlphaBlockDXT5 block;
block.alpha0 = alpha0 - (alpha0 - alpha1) / 34;
block.alpha1 = alpha1 + (alpha0 - alpha1) / 34;
uint besterror = computeAlphaIndices(rgba, &block);
uint besterror = computeAlphaIndices(src, &block);
AlphaBlockDXT5 bestblock = block;
for (int i = 0; i < iterationCount; i++)
{
optimizeAlpha8(rgba, &block);
uint error = computeAlphaIndices(rgba, &block);
optimizeAlpha8(src, &block);
uint error = computeAlphaIndices(src, &block);
if (error >= besterror)
{
@ -701,7 +815,7 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
};
// Copy best block to result;
*dxtBlock = bestblock;
*dst = bestblock;
}
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
@ -752,3 +866,108 @@ void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, co
//optimizeEndPoints3(set, block);
}
// Expands 5:6:5 components to 8 bits each by bit replication and returns them as a float vector
// (values end up in the [0, 255] range for in-range inputs).
inline Vector3 toVectorColor(int r, int g, int b) {
    const int r8 = (r << 3) | (r >> 2);
    const int g8 = (g << 2) | (g >> 4);
    const int b8 = (b << 3) | (b >> 2);
    return Vector3(float(r8), float(g8), float(b8));
}
// Do an exhaustive search inside the bounding box.
//
// Computes the RGB bounding box of the block in 5:6:5 space, expands it, and evaluates the four
// color palette error of every end point pair inside the expanded box. The best pair is written
// to output together with its indices. The three color search is currently compiled out.
void compress_dxt1_bounding_box_exhaustive(const ColorBlock & input, BlockDXT1 * output)
{
    int min_r = 255, min_g = 255, min_b = 255;
    int max_r = 0, max_g = 0, max_b = 0;

    for (int i = 0; i < 16; i++) {
        Color32 c = input.color(i);

        min_r = min(min_r, int(c.r));
        max_r = max(max_r, int(c.r));
        min_g = min(min_g, int(c.g));
        max_g = max(max_g, int(c.g));
        min_b = min(min_b, int(c.b));
        max_b = max(max_b, int(c.b));
    }

    // Convert to 5:6:5
    min_r >>= 3; min_g >>= 2; min_b >>= 3;
    max_r >>= 3; max_g >>= 2; max_b >>= 3;

    // Expand the box.
    // NOTE(review): the lower bound grows by the full range (/ 1) while the upper bound grows by
    // half of it (/ 2). The asymmetry is kept as found -- confirm that it is intentional.
    int range_r = max_r - min_r;
    int range_g = max_g - min_g;
    int range_b = max_b - min_b;

    min_r = max(0, min_r - (range_r + 1) / 1 - 1);
    min_g = max(0, min_g - (range_g + 1) / 1 - 1);
    min_b = max(0, min_b - (range_b + 1) / 1 - 1);

    max_r = min(31, max_r + (range_r + 1) / 2 + 1);
    max_g = min(63, max_g + (range_g + 1) / 2 + 1);
    max_b = min(31, max_b + (range_b + 1) / 2 + 1);

    Vector3 colors[16];
    extractColorBlockRGB(input, colors);

    // @@ Use a single loop and remap index to box location?
    float bestError = FLT_MAX;

    // Give the results a defined value so that nothing uninitialized is read below if no
    // candidate ever compares lower than FLT_MAX (for example when the error is NaN).
    Vector3 best0(0.0f), best1(0.0f);
    bool threeColorMode = false;

    // In every channel the second end point is greater than or equal to the first one.
    for(int r0 = min_r; r0 <= max_r; r0++)
    for(int r1 = max_r; r1 >= r0; r1--)
    for(int g0 = min_g; g0 <= max_g; g0++)
    for(int g1 = max_g; g1 >= g0; g1--)
    for(int b0 = min_b; b0 <= max_b; b0++)
    for(int b1 = max_b; b1 >= b0; b1--)
    {
        Vector3 c0 = toVectorColor(r0, g0, b0);
        Vector3 c1 = toVectorColor(r1, g1, b1);

        // Compute palette and evaluate error for these endpoints.
        float error = evaluatePaletteError4(colors, c1, c0);

        if (error < bestError) {
            bestError = error;
            best0 = c1; // Larger end point first, so that color0 >= color1 below.
            best1 = c0;
            threeColorMode = false;
        }

#if 0
        error = evaluatePaletteError3(colors, /*maxColor=*/c1, /*minColor=*/c0);

        if (error < bestError) {
            bestError = error;
            best0 = c0;
            best1 = c1;
            threeColorMode = true;
        }
#endif
    }

    // Quantize the winners; best0/best1 are replaced by their expanded quantized values.
    uint16 color0 = roundAndExpand(&best0);
    uint16 color1 = roundAndExpand(&best1);

    if (threeColorMode) {
        nvCheck(color0 <= color1);
        output->col0 = Color16(color1);
        output->col1 = Color16(color0);
        output->indices = computeIndices3(colors, best0, best1);
    }
    else {
        nvCheck(color0 >= color1);
        output->col0 = Color16(color0);
        output->col1 = Color16(color1);
        output->indices = computeIndices4(colors, best0, best1);
    }
}

View File

@ -31,6 +31,7 @@ namespace nv
{
struct ColorBlock;
struct ColorSet;
struct AlphaBlock4x4;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
@ -40,13 +41,15 @@ namespace nv
namespace QuickCompress
{
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
void compressDXT3(const ColorBlock & src, BlockDXT3 * dst);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8);
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8);
void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8);
void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);

View File

@ -28,6 +28,7 @@
#include "nvmath/Matrix.inl"
#include "nvmath/Color.h"
#include "nvmath/Half.h"
#include "nvmath/ftoi.h"
#include "nvimage/Filter.h"
#include "nvimage/ImageIO.h"
@ -78,13 +79,13 @@ namespace
else if (format == Format_DXT3) {
return 16;
}
else if (format == Format_DXT5 || format == Format_DXT5n) {
else if (format == Format_DXT5 || format == Format_DXT5n || format == Format_BC3_RGBM) {
return 16;
}
else if (format == Format_BC4) {
return 8;
}
else if (format == Format_BC5) {
else if (format == Format_BC5 || format == Format_BC5_Luma) {
return 16;
}
else if (format == Format_CTX1) {
@ -347,13 +348,13 @@ int Surface::countMipmaps(int min_size) const
return ::countMipmapsWithMinSize(m->image->width(), m->image->height(), 1, min_size);
}
float Surface::alphaTestCoverage(float alphaRef/*= 0.5*/) const
float Surface::alphaTestCoverage(float alphaRef/*= 0.5*/, int alpha_channel/*=3*/) const
{
if (m->image == NULL) return 0.0f;
alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256);
return m->image->alphaTestCoverage(alphaRef, 3);
return m->image->alphaTestCoverage(alphaRef, alpha_channel);
}
float Surface::average(int channel, int alpha_channel/*= -1*/, float gamma /*= 2.2f*/) const
@ -419,7 +420,7 @@ void Surface::histogram(int channel, float rangeMin, float rangeMax, int binCoun
const uint count = m->image->pixelCount();
for (uint i = 0; i < count; i++) {
float f = c[i] * scale + bias;
int idx = ifloor(f);
int idx = ftoi_floor(f);
if (idx < 0) idx = 0;
if (idx > binCount-1) idx = binCount-1;
binPtr[idx]++;
@ -434,18 +435,17 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_c
if (alpha_channel == -1) { // no alpha channel; just like the original range function
if (m->image != NULL)
{
float * c = img->channel(channel);
if (m->image != NULL) {
float * c = img->channel(channel);
const uint count = img->pixelCount();
for (uint p = 0; p < count; p++) {
float f = c[p];
if (f < range.x) range.x = f;
if (f > range.y) range.y = f;
const uint count = img->pixelCount();
for (uint p = 0; p < count; p++) {
float f = c[p];
if (f < range.x) range.x = f;
if (f > range.y) range.y = f;
}
}
}
}
else { // use alpha test to ignore some pixels
//note, it's quite possible to get FLT_MAX,-FLT_MAX back if all pixels fail the test
@ -623,6 +623,23 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
return false;
}
}
else if (format == InputFormat_R_32F)
{
const float * src = (const float *)data;
TRY {
for (int i = 0; i < count; i++)
{
rdst[i] = src[i];
gdst[i] = 0;
bdst[i] = 0;
adst[i] = 0;
}
}
CATCH {
return false;
}
}
return true;
}
@ -695,6 +712,20 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
return false;
}
}
else if (format == InputFormat_R_32F)
{
const float * rsrc = (const float *)r;
TRY {
memcpy(rdst, rsrc, count * sizeof(float));
memset(gdst, 0, count * sizeof(float));
memset(bdst, 0, count * sizeof(float));
memset(adst, 0, count * sizeof(float));
}
CATCH {
return false;
}
}
return true;
}
@ -703,12 +734,12 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data)
{
if (format != nvtt::Format_BC1 &&
format != nvtt::Format_BC2 &&
format != nvtt::Format_BC3 &&
format != nvtt::Format_BC4 &&
format != nvtt::Format_BC5 &&
format != nvtt::Format_BC6 &&
format != nvtt::Format_BC7)
format != nvtt::Format_BC2 &&
format != nvtt::Format_BC3 &&
format != nvtt::Format_BC4 &&
format != nvtt::Format_BC5 &&
format != nvtt::Format_BC6 &&
format != nvtt::Format_BC7)
{
return false;
}
@ -1466,7 +1497,7 @@ void Surface::fill(float red, float green, float blue, float alpha)
}
void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/)
void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/, int alpha_channel/*= 3*/)
{
if (isNull()) return;
@ -1474,7 +1505,7 @@ void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/)
alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256);
m->image->scaleAlphaToCoverage(coverage, alphaRef, 3);
m->image->scaleAlphaToCoverage(coverage, alphaRef, alpha_channel);
}
/*bool Surface::normalizeRange(float * rangeMin, float * rangeMax)
@ -1507,7 +1538,7 @@ void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/)
// Ideally you should compress/quantize the RGB and M portions independently.
// Once you have M quantized, you would compute the corresponding RGB and quantize that.
void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.0f*/)
void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
{
if (isNull()) return;
@ -1523,60 +1554,71 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.0f*/)
const uint count = img->pixelCount();
for (uint i = 0; i < count; i++) {
float R = r[i];
float G = g[i];
float B = b[i];
#if 1
float M = nv::clamp(max(max(R, G), max(B, threshold)), 0.0f, range);
float R = nv::clamp(r[i], 0.0f, 1.0f);
float G = nv::clamp(g[i], 0.0f, 1.0f);
float B = nv::clamp(b[i], 0.0f, 1.0f);
r[i] = nv::clamp(R / M, 0.0f, 1.0f);
g[i] = nv::clamp(G / M, 0.0f, 1.0f);
b[i] = nv::clamp(B / M, 0.0f, 1.0f);
#if 0
// Baseline, no compression:
r[i] = R;
g[i] = G;
b[i] = B;
a[i] = 1;
a[i] = (M - threshold) / (range - threshold);
#elif 0
float M = max(max(R, G), max(B, threshold));
r[i] = R / M;
g[i] = G / M;
b[i] = B / M;
a[i] = (M - threshold) / (1 - threshold);
#else
// The optimal compressor theoretically produces the best results, but unfortunately introduces
// severe interpolation errors!
// The optimal compressor produces the best results, but can introduce interpolation errors!
float bestM;
float bestError = FLT_MAX;
int minM = iround(min(R, G, B) * 255.0f);
float M = max(max(R, G), max(B, threshold));
int iM = ftoi_ceil((M - threshold) / (1 - threshold) * 255.0f);
for (int m = minM; m < 256; m++) {
//for (int m = 0; m < 256; m++) { // If we use the entire search space, interpolation errors are very likely to occur.
for (int m = max(iM-16, 0); m < min(iM+16, 256); m++) { // If we constrain the search space, these errors disappear.
float fm = float(m) / 255.0f;
// Decode M
float M = fm * (1 - threshold) + threshold;
// Encode.
int ir = iround(255.0f * nv::clamp(R / fm, 0.0f, 1.0f));
int ig = iround(255.0f * nv::clamp(G / fm, 0.0f, 1.0f));
int ib = iround(255.0f * nv::clamp(B / fm, 0.0f, 1.0f));
int ir = ftoi_round(255.0f * nv::saturate(R / M));
int ig = ftoi_round(255.0f * nv::saturate(G / M));
int ib = ftoi_round(255.0f * nv::saturate(B / M));
// Decode.
float fr = (float(ir) / 255.0f) * fm;
float fg = (float(ig) / 255.0f) * fm;
float fb = (float(ib) / 255.0f) * fm;
float fr = (float(ir) / 255.0f) * M;
float fg = (float(ig) / 255.0f) * M;
float fb = (float(ib) / 255.0f) * M;
// Measure error.
float error = square(R-fr) + square(G-fg) + square(B-fb);
if (error < bestError) {
bestError = error;
bestM = fm;
bestM = M;
}
}
M = bestM;
r[i] = nv::clamp(R / M, 0.0f, 1.0f);
g[i] = nv::clamp(G / M, 0.0f, 1.0f);
b[i] = nv::clamp(B / M, 0.0f, 1.0f);
a[i] = M;
r[i] = nv::saturate(R / M);
g[i] = nv::saturate(G / M);
b[i] = nv::saturate(B / M);
a[i] = (M - threshold) / (1 - threshold);
#endif
}
}
void Surface::fromRGBM(float range/*= 1*/, float threshold/*= 0.0*/)
// @@ IC: Dubious merge. Review!
void Surface::fromRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
{
if (isNull()) return;
@ -1798,7 +1840,7 @@ void Surface::toRGBE(int mantissaBits, int exponentBits)
double denom = pow(2.0, double(E - exponentBias - mantissaBits));
// Refine exponent:
int m = iround(float(M / denom));
int m = ftoi_round(float(M / denom));
nvDebugCheck(m <= (1 << mantissaBits));
if (m == (1 << mantissaBits)) {
@ -1866,10 +1908,10 @@ void Surface::fromRGBE(int mantissaBits, int exponentBits)
const uint count = img->pixelCount();
for (uint i = 0; i < count; i++) {
// Expand normalized float to 9995
int R = iround(r[i] * ((1 << mantissaBits) - 1));
int G = iround(g[i] * ((1 << mantissaBits) - 1));
int B = iround(b[i] * ((1 << mantissaBits) - 1));
int E = iround(a[i] * ((1 << exponentBits) - 1));
int R = ftoi_round(r[i] * ((1 << mantissaBits) - 1));
int G = ftoi_round(g[i] * ((1 << mantissaBits) - 1));
int B = ftoi_round(b[i] * ((1 << mantissaBits) - 1));
int E = ftoi_round(a[i] * ((1 << exponentBits) - 1));
//float scale = ldexpf(1.0f, E - exponentBias - mantissaBits);
float scale = powf(2, float(E - exponentBias - mantissaBits));
@ -2741,8 +2783,8 @@ bool Surface::copy(const Surface & srcImage, int xsrc, int ysrc, int zsrc, int x
FloatImage * dst = m->image;
const FloatImage * src = srcImage.m->image;
if (toU32(xsrc + xsize) > src->width() || toU32(ysrc + ysize) > src->height() || toU32(zsrc + zsize) > src->depth()) return false;
if (toU32(xdst + xsize) > dst->width() || toU32(ydst + ysize) > dst->height() || toU32(zdst + zsize) > dst->depth()) return false;
if (U32(xsrc + xsize) > src->width() || U32(ysrc + ysize) > src->height() || U32(zsrc + zsize) > src->depth()) return false;
if (U32(xdst + xsize) > dst->width() || U32(ydst + ysize) > dst->height() || U32(zdst + zsize) > dst->depth()) return false;
detach();
@ -2765,6 +2807,65 @@ bool Surface::copy(const Surface & srcImage, int xsrc, int ysrc, int zsrc, int x
}
// Draw colored border around atlas elements.
//
// The surface is treated as a grid of aw x ah tiles (aw tiles across, ah tiles down). The first
// and last row and column of every tile are overwritten with the color (r, g, b, a), on every
// depth slice. Does nothing if the surface is null, if aw or ah is not positive, or if there
// are more tiles than pixels along an axis.
void Surface::setAtlasBorder(int aw, int ah, float r, float g, float b, float a)
{
    if (isNull()) return;
    if (aw <= 0) return;
    if (ah <= 0) return;

    detach();

    FloatImage * img = m->image;
    const uint w = img->width();
    const uint h = img->height();
    const uint d = img->depth();

    // @@ Ideally the remainder of these divisions should be 0.
    uint tile_height = h / ah;
    uint tile_width = w / aw;

    // With a zero tile size the unsigned `tile - 1` offsets below would wrap around and index
    // outside of the image.
    if (tile_width == 0 || tile_height == 0) return;

    // Note that this renders two consecutive lines between tiles. In theory we could just have one, but this way I think we have better rotation invariance.

    for (uint z = 0; z < d; z++)
    {
        // Horizontal lines: one pair (top and bottom row) per tile row.
        for (uint i = 0, y = 0; i < uint(ah); i++, y += tile_height)
        {
            for (uint x = 0; x < w; x++)
            {
                img->pixel(0, x, y, z) = r;
                img->pixel(1, x, y, z) = g;
                img->pixel(2, x, y, z) = b;
                img->pixel(3, x, y, z) = a;
                img->pixel(0, x, y + tile_height - 1, z) = r;
                img->pixel(1, x, y + tile_height - 1, z) = g;
                img->pixel(2, x, y + tile_height - 1, z) = b;
                img->pixel(3, x, y + tile_height - 1, z) = a;
            }
        }

        // Vertical lines: one pair (left and right column) per tile column, so this loop is
        // bounded by the horizontal tile count aw.
        for (uint i = 0, x = 0; i < uint(aw); i++, x += tile_width)
        {
            for (uint y = 0; y < h; y++)
            {
                img->pixel(0, x, y, z) = r;
                img->pixel(1, x, y, z) = g;
                img->pixel(2, x, y, z) = b;
                img->pixel(3, x, y, z) = a;
                img->pixel(0, x + tile_width - 1, y, z) = r;
                img->pixel(1, x + tile_width - 1, y, z) = g;
                img->pixel(2, x + tile_width - 1, y, z) = b;
                img->pixel(3, x + tile_width - 1, y, z) = a;
            }
        }
    }
}
float nvtt::rmsError(const Surface & reference, const Surface & image)
{
@ -2839,5 +2940,24 @@ Surface nvtt::diff(const Surface & reference, const Surface & image, float scale
return diffImage;
}
float nvtt::rmsToneMappedError(const Surface & reference, const Surface & img, float exposure)
{
// @@ We could do this in the rms function without having to create image copies.
Surface r = reference;
Surface i = img;
// @@ Ideally we should use our Reindhart operator. Add Reindhart_L & Reindhart_M ?
float scale = 1.0f / exposure;
r.scaleBias(0, scale, 0); r.scaleBias(1, scale, 0); r.scaleBias(2, scale, 0);
r.toneMap(ToneMapper_Reindhart, NULL);
r.toSrgb();
i.scaleBias(0, scale, 0); i.scaleBias(1, scale, 0); i.scaleBias(2, scale, 0);
i.toneMap(ToneMapper_Reindhart, NULL);
i.toSrgb();
return nv::rmsColorError(r.m->image, i.m->image, reference.alphaMode() == nvtt::AlphaMode_Transparency);
}

View File

@ -41,7 +41,7 @@ public:
return out;
}
int getptr() { return bptr; }
int setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
int getsize() { return bend; }
private:
@ -60,8 +60,7 @@ private:
return bit != 0;
}
void writeone(int bit) {
if (readonly)
throw "Writing a read-only bit stream";
nvAssert (!readonly); // "Writing a read-only bit stream"
nvAssert (bptr < maxbits);
if (bptr >= maxbits) return;
if (bit&1)

View File

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations
#ifndef _ZOH_TILE_H
#define _ZOH_TILE_H
#include "utils.h"
#include "zoh_utils.h"
#include "nvmath/Vector.h"
#include <math.h>

View File

@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations
// Utility and common routines
#include "utils.h"
#include "zoh_utils.h"
#include "nvmath/Vector.inl"
#include <math.h>

View File

@ -16,7 +16,7 @@ See the License for the specific language governing permissions and limitations
#include "bits.h"
#include "tile.h"
#include "zoh.h"
#include "utils.h"
#include "zoh_utils.h"
#include "nvmath/Vector.inl"
#include "nvmath/Fitting.h"
@ -591,13 +591,14 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
{
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
++np;
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
++np;
}
}
}
optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
@ -660,7 +661,9 @@ float ZOH::refineone(const Tile &tile, int shapeindex_best, const FltEndpts endp
}
}
}
throw "No candidate found, should never happen (refineone.)";
nvAssert (false); // "No candidate found, should never happen (refineone.)";
return FLT_MAX;
}
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])

View File

@ -40,7 +40,7 @@ See the License for the specific language governing permissions and limitations
#include "bits.h"
#include "tile.h"
#include "zoh.h"
#include "utils.h"
#include "zoh_utils.h"
#include "nvmath/Fitting.h"
#include "nvmath/Vector.inl"
@ -747,7 +747,8 @@ float ZOH::refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endp
}
}
}
throw "No candidate found, should never happen (refinetwo.)";
nvAssert(false); //throw "No candidate found, should never happen (refinetwo.)";
return FLT_MAX;
}
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], Vector3 palette[NREGIONS_TWO][NINDICES])

View File

@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <cstring>
#include <float.h>
@ -394,7 +394,7 @@ void AVPCL::decompress_mode0(const char *block, Tile &t)
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{
Vector4 palette[NINDICES];
float toterr = 0;
@ -404,11 +404,11 @@ static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &en
for (int i = 0; i < np; ++i)
{
float err, besterr = FLT_MAX;
float besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j)
{
err = Utils::metric4(colors[i], palette[j]);
float err = Utils::metric4(colors[i], palette[j]) * importance[i];
if (err > besterr) // error increased, so we're done searching
break;
@ -472,7 +472,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{
// we have the old endpoints: old_endpts
@ -511,7 +511,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
continue;
}
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
if (err < min_err)
{
@ -543,7 +543,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
{
IntEndptsRGB_2 temp_endpts;
float best_err = orig_err;
@ -593,7 +593,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -613,7 +613,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -636,7 +636,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
return best_err;
}
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
{
float opt_err = orig_err;
@ -675,8 +675,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minima
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
if (err0 < err1)
{
@ -712,7 +712,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
if (err >= opt_err)
break;
@ -746,7 +746,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
bool first = true;
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
{
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err)
{
@ -786,6 +786,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
{
Vector4 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
IntEndptsRGB_2 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL];
@ -794,10 +795,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
pixels[np++] = tile.data[y][x];
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
np++;
}
}
}
opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region];
@ -812,10 +818,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// make sure we have a valid error for temp_in
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (map_colors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err)
@ -890,7 +896,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
}
}
}
throw "No candidate found, should never happen (mode avpcl 0).";
nvAssert(false); // throw "No candidate found, should never happen (mode avpcl 0).";
return FLT_MAX;
}
static void clamp(Vector4 &v)

View File

@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <cstring>
#include <float.h>
@ -378,7 +378,7 @@ void AVPCL::decompress_mode1(const char *block, Tile &t)
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_1 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_1 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{
Vector4 palette[NINDICES];
float toterr = 0;
@ -388,11 +388,11 @@ static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_1 &en
for (int i = 0; i < np; ++i)
{
float err, besterr = FLT_MAX;
float besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j)
{
err = Utils::metric4(colors[i], palette[j]);
float err = Utils::metric4(colors[i], palette[j]) * importance[i];
if (err > besterr) // error increased, so we're done searching
break;
@ -456,7 +456,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endp
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts,
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts,
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{
// we have the old endpoints: old_endpts
@ -495,7 +495,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
continue;
}
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
if (err < min_err)
{
@ -527,7 +527,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL])
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL])
{
IntEndptsRGB_1 temp_endpts;
float best_err = orig_err;
@ -577,7 +577,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -597,7 +597,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -619,7 +619,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
return best_err;
}
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_1 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_1 &opt_endpts)
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_1 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_1 &opt_endpts)
{
float opt_err = orig_err;
@ -658,8 +658,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minimum
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
if (err0 < err1)
{
@ -695,7 +695,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
if (err >= opt_err)
break;
@ -729,7 +729,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
bool first = true;
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
{
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err)
{
@ -768,6 +768,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
IntEndptsRGB_1 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_1 opt_endpts[NREGIONS])
{
Vector4 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
IntEndptsRGB_1 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL];
@ -776,10 +777,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
pixels[np++] = tile.data[y][x];
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
np++;
}
}
}
opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region];
@ -793,10 +799,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// make sure we have a valid error for temp_in
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (map_colors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err)
@ -873,7 +879,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
}
}
}
throw "No candidate found, should never happen (mode avpcl 1).";
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 1).";
return FLT_MAX;
}
static void clamp(Vector4 &v)
@ -909,11 +916,11 @@ static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts
for (int x = 0; x < tile.size_x; x++)
{
int region = REGION(x,y,shapeindex);
float err, besterr = FLT_MAX;
float besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i)
{
err = Utils::metric4(tile.data[y][x], palette[region][i]);
float err = Utils::metric4(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
if (err > besterr) // error increased, so we're done searching. this works for most norms.
break;

View File

@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <cstring>
#include <float.h>
@ -342,7 +342,7 @@ void AVPCL::decompress_mode2(const char *block, Tile &t)
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{
Vector4 palette[NINDICES];
float toterr = 0;
@ -352,11 +352,11 @@ static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB &endp
for (int i = 0; i < np; ++i)
{
float err, besterr = FLT_MAX;
float besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j)
{
err = Utils::metric4(colors[i], palette[j]);
float err = Utils::metric4(colors[i], palette[j]) * importance[i];
if (err > besterr) // error increased, so we're done searching
break;
@ -420,7 +420,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts,
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts,
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{
// we have the old endpoints: old_endpts
@ -459,7 +459,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
continue;
}
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
if (err < min_err)
{
@ -491,7 +491,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL])
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL])
{
IntEndptsRGB temp_endpts;
float best_err = orig_err;
@ -541,7 +541,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -561,7 +561,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -584,7 +584,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
return best_err;
}
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts)
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts)
{
float opt_err = orig_err;
@ -623,8 +623,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minimum
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
if (err0 < err1)
{
@ -660,7 +660,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
if (err >= opt_err)
break;
@ -694,7 +694,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
bool first = true;
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
{
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err)
{
@ -733,6 +733,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
const IntEndptsRGB orig_endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB opt_endpts[NREGIONS_THREE])
{
Vector4 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
IntEndptsRGB temp_in, temp_out;
for (int region=0; region<NREGIONS_THREE; ++region)
@ -740,10 +741,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
pixels[np++] = tile.data[y][x];
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
np++;
}
}
}
opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region];
@ -755,7 +761,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
float temp_in_err = orig_err[region];
// now try to optimize these endpoints
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err)
@ -829,7 +835,9 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
}
}
}
throw "No candidate found, should never happen (avpcl mode 2).";
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 2).";
return FLT_MAX;
}
static void clamp(Vector4 &v)

View File

@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <cstring>
#include <float.h>
@ -390,7 +390,7 @@ void AVPCL::decompress_mode3(const char *block, Tile &t)
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{
Vector4 palette[NINDICES];
float toterr = 0;
@ -400,11 +400,11 @@ static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &en
for (int i = 0; i < np; ++i)
{
float err, besterr = FLT_MAX;
float besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j)
{
err = Utils::metric4(colors[i], palette[j]);
float err = Utils::metric4(colors[i], palette[j]) * importance[i];
if (err > besterr) // error increased, so we're done searching
break;
@ -467,7 +467,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{
// we have the old endpoints: old_endpts
@ -506,7 +506,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
continue;
}
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
if (err < min_err)
{
@ -538,7 +538,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
{
IntEndptsRGB_2 temp_endpts;
float best_err = orig_err;
@ -588,7 +588,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -608,7 +608,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -631,7 +631,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
return best_err;
}
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
{
float opt_err = orig_err;
@ -670,8 +670,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minimum
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
if (err0 < err1)
{
@ -707,7 +707,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
if (err >= opt_err)
break;
@ -741,7 +741,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
bool first = true;
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
{
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err)
{
@ -781,6 +781,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
{
Vector4 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
IntEndptsRGB_2 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL];
@ -789,10 +790,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
pixels[np++] = tile.data[y][x];
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
np++;
}
}
}
opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region];
@ -807,10 +813,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// make sure we have a valid error for temp_in
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (map_colors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err)
@ -885,7 +891,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
}
}
}
throw "No candidate found, should never happen (avpcl mode 3).";
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 3).";
return FLT_MAX;
}
static void clamp(Vector4 &v)

View File

@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <cstring>
#include <float.h>
@ -353,9 +353,9 @@ static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
if (p.transform_mode)
{
// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]); // always positive here
endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[1]);
endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[2]);
endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[3]);
endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
}
}
}
@ -422,7 +422,7 @@ void AVPCL::decompress_mode4(const char *block, Tile &t)
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
// exceeds what we already have
static float map_colors(const Vector4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
Vector3 palette_rgb[NINDICES3]; // could be nindices2
float palette_a[NINDICES3]; // could be nindices2
@ -519,7 +519,7 @@ static float map_colors(const Vector4 colors[], int np, int rotatemode, int inde
}
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : (nvCheckMacro(0),0);
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
toterr += besterr;
// do A index
@ -647,7 +647,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
}
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : (nvCheckMacro(0),0);
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
toterr[region] += besterr;
// do A index
@ -672,7 +672,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
// we have the old endpoints: old_endpts
@ -712,7 +712,7 @@ static float perturb_one(const Vector4 colors[], int np, int rotatemode, int ind
continue;
}
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
if (err < min_err)
{
@ -744,7 +744,7 @@ static float perturb_one(const Vector4 colors[], int np, int rotatemode, int ind
// if err > 40 6.25%
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
static float exhaustive(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
IntEndptsRGBA temp_endpts;
float best_err = orig_err;
@ -795,7 +795,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -816,7 +816,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -841,7 +841,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
return best_err;
}
static float optimize_one(const Vector4 colors[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
{
float opt_err = orig_err;
@ -878,8 +878,8 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minimum
float err0 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
if (err0 < err1)
{
@ -917,7 +917,7 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
if (err >= opt_err)
break;
@ -950,7 +950,7 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
bool first = true;
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
{
float new_err = exhaustive(colors, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err)
{
@ -990,6 +990,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
{
Vector4 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
IntEndptsRGBA temp_in, temp_out;
for (int region=0; region<NREGIONS; ++region)
@ -997,10 +998,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
pixels[np++] = tile.data[y][x];
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
np++;
}
}
}
opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region];
@ -1012,7 +1018,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
float temp_in_err = orig_err[region];
// now try to optimize these endpoints
float temp_out_err = optimize_one(pixels, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err)
@ -1093,7 +1099,8 @@ static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int i
}
}
}
throw "No candidate found, should never happen (avpcl mode 4).";
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 4).";
return FLT_MAX;
}
static void clamp(Vector4 &v)

View File

@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <cstring>
#include <float.h>
@ -354,9 +354,9 @@ static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
if (p.transform_mode)
{
// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]); // always positive here
endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[1]);
endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[2]);
endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[3]);
endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
}
}
}
@ -423,7 +423,7 @@ void AVPCL::decompress_mode5(const char *block, Tile &t)
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
// we already have a candidate mapping (and therefore its error) when we call this function, so take an early exit if the accumulated error so far
// exceeds what we already have
static float map_colors(const Vector4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
Vector3 palette_rgb[NINDICES3]; // could be nindices2
float palette_a[NINDICES3]; // could be nindices2
@ -520,7 +520,7 @@ static float map_colors(const Vector4 colors[], int np, int rotatemode, int inde
}
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : (nvCheckMacro(0),0);
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
toterr += besterr;
// do A index
@ -648,7 +648,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
}
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : (nvCheckMacro(0),0);
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
toterr[region] += besterr;
// do A index
@ -673,7 +673,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
// we have the old endpoints: old_endpts
@ -713,7 +713,7 @@ static float perturb_one(const Vector4 colors[], int np, int rotatemode, int ind
continue;
}
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
if (err < min_err)
{
@ -745,7 +745,7 @@ static float perturb_one(const Vector4 colors[], int np, int rotatemode, int ind
// if err > 40 6.25%
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
static float exhaustive(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
IntEndptsRGBA temp_endpts;
float best_err = orig_err;
@ -796,7 +796,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -817,7 +817,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -842,7 +842,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
return best_err;
}
static float optimize_one(const Vector4 colors[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
{
float opt_err = orig_err;
@ -879,8 +879,8 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minimum
float err0 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
if (err0 < err1)
{
@ -918,7 +918,7 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
if (err >= opt_err)
break;
@ -951,7 +951,7 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
bool first = true;
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
{
float new_err = exhaustive(colors, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err)
{
@ -991,6 +991,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
{
Vector4 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
IntEndptsRGBA temp_in, temp_out;
for (int region=0; region<NREGIONS; ++region)
@ -998,10 +999,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
pixels[np++] = tile.data[y][x];
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
np++;
}
}
}
opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region];
@ -1013,7 +1019,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
float temp_in_err = orig_err[region];
// now try to optimize these endpoints
float temp_out_err = optimize_one(pixels, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err)
@ -1094,7 +1100,8 @@ static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int i
}
}
}
throw "No candidate found, should never happen (avpcl mode 5).";
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 5).";
return FLT_MAX;
}
static void clamp(Vector4 &v)

View File

@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <cstring>
#include <float.h>
@ -390,7 +390,7 @@ void AVPCL::decompress_mode6(const char *block, Tile &t)
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{
Vector4 palette[NINDICES];
float toterr = 0;
@ -470,7 +470,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{
// we have the old endpoints: old_endpts
@ -509,7 +509,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
continue;
}
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
if (err < min_err)
{
@ -541,7 +541,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
{
IntEndptsRGBA_2 temp_endpts;
float best_err = orig_err;
@ -591,7 +591,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -611,7 +611,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -634,7 +634,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
return best_err;
}
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
{
float opt_err = orig_err;
@ -673,8 +673,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minimum
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
if (err0 < err1)
{
@ -710,7 +710,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
if (err >= opt_err)
break;
@ -744,7 +744,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
bool first = true;
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
{
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err)
{
@ -783,6 +783,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
{
Vector4 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
IntEndptsRGBA_2 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL];
@ -791,10 +792,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
pixels[np++] = tile.data[y][x];
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
np++;
}
}
}
opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region];
@ -810,10 +816,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// make sure we have a valid error for temp_in
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (map_colors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err)
@ -880,7 +886,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
return orig_toterr;
}
}
throw "No candidate found, should never happen (avpcl mode 6).";
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 6).";
return FLT_MAX;
}
static void clamp(Vector4 &v)

View File

@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <cstring>
#include <float.h>
@ -423,7 +423,7 @@ void AVPCL::decompress_mode7(const char *block, Tile &t)
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{
Vector4 palette[NINDICES];
float toterr = 0;
@ -503,7 +503,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{
// we have the old endpoints: old_endpts
@ -542,7 +542,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
continue;
}
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
if (err < min_err)
{
@ -574,7 +574,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
{
IntEndptsRGBA_2 temp_endpts;
float best_err = orig_err;
@ -624,7 +624,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -644,7 +644,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b;
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
if (err < best_err)
{
amin = a;
@ -667,7 +667,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
return best_err;
}
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
{
float opt_err = orig_err;
@ -706,8 +706,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minimum
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
if (err0 < err1)
{
@ -743,7 +743,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
if (err >= opt_err)
break;
@ -777,7 +777,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
bool first = true;
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
{
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err)
{
@ -816,6 +816,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
{
Vector4 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
IntEndptsRGBA_2 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL];
@ -824,10 +825,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
pixels[np++] = tile.data[y][x];
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
np++;
}
}
}
opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region];
@ -843,10 +849,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
// make sure we have a valid error for temp_in
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (map_colors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err)
@ -921,7 +927,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
}
}
}
throw "No candidate found, should never happen (avpcl mode 7).";
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 7).";
return FLT_MAX;
}
static void clamp(Vector4 &v)

View File

@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations
// Utility and common routines
#include "utils.h"
#include "avpcl_utils.h"
#include "avpcl.h"
#include "nvcore/Debug.h"
#include "nvmath/Vector.inl"
@ -129,7 +129,7 @@ float Utils::metric4(Vector4::Arg a, Vector4::Arg b)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
@ -255,7 +255,7 @@ float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
@ -286,7 +286,7 @@ float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg r
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
@ -341,7 +341,7 @@ float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int ro
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}

View File

@ -60,8 +60,7 @@ private:
return bit != 0;
}
void writeone(int bit) {
if (readonly)
throw "Writing a read-only bit stream";
nvAssert (!readonly); // "Writing a read-only bit stream"
nvAssert (bptr < maxbits);
if (bptr >= maxbits) return;
if (bit&1)

View File

@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations
#include "nvmath/Vector.h"
#include <math.h>
#include "utils.h"
#include "avpcl_utils.h"
namespace AVPCL {
@ -28,6 +28,7 @@ public:
static const int TILE_W = 4;
static const int TILE_TOTAL = TILE_H * TILE_W;
nv::Vector4 data[TILE_H][TILE_W];
float importance_map[TILE_H][TILE_W];
int size_x, size_y; // actual size of tile
Tile() {};

View File

@ -1285,9 +1285,9 @@ __device__ void saveBlockDXT1_Parallel(uint endpoints, float3 colors[16], int xr
ushort endpoint0 = endpoints & 0xFFFF;
ushort endpoint1 = endpoints >> 16;
int3 palette[4];
palette[0] = color16ToInt3(endpoint0);
palette[1] = color16ToInt3(endpoint1);
int3 palette[4];
palette[0] = color16ToInt3(endpoint0);
palette[1] = color16ToInt3(endpoint1);
int d0 = colorDistance(palette[0], color);
int d1 = colorDistance(palette[1], color);

View File

@ -102,8 +102,13 @@ namespace nvtt
Format_DXT1n, // Not supported.
Format_CTX1, // Not supported.
Format_BC6,
Format_BC7,
Format_BC6, // Not supported yet.
Format_BC7, // Not supported yet.
Format_BC5_Luma, // Two DXT alpha blocks encoding a single float.
Format_BC3_RGBM, //
Format_Count
};
// Pixel types. These basically indicate how the output should be interpreted, but do not have any influence over the input. They are only relevant in RGBA mode.
@ -132,6 +137,7 @@ namespace nvtt
Decoder_D3D10,
Decoder_D3D9,
Decoder_NV5x,
//Decoder_RSX, // To take advantage of DXT5 bug.
};
@ -160,8 +166,9 @@ namespace nvtt
NVTT_API void setPitchAlignment(int pitchAlignment);
// @@ I wish this wasn't part of the compression options. Quantization is applied before compression. We don't have compressors with error diffusion.
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127); // (Deprecated in NVTT 2.1)
// @@ I wish this wasn't part of the compression options. Quantization is applied before compression. We don't have compressors with error diffusion.
// @@ These options are only taken into account when using the InputOptions API.
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
NVTT_API void setTargetDecoder(Decoder decoder);
@ -205,6 +212,7 @@ namespace nvtt
InputFormat_BGRA_8UB, // Normalized [0, 1] 8 bit fixed point.
InputFormat_RGBA_16F, // 16 bit floating point.
InputFormat_RGBA_32F, // 32 bit floating point.
InputFormat_R_32F, // Single channel 32 bit floating point.
};
// Mipmap downsampling filters.
@ -426,6 +434,7 @@ namespace nvtt
// A surface is one level of a 2D or 3D texture. (New in NVTT 2.1)
// @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression.
struct Surface
{
NVTT_API Surface();
@ -450,7 +459,7 @@ namespace nvtt
NVTT_API bool isNormalMap() const;
NVTT_API int countMipmaps() const;
NVTT_API int countMipmaps(int min_size) const;
NVTT_API float alphaTestCoverage(float alphaRef = 0.5) const;
NVTT_API float alphaTestCoverage(float alphaRef = 0.5, int alpha_channel = 3) const;
NVTT_API float average(int channel, int alpha_channel = -1, float gamma = 2.2f) const;
NVTT_API const float * data() const;
NVTT_API const float * channel(int i) const;
@ -496,9 +505,9 @@ namespace nvtt
NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale);
NVTT_API void setBorder(float r, float g, float b, float a);
NVTT_API void fill(float r, float g, float b, float a);
NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f);
NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.0f);
NVTT_API void fromRGBM(float range = 1.0f, float threshold = 0.0f);
NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f, int alpha_channel = 3);
NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.25f);
NVTT_API void fromRGBM(float range = 1.0f, float threshold = 0.25f);
NVTT_API void toLM(float range = 1.0f, float threshold = 0.0f);
NVTT_API void toRGBE(int mantissaBits, int exponentBits);
NVTT_API void fromRGBE(int mantissaBits, int exponentBits);
@ -511,6 +520,7 @@ namespace nvtt
NVTT_API void convolve(int channel, int kernelSize, float * kernelData);
NVTT_API void toLogScale(int channel, float base);
NVTT_API void fromLogScale(int channel, float base);
NVTT_API void setAtlasBorder(int w, int h, float r, float g, float b, float a);
NVTT_API void toneMap(ToneMapper tm, float * parameters);
@ -648,6 +658,7 @@ namespace nvtt
NVTT_API float angularError(const Surface & reference, const Surface & img);
NVTT_API Surface diff(const Surface & reference, const Surface & img, float scale);
NVTT_API float rmsToneMappedError(const Surface & reference, const Surface & img, float exposure);
} // nvtt namespace

View File

@ -341,7 +341,7 @@ int main(int argc, char *argv[])
setIndex = atoi(argv[i+1]);
for (int j = 0; j < s_imageSetCount; j++) {
if (strCaseCmp(s_imageSets[j].name, argv[i+1]) == 0) {
if (strCaseDiff(s_imageSets[j].name, argv[i+1]) == 0) {
setIndex = j;
break;
}

View File

@ -96,7 +96,7 @@ int main(int argc, char *argv[])
return 1;
}
if (nv::strCaseCmp(output.extension(), ".dds") != 0)
if (nv::strCaseDiff(output.extension(), ".dds") != 0)
{
//output.stripExtension();
output.append(".dds");

View File

@ -376,7 +376,7 @@ int main(int argc, char *argv[])
// Set input options.
nvtt::InputOptions inputOptions;
if (nv::strCaseCmp(input.extension(), ".dds") == 0)
if (nv::strCaseDiff(input.extension(), ".dds") == 0)
{
// Load surface.
nv::DirectDrawSurface dds(input.str());
@ -428,7 +428,7 @@ int main(int argc, char *argv[])
}
else
{
if (nv::strCaseCmp(input.extension(), ".exr") == 0 || nv::strCaseCmp(input.extension(), ".hdr") == 0)
if (nv::strCaseDiff(input.extension(), ".exr") == 0 || nv::strCaseDiff(input.extension(), ".hdr") == 0)
{
loadAsFloat = true;
}
@ -519,6 +519,8 @@ int main(int argc, char *argv[])
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(format);
//compressionOptions.setQuantization(/*color dithering*/true, /*alpha dithering*/false, /*binary alpha*/false);
if (format == nvtt::Format_BC2) {
// Dither alpha when using BC2.
compressionOptions.setQuantization(/*color dithering*/false, /*alpha dithering*/true, /*binary alpha*/false);
@ -539,6 +541,10 @@ int main(int argc, char *argv[])
// compressionOptions.setPixelFormat(16, 16, 16, 16);
// compressionOptions.setPixelType(nvtt::PixelType_UnsignedNorm);
// compressionOptions.setPixelFormat(16, 0, 0, 0);
//compressionOptions.setQuantization(/*color dithering*/true, /*alpha dithering*/false, /*binary alpha*/false);
//compressionOptions.setPixelType(nvtt::PixelType_UnsignedNorm);
//compressionOptions.setPixelFormat(5, 6, 5, 0);
}
}

View File

@ -37,7 +37,7 @@
static bool loadImage(nv::Image & image, const char * fileName)
{
if (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0)
if (nv::strCaseDiff(nv::Path::extension(fileName), ".dds") == 0)
{
nv::DirectDrawSurface dds(fileName);
if (!dds.isValid())
@ -246,7 +246,7 @@ int main(int argc, char *argv[])
double g = float(c0.g - c1.g);
double b = float(c0.b - c1.b);
double a = float(c0.a - c1.a);
error_r.addSample(r);
error_g.addSample(g);
error_b.addSample(b);

View File

@ -40,7 +40,7 @@
static bool loadImage(nv::Image & image, const char * fileName)
{
if (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0)
if (nv::strCaseDiff(nv::Path::extension(fileName), ".dds") == 0)
{
nv::DirectDrawSurface dds(fileName);
if (!dds.isValid())