nvidia-texture-tools/src/nvtt/QuickCompressDXT.cpp

// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"

#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"

#include "nvmath/Color.inl"
#include "nvmath/Vector.inl"
#include "nvmath/Fitting.h"

#include "nvcore/Utils.h" // swap

#include <string.h> // memset
#include <float.h> // FLT_MAX

using namespace nv;
using namespace QuickCompress;


inline static void extractColorBlockRGB(const ColorBlock & rgba, Vector3 block[16])
{
	for (int i = 0; i < 16; i++)
	{
		const Color32 c = rgba.color(i);
		block[i] = Vector3(c.r, c.g, c.b);
	}
}

// Gathers the RGB values of every color in 'rgba' whose alpha is above 127 and stores
// them contiguously at the start of 'block'. Colors with alpha <= 127 are skipped, so the
// output positions do NOT correspond to texel positions.
// Returns the number of colors written; entries past that count are left untouched.
inline static uint extractColorBlockRGBA(const ColorBlock & rgba, Vector3 block[16])
{
	int kept = 0;

	for (int texel = 0; texel != 16; ++texel)
	{
		const Color32 src = rgba.color(texel);
		if (src.a <= 127)
		{
			continue;
		}

		block[kept] = Vector3(src.r, src.g, src.b);
		kept++;
	}

	return kept;
}


// find minimum and maximum colors based on bounding box in color space
inline static void findMinMaxColorsBox(const Vector3 * block, uint num, Vector3 * restrict maxColor, Vector3 * restrict minColor)
{
	*maxColor = Vector3(0, 0, 0);
	*minColor = Vector3(255, 255, 255);

	for (uint i = 0; i < num; i++)
	{
		*maxColor = max(*maxColor, block[i]);
		*minColor = min(*minColor, block[i]);
	}
}


inline static void selectDiagonal(const Vector3 * block, uint num, Vector3 * restrict maxColor, Vector3 * restrict minColor)
{
	Vector3 center = (*maxColor + *minColor) * 0.5f;

	Vector2 covariance = Vector2(0.0f);
	for (uint i = 0; i < num; i++)
	{
		Vector3 t = block[i] - center;
		covariance += t.xy() * t.z;
	}

	float x0 = maxColor->x;
	float y0 = maxColor->y;
	float x1 = minColor->x;
	float y1 = minColor->y;

	if (covariance.x < 0) {
		swap(x0, x1);
	}
	if (covariance.y < 0) {
		swap(y0, y1);
	}

	maxColor->set(x0, y0, maxColor->z);
	minColor->set(x1, y1, minColor->z);
}

// Shrinks the box spanned by *minColor and *maxColor: each endpoint is moved towards the
// other by 1/16 of the box extent minus a small constant bias, and the results are clamped
// to [0, 255].
// NOTE(review): the bias (8/255)/16 looks like it was tuned for colors in [0, 1], while the
// clamp range here is [0, 255] -- confirm whether that is intentional.
inline static void insetBBox(Vector3 * restrict maxColor, Vector3 * restrict minColor)
{
	const Vector3 extent = *maxColor - *minColor;
	const Vector3 inset = extent / 16.0f - (8.0f / 255.0f) / 16.0f;

	const Vector3 shrunkMax = clamp(*maxColor - inset, 0.0f, 255.0f);
	const Vector3 shrunkMin = clamp(*minColor + inset, 0.0f, 255.0f);

	*maxColor = shrunkMax;
	*minColor = shrunkMin;
}

#include "nvmath/ftoi.h"

// Quantizes a color whose channels are in the [0, 255] range to RGB565.
// Returns the packed 16-bit value (red in the top 5 bits, green in the middle 6, blue in the
// low 5) and overwrites *v with the color obtained by expanding that value back to 8 bits
// per channel through bit replication.
inline static uint16 roundAndExpand(Vector3 * restrict v)
{
	// Start from the truncated 5/6/5-bit level of each channel.
	uint r = ftoi_floor(clamp(v->x * (31.0f / 255.0f), 0.0f, 31.0f));
	uint g = ftoi_floor(clamp(v->y * (63.0f / 255.0f), 0.0f, 63.0f));
	uint b = ftoi_floor(clamp(v->z * (31.0f / 255.0f), 0.0f, 31.0f));

	// Red: step up one level when the expansion of the next level is strictly closer.
	{
		const uint next = r + 1;
		const float below = float((r << 3) | (r >> 2));
		const float above = float((next << 3) | (next >> 2));
		if (fabs(v->x - above) < fabs(v->x - below)) r = min(next, 31U);
	}

	// Green (6 bits).
	{
		const uint next = g + 1;
		const float below = float((g << 2) | (g >> 4));
		const float above = float((next << 2) | (next >> 4));
		if (fabs(v->y - above) < fabs(v->y - below)) g = min(next, 63U);
	}

	// Blue.
	{
		const uint next = b + 1;
		const float below = float((b << 3) | (b >> 2));
		const float above = float((next << 3) | (next >> 2));
		if (fabs(v->z - above) < fabs(v->z - below)) b = min(next, 31U);
	}

	const uint16 packed = (r << 11) | (g << 5) | b;

	// Expand the chosen levels back to 8 bits and hand them to the caller.
	const uint r8 = (r << 3) | (r >> 2);
	const uint g8 = (g << 2) | (g >> 4);
	const uint b8 = (b << 3) | (b >> 2);
	*v = Vector3(float(r8), float(g8), float(b8));

	return packed;
}

// Quantizes a color whose channels are in the [0, 1] range to RGB565.
// Returns the packed 16-bit value (red in the top 5 bits, green in the middle 6, blue in the
// low 5) and overwrites *v with the color obtained by expanding that value back to 8 bits
// per channel, rescaled to [0, 1].
inline static uint16 roundAndExpand01(Vector3 * restrict v)
{
	// Start from the truncated 5/6/5-bit level of each channel.
	uint r = ftoi_floor(clamp(v->x * 31.0f, 0.0f, 31.0f));
	uint g = ftoi_floor(clamp(v->y * 63.0f, 0.0f, 63.0f));
	uint b = ftoi_floor(clamp(v->z * 31.0f, 0.0f, 31.0f));

	// The candidate levels below are expanded to the [0, 255] range, so the input has to be
	// brought into that range before comparing. Comparing the raw [0, 1] value against them
	// would never pick the upper level and the function would degrade to plain truncation.
	const float x = v->x * 255.0f;
	const float y = v->y * 255.0f;
	const float z = v->z * 255.0f;

	// For each channel, step up one level when the next level's expansion is strictly closer.
	float r0 = float(((r+0) << 3) | ((r+0) >> 2));
	float r1 = float(((r+1) << 3) | ((r+1) >> 2));
	if (fabs(x - r1) < fabs(x - r0)) r = min(r+1, 31U);

	float g0 = float(((g+0) << 2) | ((g+0) >> 4));
	float g1 = float(((g+1) << 2) | ((g+1) >> 4));
	if (fabs(y - g1) < fabs(y - g0)) g = min(g+1, 63U);

	float b0 = float(((b+0) << 3) | ((b+0) >> 2));
	float b1 = float(((b+1) << 3) | ((b+1) >> 2));
	if (fabs(z - b1) < fabs(z - b0)) b = min(b+1, 31U);

	uint16 w = (r << 11) | (g << 5) | b;

	// Expand the chosen levels back to 8 bits by bit replication and return them in [0, 1].
	r = (r << 3) | (r >> 2);
	g = (g << 2) | (g >> 4);
	b = (b << 3) | (b >> 2);
	*v = Vector3(float(r) / 255.0f, float(g) / 255.0f, float(b) / 255.0f);

	return w;
}


// Squared Euclidean distance between two colors (no square root is taken).
inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
{
	const Vector3 delta = c0 - c1;
	return dot(delta, delta);
}

// Currently a pass-through: returns 'v' unchanged.
// The two commented-out lines are earlier rounding variants that are disabled; the only
// visible references to this function in this file are themselves commented out.
Vector3 round255(const Vector3 & v) {
    // Disabled variant: round each channel of a [0, 1] color to the nearest 1/255 step.
    //return Vector3(ftoi_round(255 * v.x), ftoi_round(255 * v.y), ftoi_round(255 * v.z)) * (1.0f / 255);
    // Disabled variant: round each channel to the nearest integer.
    //return Vector3(floorf(v.x + 0.5f), floorf(v.y + 0.5f), floorf(v.z + 0.5f));
    return v;
}


// Assigns one of the four palette entries to each of the 16 colors in 'block' and returns
// the 2-bit indices packed into a 32-bit word (color i occupies bits 2*i and 2*i+1).
// Palette: 0 = maxColor, 1 = minColor, 2 = 1/3 of the way from max to min, 3 = 2/3 of the way.
// The index is derived without branches from pairwise comparisons of the squared distances.
inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
	const Vector3 entry0 = maxColor;
	const Vector3 entry1 = minColor;
	const Vector3 entry2 = lerp(entry0, entry1, 1.0f / 3.0f);
	const Vector3 entry3 = lerp(entry0, entry1, 2.0f / 3.0f);

	uint packed = 0;
	for (int texel = 0, shift = 0; texel < 16; texel++, shift += 2)
	{
		const Vector3 & color = block[texel];

		const float d0 = colorDistance(entry0, color);
		const float d1 = colorDistance(entry1, color);
		const float d2 = colorDistance(entry2, color);
		const float d3 = colorDistance(entry3, color);

		// Pairwise "is farther than" flags.
		const uint far0vs3 = d0 > d3;
		const uint far1vs2 = d1 > d2;
		const uint far0vs2 = d0 > d2;
		const uint far1vs3 = d1 > d3;
		const uint far2vs3 = d2 > d3;

		const uint lowBit = far0vs3 & far2vs3;
		const uint highBit = (far1vs2 & far0vs2) | (far0vs3 & far1vs3);

		packed |= (lowBit | (highBit << 1)) << shift;
	}

	return packed;
}

// maxColor and minColor are expected to be in the same range as the color set.
/*
inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor)
{
	Vector3 palette[4];
	palette[0] = maxColor;
	palette[1] = minColor;
	palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
	palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);

    Vector3 mem[(4+2)*2];
    memset(mem, 0, sizeof(mem));

	Vector3 * row0 = mem;
	Vector3 * row1 = mem + (4+2);

	uint indices = 0;
    //for(int i = 0; i < 16; i++)
	for (uint y = 0; y < 4; y++) {
		for (uint x = 0; x < 4; x++) {
            int i = y*4+x;

            if (!set.isValidIndex(i)) {
                // Skip masked pixels and out of bounds.
                continue;
            }

            Vector3 color = set.color(i).xyz();

            // Add error.
            color += row0[1+x];

		    float d0 = colorDistance(palette[0], color);
		    float d1 = colorDistance(palette[1], color);
		    float d2 = colorDistance(palette[2], color);
		    float d3 = colorDistance(palette[3], color);

		    uint b0 = d0 > d3;
		    uint b1 = d1 > d2;
		    uint b2 = d0 > d2;
		    uint b3 = d1 > d3;
		    uint b4 = d2 > d3;

		    uint x0 = b1 & b2;
		    uint x1 = b0 & b3;
		    uint x2 = b0 & b4;

            int index = x2 | ((x0 | x1) << 1);
		    indices |= index << (2 * i);

		    // Compute new error.
		    Vector3 diff = color - palette[index];

		    // Propagate new error.
		    //row0[1+x+1] += 7.0f / 16.0f * diff;
		    //row1[1+x-1] += 3.0f / 16.0f * diff;
		    //row1[1+x+0] += 5.0f / 16.0f * diff;
		    //row1[1+x+1] += 1.0f / 16.0f * diff;
        }

		swap(row0, row1);
		memset(row1, 0, sizeof(Vector3) * (4+2));
	}

	return indices;
}*/

inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
	Vector3 palette[4];
	palette[0] = maxColor;
	palette[1] = minColor;
	//palette[2] = round255((2 * palette[0] + palette[1]) / 3.0f);
	//palette[3] = round255((2 * palette[1] + palette[0]) / 3.0f);
	palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
	palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);

	float total = 0.0f;
	for (int i = 0; i < 16; i++)
	{
		float d0 = colorDistance(palette[0], block[i]);
		float d1 = colorDistance(palette[1], block[i]);
		float d2 = colorDistance(palette[2], block[i]);
		float d3 = colorDistance(palette[3], block[i]);

		total += min(min(d0, d1), min(d2, d3));
	}

	return total;
}

inline static float evaluatePaletteError3(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
	Vector3 palette[4];
	palette[0] = minColor;
	palette[1] = maxColor;
	palette[2] = (palette[0] + palette[1]) * 0.5f;
	palette[3] = Vector3(0);

	float total = 0.0f;
	for (int i = 0; i < 16; i++)
	{
		float d0 = colorDistance(palette[0], block[i]);
		float d1 = colorDistance(palette[1], block[i]);
		float d2 = colorDistance(palette[2], block[i]);
		//float d3 = colorDistance(palette[3], block[i]);

		//total += min(min(d0, d1), min(d2, d3));
        total += min(min(d0, d1), d2);
	}

	return total;
}


// maxColor and minColor are expected to be in the same range as the color set.
/*inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor)
{
	Vector3 palette[4];
	palette[0] = minColor;
	palette[1] = maxColor;
	palette[2] = (palette[0] + palette[1]) * 0.5f;

	uint indices = 0;
	for(int i = 0; i < 16; i++)
	{
        if (!set.isValidIndex(i)) {
            // Skip masked pixels and out of bounds.
            indices |= 3 << (2 * i);
            continue;
        }

        Vector3 color = set.color(i).xyz();

		float d0 = colorDistance(palette[0], color);
		float d1 = colorDistance(palette[1], color);
		float d2 = colorDistance(palette[2], color);

		uint index;
		if (d0 < d1 && d0 < d2) index = 0;
		else if (d1 < d2) index = 1;
		else index = 2;

		indices |= index << (2 * i);
	}

	return indices;
}*/

// Assigns one of three palette entries to each of the 16 colors in 'block' and returns the
// 2-bit indices packed into a 32-bit word (color i occupies bits 2*i and 2*i+1).
// Palette: 0 = minColor, 1 = maxColor, 2 = their midpoint. Index 3 is never produced here.
// Ties are resolved in favor of the later entry: 0 wins only when strictly closer than both
// others, and 1 wins over 2 only when strictly closer.
inline static uint computeIndices3(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
	const Vector3 low = minColor;
	const Vector3 high = maxColor;
	const Vector3 middle = (low + high) * 0.5f;

	uint packed = 0;
	for (int texel = 0; texel < 16; texel++)
	{
		const float toLow = colorDistance(low, block[texel]);
		const float toHigh = colorDistance(high, block[texel]);
		const float toMiddle = colorDistance(middle, block[texel]);

		uint index = 2;
		if (toLow < toHigh && toLow < toMiddle)
		{
			index = 0;
		}
		else if (toHigh < toMiddle)
		{
			index = 1;
		}

		packed |= index << (2 * texel);
	}

	return packed;
}


// Refines the endpoints of a four-color block by a least-squares fit.
// Using the indices currently stored in 'dxtBlock', every color in 'block' is modelled as
// alpha * a + beta * b with beta taken from its index; the 2x2 normal equations are solved
// for a and b, both are quantized to RGB565, and the indices are recomputed against the
// new endpoints. If the system is singular the block is left untouched.
static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
{
	// Weight of the second endpoint for each 2-bit index: 0 -> 0, 1 -> 1, 2 -> 1/3, 3 -> 2/3.
	const float betaForIndex[4] = { 0.0f, 1.0f, 1.0f / 3.0f, 2.0f / 3.0f };

	float sumAlpha2 = 0.0f;
	float sumBeta2 = 0.0f;
	float sumAlphaBeta = 0.0f;
	Vector3 sumAlphaX(0.0f);
	Vector3 sumBetaX(0.0f);

	uint remaining = dxtBlock->indices;
	for (int texel = 0; texel < 16; ++texel, remaining >>= 2)
	{
		const float beta = betaForIndex[remaining & 3];
		const float alpha = 1.0f - beta;

		sumAlpha2 += alpha * alpha;
		sumBeta2 += beta * beta;
		sumAlphaBeta += alpha * beta;
		sumAlphaX += alpha * block[texel];
		sumBetaX += beta * block[texel];
	}

	// Determinant of the normal equations; bail out when there is no unique solution.
	const float denom = sumAlpha2 * sumBeta2 - sumAlphaBeta * sumAlphaBeta;
	if (equal(denom, 0.0f)) return;

	const float factor = 1.0f / denom;

	Vector3 a = (sumAlphaX * sumBeta2 - sumBetaX * sumAlphaBeta) * factor;
	Vector3 b = (sumBetaX * sumAlpha2 - sumAlphaX * sumAlphaBeta) * factor;

	a = clamp(a, 0, 255);
	b = clamp(b, 0, 255);

	uint16 packedA = roundAndExpand(&a);
	uint16 packedB = roundAndExpand(&b);

	// Store the numerically larger packed color first.
	if (packedA < packedB)
	{
		swap(a, b);
		swap(packedA, packedB);
	}

	dxtBlock->col0 = Color16(packedA);
	dxtBlock->col1 = Color16(packedB);
	dxtBlock->indices = computeIndices4(block, a, b);
}

// Refines the endpoints of a three-color block by a least-squares fit.
// Using the indices currently stored in 'dxtBlock', every color in 'block' is modelled as
// alpha * a + beta * b, where any index with bit 1 set is treated as the midpoint
// (beta = 0.5). The 2x2 normal equations are solved for a and b, both are quantized to
// RGB565, the smaller packed color is stored in col0, and the indices are recomputed.
// If the system is singular the block is left untouched.
static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock)
{
	// Weight of the second endpoint for each 2-bit index: 0 -> 0, 1 -> 1, 2 and 3 -> 1/2.
	const float betaForIndex[4] = { 0.0f, 1.0f, 0.5f, 0.5f };

	float sumAlpha2 = 0.0f;
	float sumBeta2 = 0.0f;
	float sumAlphaBeta = 0.0f;
	Vector3 sumAlphaX(0.0f);
	Vector3 sumBetaX(0.0f);

	uint remaining = dxtBlock->indices;
	for (int texel = 0; texel < 16; ++texel, remaining >>= 2)
	{
		const float beta = betaForIndex[remaining & 3];
		const float alpha = 1.0f - beta;

		sumAlpha2 += alpha * alpha;
		sumBeta2 += beta * beta;
		sumAlphaBeta += alpha * beta;
		sumAlphaX += alpha * block[texel];
		sumBetaX += beta * block[texel];
	}

	// Determinant of the normal equations; bail out when there is no unique solution.
	const float denom = sumAlpha2 * sumBeta2 - sumAlphaBeta * sumAlphaBeta;
	if (equal(denom, 0.0f)) return;

	const float factor = 1.0f / denom;

	Vector3 a = (sumAlphaX * sumBeta2 - sumBetaX * sumAlphaBeta) * factor;
	Vector3 b = (sumBetaX * sumAlpha2 - sumAlphaX * sumAlphaBeta) * factor;

	a = clamp(a, 0, 255);
	b = clamp(b, 0, 255);

	uint16 packedA = roundAndExpand(&a);
	uint16 packedB = roundAndExpand(&b);

	// Make (a, packedA) the numerically larger pair.
	if (packedA < packedB)
	{
		swap(a, b);
		swap(packedA, packedB);
	}

	// Smaller packed color goes first.
	dxtBlock->col0 = Color16(packedB);
	dxtBlock->col1 = Color16(packedA);
	dxtBlock->indices = computeIndices3(block, a, b);
}

namespace
{

	// For each of the 16 source alphas, selects the entry of the block's 8-value palette with
	// the smallest squared difference (the lowest index wins ties) and stores it in 'block'.
	// Returns the sum of the squared differences of the selected entries.
	static uint computeAlphaIndices(const AlphaBlock4x4 & src, AlphaBlockDXT5 * block)
	{
		uint8 palette[8];
		block->evaluatePalette(palette, false); // @@ Use target decoder.

		uint errorSum = 0;

		for (uint texel = 0; texel < 16; texel++)
		{
			const uint8 target = src.alpha[texel];

			// Sentinels: larger than any possible squared difference / not a valid index.
			uint lowestError = 256*256;
			uint chosen = 8;

			for (uint entry = 0; entry < 8; entry++)
			{
				const int diff = palette[entry] - target;
				const uint squared = diff * diff;

				if (squared < lowestError)
				{
					chosen = entry;
					lowestError = squared;
				}
			}
			nvDebugCheck(chosen < 8);

			block->setIndex(texel, chosen);
			errorSum += lowestError;
		}

		return errorSum;
	}

	static void optimizeAlpha8(const AlphaBlock4x4 & src, AlphaBlockDXT5 * block)
	{
		float alpha2_sum = 0;
		float beta2_sum = 0;
		float alphabeta_sum = 0;
		float alphax_sum = 0;
		float betax_sum = 0;

		for (int i = 0; i < 16; i++)
		{
			uint idx = block->index(i);
			float alpha;
			if (idx < 2) alpha = 1.0f - idx;
			else alpha = (8.0f - idx) / 7.0f;

			float beta = 1 - alpha;

			alpha2_sum += alpha * alpha;
			beta2_sum += beta * beta;
			alphabeta_sum += alpha * beta;
			alphax_sum += alpha * src.alpha[i];
			betax_sum += beta * src.alpha[i];
		}

		const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);

		float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
		float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;

		uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
		uint alpha1 = uint(min(max(b, 0.0f), 255.0f));

		if (alpha0 < alpha1)
		{
			swap(alpha0, alpha1);

			// Flip indices:
			for (int i = 0; i < 16; i++)
			{
				uint idx = block->index(i);
				if (idx < 2) block->setIndex(i, 1 - idx);
				else block->setIndex(i, 9 - idx);
			}
		}
		else if (alpha0 == alpha1)
		{
			for (int i = 0; i < 16; i++)
			{
				block->setIndex(i, 0);
			}
		}

		block->alpha0 = alpha0;
		block->alpha1 = alpha1;
	}

	/*
	static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block)
	{
		float alpha2_sum = 0;
		float beta2_sum = 0;
		float alphabeta_sum = 0;
		float alphax_sum = 0;
		float betax_sum = 0;

		for (int i = 0; i < 16; i++)
		{
			uint8 x = rgba.color(i).a;
			if (x == 0 || x == 255) continue;

			uint bits = block->index(i);
			if (bits == 6 || bits == 7) continue;

			float alpha;
			if (bits == 0) alpha = 1.0f;
			else if (bits == 1) alpha = 0.0f;
			else alpha = (6.0f - block->index(i)) / 5.0f;

			float beta = 1 - alpha;

			alpha2_sum += alpha * alpha;
			beta2_sum += beta * beta;
			alphabeta_sum += alpha * beta;
			alphax_sum += alpha * x;
			betax_sum += beta * x;
		}

		const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);

		float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
		float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;

		uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
		uint alpha1 = uint(min(max(b, 0.0f), 255.0f));

		if (alpha0 > alpha1)
		{
			swap(alpha0, alpha1);
		}

		block->alpha0 = alpha0;
		block->alpha1 = alpha1;
	}
	*/

	// Returns true when the two blocks agree on every bit of 'u' above the low 16 bits.
	// Assumes the low 16 bits of 'u' hold the two endpoint alphas and the remaining 48 bits
	// hold the per-texel indices -- confirm against the AlphaBlockDXT5 declaration.
	static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1)
	{
		// The mask selects everything except the low 16 bits. It has to be applied with AND:
		// OR-ing it in forces all masked bits to 1 in both operands, which would leave only
		// the low 16 bits participating in the comparison.
		const uint64 mask = ~uint64(0xFFFF);
		return (block0.u & mask) == (block1.u & mask);
	}

} // namespace


void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
	if (rgba.isSingleColor())
	{
		OptimalCompress::compressDXT1(rgba.color(0), dxtBlock);
	}
	else
	{
		// read block
		Vector3 block[16];
		extractColorBlockRGB(rgba, block);

#if 1
		// find min and max colors
		Vector3 maxColor, minColor;
		findMinMaxColorsBox(block, 16, &maxColor, &minColor);

		selectDiagonal(block, 16, &maxColor, &minColor);

		insetBBox(&maxColor, &minColor);
#else
		float weights[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
		Vector3 cluster[4];
		int count = Compute4Means(16, block, weights, Vector3(1, 1, 1), cluster);

		Vector3 maxColor, minColor;
		float bestError = FLT_MAX;

		for (int i = 1; i < 4; i++)
		{
			for (int j = 0; j < i; j++)
			{
		        uint16 color0 = roundAndExpand(&cluster[i]);
		        uint16 color1 = roundAndExpand(&cluster[j]);

				float error = evaluatePaletteError4(block, cluster[i], cluster[j]);
				if (error < bestError) {
					bestError = error;
					maxColor = cluster[i];
					minColor = cluster[j];
				}
			}
		}
#endif

		uint16 color0 = roundAndExpand(&maxColor);
		uint16 color1 = roundAndExpand(&minColor);

		if (color0 < color1)
		{
			swap(maxColor, minColor);
			swap(color0, color1);
		}

		dxtBlock->col0 = Color16(color0);
		dxtBlock->col1 = Color16(color1);
		dxtBlock->indices = computeIndices4(block, maxColor, minColor);

		optimizeEndPoints4(block, dxtBlock);
	}
}


void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
	bool hasAlpha = false;

	for (uint i = 0; i < 16; i++)
	{
		if (rgba.color(i).a == 0) {
			hasAlpha = true;
			break;
		}
	}

	if (!hasAlpha)
	{
		compressDXT1(rgba, dxtBlock);
	}
	// @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode.
	//else if (rgba.isSingleColorNoAlpha()) { ... }
	else
	{
		// read block
		Vector3 block[16];
		uint num = extractColorBlockRGBA(rgba, block);

		// find min and max colors
		Vector3 maxColor, minColor;
		findMinMaxColorsBox(block, num, &maxColor, &minColor);

		selectDiagonal(block, num, &maxColor, &minColor);

		insetBBox(&maxColor, &minColor);

		uint16 color0 = roundAndExpand(&maxColor);
		uint16 color1 = roundAndExpand(&minColor);

		if (color0 < color1)
		{
			swap(maxColor, minColor);
			swap(color0, color1);
		}

		dxtBlock->col0 = Color16(color1);
		dxtBlock->col1 = Color16(color0);
		dxtBlock->indices = computeIndices3(block, maxColor, minColor);

		//	optimizeEndPoints(block, dxtBlock);
	}
}


void QuickCompress::compressDXT3(const ColorBlock & src, BlockDXT3 * dxtBlock)
{
	compressDXT1(src, &dxtBlock->color);
	OptimalCompress::compressDXT3A(src, &dxtBlock->alpha);
}

void QuickCompress::compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount/*=8*/)
{
    AlphaBlock4x4 tmp;
    tmp.init(src, 3);
    compressDXT5A(tmp, dst, iterationCount);
}

void QuickCompress::compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount/*=8*/)
{
	uint8 alpha0 = 0;
	uint8 alpha1 = 255;

	// Get min/max alpha.
	for (uint i = 0; i < 16; i++)
	{
		uint8 alpha = src.alpha[i];
		alpha0 = max(alpha0, alpha);
		alpha1 = min(alpha1, alpha);
	}

	AlphaBlockDXT5 block;
	block.alpha0 = alpha0 - (alpha0 - alpha1) / 34;
	block.alpha1 = alpha1 + (alpha0 - alpha1) / 34;
	uint besterror = computeAlphaIndices(src, &block);

	AlphaBlockDXT5 bestblock = block;

	for (int i = 0; i < iterationCount; i++)
	{
		optimizeAlpha8(src, &block);
		uint error = computeAlphaIndices(src, &block);

		if (error >= besterror)
		{
			// No improvement, stop.
			break;
		}
		if (sameIndices(block, bestblock))
		{
			bestblock = block;
			break;
		}

		besterror = error;
		bestblock = block;
	};

	// Copy best block to result;
	*dst = bestblock;
}

void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
{
	compressDXT1(rgba, &dxtBlock->color);
	compressDXT5A(rgba, &dxtBlock->alpha, iterationCount);
}


/*void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block)
{
    Vector3 minColor = start * 255.0f;
    Vector3 maxColor = end * 255.0f;
    uint16 color0 = roundAndExpand(&maxColor);
    uint16 color1 = roundAndExpand(&minColor);

    if (color0 < color1)
    {
        swap(maxColor, minColor);
        swap(color0, color1);
    }

    block->col0 = Color16(color0);
    block->col1 = Color16(color1);
    block->indices = computeIndices4(set, maxColor / 255.0f, minColor / 255.0f);

    //optimizeEndPoints4(set, block);
}

void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block)
{
    Vector3 minColor = start * 255.0f;
    Vector3 maxColor = end * 255.0f;
    uint16 color0 = roundAndExpand(&minColor);
    uint16 color1 = roundAndExpand(&maxColor);

    if (color0 > color1)
    {
        swap(maxColor, minColor);
        swap(color0, color1);
    }

    block->col0 = Color16(color0);
    block->col1 = Color16(color1);
    block->indices = computeIndices3(set, maxColor / 255.0f, minColor / 255.0f);

    //optimizeEndPoints3(set, block);
}
*/