Add dxt1a fast compressor.

Cleanup fast compressors, move them to QuickCompress.
2.0
castano 17 years ago
parent da3a43ba2e
commit 0008199435

@ -62,7 +62,7 @@ namespace nv
/// Return true if the block uses four color mode, false otherwise.
///
/// DXT1/BC1 selects the four-color palette only when col0 is strictly
/// greater than col1. Equal endpoints decode in the three-color (plus
/// transparent) mode, so the comparison must be `>` and not `>=`.
inline bool BlockDXT1::isFourColorMode() const
{
	return col0.u > col1.u;
}

@ -150,6 +150,16 @@ Color32 ColorBlock::averageColor() const
return Color32(uint8(r / 16), uint8(g / 16), uint8(b / 16), uint8(a / 16));
}
/// Return true if the block is not fully opaque.
bool ColorBlock::hasAlpha() const
{
for (uint i = 0; i < 16; i++)
{
if (m_color[i].a != 255) return true;
}
return false;
}
/// Get diameter color range.
void ColorBlock::diameterRange(Color32 * start, Color32 * end) const

@ -26,6 +26,7 @@ namespace nv
uint countUniqueColors() const;
Color32 averageColor() const;
bool hasAlpha() const;
void diameterRange(Color32 * start, Color32 * end) const;
void luminanceRange(Color32 * start, Color32 * end) const;

@ -10,6 +10,8 @@ SET(NVTT_SRCS
CompressRGB.cpp
FastCompressDXT.h
FastCompressDXT.cpp
QuickCompressDXT.h
QuickCompressDXT.cpp
dxtlib.cpp
CompressionOptions.h
CompressionOptions.cpp

@ -30,6 +30,7 @@
#include "nvtt.h"
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "QuickCompressDXT.h"
#include "CompressionOptions.h"
// squish
@ -66,10 +67,36 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions & outputOptio
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
//QuickCompress::compressDXT1(rgba, &block);
compressBlock_BoundsRange(rgba, &block);
optimizeEndPoints(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
}
}
}
}
void nv::fastCompressDXT1a(const Image * image, const OutputOptions & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
compressBlock_BoundsRangeAlpha(rgba, &block);
// @@ Use iterative optimization.
optimizeEndPoints(rgba, &block);
//optimizeEndPoints(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));

@ -36,6 +36,7 @@ namespace nv
// Fast compressors.
void fastCompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT1a(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT3(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT5(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT5n(const Image * image, const nvtt::OutputOptions & outputOptions);

@ -91,7 +91,7 @@ inline uint colorDistance(__m64 a, __m64 b)
__m64 v;
uint16 part[4];
} s;
s.v = absoluteDifference(a, b);
// @@ This is very slow!
@ -131,6 +131,16 @@ inline static Color32 loadColor(Color32 c)
return c;
}
/// Return `c` with its r, g and b channels scaled by its alpha; the alpha
/// channel itself is copied unchanged.
///
/// The product is divided by 255 with rounding instead of being shifted right
/// by 8. A shift divides by 256, so a fully opaque texel (a == 255) would come
/// back darker than it went in (255 -> 254, 1 -> 0). With the exact divide,
/// a == 255 leaves the color untouched and a == 0 still yields black.
inline static Color32 premultiplyAlpha(Color32 c)
{
	Color32 pm;
	// Largest intermediate is 255 * 255 + 127 = 65152, well inside int range.
	pm.r = (c.r * c.a + 127) / 255;
	pm.g = (c.g * c.a + 127) / 255;
	pm.b = (c.b * c.a + 127) / 255;
	pm.a = c.a;
	return pm;
}
inline static uint sqr(uint s)
{
return s*s;
@ -192,35 +202,39 @@ inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette
uint d2 = colorDistance(vcolor2, vcolor);
uint d3 = colorDistance(vcolor3, vcolor);
/*if (d0 < d1 && d0 < d2 && d0 < d3) {
indices |= 0 << (2 * i);
}
else if (d1 < d2 && d1 < d3) {
indices |= 1 << (2 * i);
}
else if (d2 < d3) {
indices |= 2 << (2 * i);
}
else {
indices |= 3 << (2 * i);
}*/
/*
uint b0 = d0 > d2;
uint b1 = d1 > d3;
uint b2 = d0 > d3;
uint b3 = d1 > d2;
uint b4 = d0 > d1;
uint b5 = d2 > d3;
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b2 & b5;
uint x3 = !b3 & b4;
uint x2 = b0 & b4;
indices |= ((x3 | x2) | ((x1 | x0) << 1)) << (2 * i);
*/
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
vectorEnd();
return indices;
}
inline static uint computeIndicesAlpha(const ColorBlock & rgba, const Color32 palette[4])
{
const VectorColor vcolor0 = loadColor(palette[0]);
const VectorColor vcolor1 = loadColor(palette[1]);
const VectorColor vcolor2 = loadColor(palette[2]);
const VectorColor vcolor3 = loadColor(palette[3]);
uint indices = 0;
for(int i = 0; i < 16; i++) {
const VectorColor vcolor = premultiplyAlpha(loadColor(rgba.color(i)));
uint d0 = colorDistance(vcolor0, vcolor);
uint d1 = colorDistance(vcolor1, vcolor);
uint d2 = colorDistance(vcolor2, vcolor);
uint d3 = colorDistance(vcolor3, vcolor);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
@ -238,6 +252,7 @@ inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette
return indices;
}
inline static Color16 saturate16(int r, int g, int b)
{
Color16 c;
@ -299,7 +314,7 @@ void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
block->col0 = toColor16(c0);
block->col1 = toColor16(c1);
nvDebugCheck(block->col0.u >= block->col1.u);
nvDebugCheck(block->col0.u > block->col1.u);
// Use 4 color mode only.
//if (block->col0.u < block->col1.u) {
@ -312,6 +327,29 @@ void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
block->indices = computeIndices(rgba, palette);
}
// Compressor that uses bounding box and takes alpha into account.
void nv::compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 c0, c1;
rgba.boundsRange(&c1, &c0);
if (rgba.hasAlpha())
{
block->col0 = toColor16(c1);
block->col1 = toColor16(c0);
}
else
{
block->col0 = toColor16(c0);
block->col1 = toColor16(c1);
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeIndicesAlpha(rgba, palette);
}
// Compressor that uses the best fit axis.
void nv::compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block)
@ -326,6 +364,9 @@ void nv::compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block)
if (block->col0.u < block->col1.u) {
swap(block->col0.u, block->col1.u);
}
else if (block->col0.u == block->col1.u) {
block->col0.u++;
}
Color32 palette[4];
block->evaluatePalette4(palette);
@ -1114,7 +1155,7 @@ void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block)
block->color.col0 = toColor16(c0);
block->color.col1 = toColor16(c1);
nvDebugCheck(block->color.col0.u >= block->color.col1.u);
nvDebugCheck(block->color.col0.u > block->color.col1.u);
Color32 palette[4];
block->color.evaluatePalette4(palette);

@ -46,6 +46,9 @@ namespace nv
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box and takes alpha into account.
void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the best fit axis.
void compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);

@ -0,0 +1,177 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "QuickCompressDXT.h"
using namespace nv;
using namespace QuickCompress;
// Build a Vector3 from the r, g and b channels of `c`. Alpha is dropped and
// the channel values are not rescaled.
inline static Vector3 loadColor(Color32 c)
{
	const Vector3 rgb(c.r, c.g, c.b);
	return rgb;
}
inline static void extractColorBlockRGB(const ColorBlock & rgba, Vector3 block[16])
{
for (int i = 0; i < 16; i++)
{
block[i] = loadColor(rgba.color(i));
}
}
// find minimum and maximum colors based on bounding box in color space
//
// Writes the component-wise maximum of the 16 block colors to *maxColor and
// the component-wise minimum to *minColor.
//
// Channels are in the 0..255 range (loadColor copies the raw channel values),
// so the running minimum starts at 255. Starting it at 1 would cap the
// minimum at 1 per channel regardless of the block contents.
inline static void findMinMaxColorsBox(Vector3 block[16], Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
{
	*maxColor = Vector3(0, 0, 0);
	*minColor = Vector3(255, 255, 255);

	for (int i = 0; i < 16; i++)
	{
		*maxColor = max(*maxColor, block[i]);
		*minColor = min(*minColor, block[i]);
	}
}
// Pick which diagonal of the bounding box the two endpoints lie on.
// For every texel, its offset from the box center is taken and the x and y
// offsets are multiplied by the z offset. The products are summed over the
// block. If the x (resp. y) sum is negative, the x (resp. y) coordinates of
// the two corners are exchanged. The z coordinates are left untouched.
inline static void selectDiagonal(Vector3 block[16], Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
{
	Vector3 boxCenter = (*maxColor + *minColor) * 0.5;

	Vector2 zCovariance = Vector2(zero);
	for (int texel = 0; texel < 16; texel++)
	{
		Vector3 offset = block[texel] - boxCenter;
		zCovariance += offset.xy() * offset.z();
	}

	// Read every coordinate before writing anything back.
	const bool flipX = zCovariance.x() < 0;
	const bool flipY = zCovariance.y() < 0;

	const float upperX = flipX ? minColor->x() : maxColor->x();
	const float lowerX = flipX ? maxColor->x() : minColor->x();
	const float upperY = flipY ? minColor->y() : maxColor->y();
	const float lowerY = flipY ? maxColor->y() : minColor->y();

	maxColor->set(upperX, upperY, maxColor->z());
	minColor->set(lowerX, lowerY, minColor->z());
}
// Shrink the bounding box: move both corners toward each other by 1/16 of the
// box extent minus a small constant, then clamp each result to [0, 255].
// NOTE(review): the constant (8/255)/16 looks like it was written for a 0..1
// color range while the clamp uses 0..255 -- confirm which range is intended.
inline static void insetBBox(Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
{
	const Vector3 extent = *maxColor - *minColor;
	const Vector3 inset = extent / 16.0f - (8.0f / 255.0f) / 16.0f;

	const Vector3 shrunkMax = *maxColor - inset;
	const Vector3 shrunkMin = *minColor + inset;

	*maxColor = clamp(shrunkMax, 0.0f, 255.0f);
	*minColor = clamp(shrunkMin, 0.0f, 255.0f);
}
// Quantize *v (channels in 0..255) to a 5:6:5 color.
//
// Returns the packed 16-bit value (r in bits 11-15, g in bits 5-10, b in
// bits 0-4) and overwrites *v with the quantized color expanded back to
// 8 bits per channel by bit replication.
//
// The scaled channel is rounded to the nearest level by adding 0.5 before the
// float-to-integer conversion. A plain conversion truncates, which biases
// every endpoint toward black.
inline static uint16 roundAndExpand(Vector3 * v)
{
	// The clamp keeps the value <= 31 (or 63), so value + 0.5 still truncates in range.
	uint r = uint(clamp(v->x() * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);
	uint g = uint(clamp(v->y() * (63.0f / 255.0f), 0.0f, 63.0f) + 0.5f);
	uint b = uint(clamp(v->z() * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);

	uint16 w = (r << 11) | (g << 5) | b;

	// Expand 5/6-bit channels to 8 bits by replicating the top bits.
	r = (r << 3) | (r >> 2);
	g = (g << 2) | (g >> 4);
	b = (b << 3) | (b >> 2);
	*v = Vector3(r, g, b);

	return w;
}
// Squared Euclidean distance between two colors.
inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
{
	const Vector3 delta = c0 - c1;
	return dot(delta, delta);
}
// Compute the 32-bit index word for a block.
// The palette is { maxColor, minColor, lerp at 1/3, lerp at 2/3 }. For each
// texel the squared distances to the four entries are compared pairwise and
// the comparison bits are combined into a 2-bit index without branching.
// Texel i occupies bits 2i and 2i+1 of the result.
inline static uint computeIndices(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
	Vector3 palette[4];
	palette[0] = maxColor;
	palette[1] = minColor;
	palette[2] = lerp(palette[0], palette[1], 1.0/3.0);
	palette[3] = lerp(palette[0], palette[1], 2.0/3.0);

	uint packed = 0;
	for (int texel = 0; texel < 16; texel++)
	{
		const float dist0 = colorDistance(palette[0], block[texel]);
		const float dist1 = colorDistance(palette[1], block[texel]);
		const float dist2 = colorDistance(palette[2], block[texel]);
		const float dist3 = colorDistance(palette[3], block[texel]);

		// Pairwise "is farther than" flags.
		const uint far0than3 = dist0 > dist3;
		const uint far1than2 = dist1 > dist2;
		const uint far0than2 = dist0 > dist2;
		const uint far1than3 = dist1 > dist3;
		const uint far2than3 = dist2 > dist3;

		const uint highBit = (far1than2 & far0than2) | (far0than3 & far1than3);
		const uint lowBit = far0than3 & far2than3;

		packed |= (lowBit | (highBit << 1)) << (2 * texel);
	}

	return packed;
}
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
// read block
Vector3 block[16];
extractColorBlockRGB(rgba, block);
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, &maxColor, &minColor);
//selectDiagonal(block, &maxColor, &minColor);
//insetBBox(&minColor, &maxColor);
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);
/*if (color0 < color1)
{
swap(maxColor, minColor);
swap(color0, color1);
}*/
// @@ Optimize endpoints.
dxtBlock->col0 = Color16(color0);
dxtBlock->col1 = Color16(color1);
dxtBlock->indices = computeIndices(block, maxColor, minColor);
}

@ -0,0 +1,40 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_QUICKCOMPRESSDXT_H
#define NV_TT_QUICKCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
// Forward declarations so this header does not need the block definitions.
struct ColorBlock;
struct BlockDXT1;
// Quick per-block DXT compressors.
namespace QuickCompress
{
// Compress the color block `rgba` into the DXT1 block pointed to by `block`.
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * block);
}
} // nv namespace
#endif // NV_TT_QUICKCOMPRESSDXT_H

@ -50,7 +50,7 @@ namespace
static int blockSize(Format format)
{
if (format == Format_DXT1 /*|| format == Format_DXT1a*/) {
if (format == Format_DXT1 || format == Format_DXT1a) {
return 8;
}
else if (format == Format_DXT3) {
@ -134,7 +134,7 @@ static void outputHeader(const InputOptions::Private & inputOptions, const Outpu
{
header.setLinearSize(computeImageSize(img->width, img->height, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 /*|| compressionOptions.format == Format_DXT1a*/) {
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1');
}
else if (compressionOptions.format == Format_DXT3) {
@ -210,6 +210,11 @@ static bool compressMipmap(const Image * image, const OutputOptions & outputOpti
}
}
}
else if (compressionOptions.format == Format_DXT1a)
{
// @@ Only fast compression mode for now.
fastCompressDXT1a(image, outputOptions);
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
@ -363,10 +368,10 @@ static void quantize(Image * img, const InputOptions::Private & inputOptions, Fo
{
Quantize::Alpha4(img);
}
/*else if (format == Format_DXT1a)
else if (format == Format_DXT1a)
{
Quantize::BinaryAlpha(img, inputOptions.alphaThreshold);
}*/
}
}
}
}

@ -55,13 +55,14 @@ namespace nvtt
// DX9 formats.
Format_DXT1,
// Format_DXT1a, // DXT1 with binary alpha.
Format_DXT1a, // DXT1 with binary alpha.
Format_DXT3,
Format_DXT5,
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
// DX10 formats.
Format_BC1 = Format_DXT1,
Format_BC1a = Format_DXT1a,
Format_BC2 = Format_DXT3,
Format_BC3 = Format_DXT5,
Format_BC3n = Format_DXT5n,

@ -190,6 +190,10 @@ int main(int argc, char *argv[])
{
format = nvtt::Format_BC1;
}
else if (strcmp("-bc1a", argv[i]) == 0)
{
format = nvtt::Format_BC1a;
}
else if (strcmp("-bc2", argv[i]) == 0)
{
format = nvtt::Format_BC2;
@ -257,6 +261,7 @@ int main(int argc, char *argv[])
printf(" -nocuda \tDo not use cuda compressor.\n");
printf(" -rgb \tRGBA format\n");
printf(" -bc1 \tBC1 format (DXT1)\n");
printf(" -bc1a \tBC1 format with binary alpha (DXT1a)\n");
printf(" -bc2 \tBC2 format (DXT3)\n");
printf(" -bc3 \tBC3 format (DXT5)\n");
printf(" -bc3n \tBC3 normal map format (DXT5nm)\n");

@ -68,14 +68,14 @@ struct Error
{
samples = 0;
mabse = 0.0f;
mse = 0.0f;
maxabse = 0.0f;
mse = 0.0f;
}
/// Accumulate one error sample `e`.
///
/// Increments the sample count, adds |e| to the absolute-error sum, keeps the
/// largest |e| seen so far, and adds e*e to the squared-error sum.
/// |e| is added to `mabse` exactly once per sample; adding it a second time
/// would double the absolute-error total.
void addSample(float e)
{
	const float magnitude = fabsf(e);

	samples++;
	mabse += magnitude;
	maxabse = nv::max(maxabse, magnitude);
	mse += e * e;
}

@ -93,7 +93,6 @@ public:
Vector4();
explicit Vector4(zero_t);
explicit Vector4(identity_t);
Vector4(scalar x, scalar y, scalar z, scalar w);
Vector4(Vector2::Arg v, scalar z, scalar w);
Vector4(Vector3::Arg v, scalar w);
@ -278,7 +277,6 @@ inline bool operator!=(Vector3::Arg a, Vector3::Arg b)
// Default constructor: leaves the components uninitialized.
inline Vector4::Vector4() {}
// Zero vector: all four components are 0.
inline Vector4::Vector4(zero_t) : m_x(0.0f), m_y(0.0f), m_z(0.0f), m_w(0.0f) {}
// Identity vector: (0, 0, 0, 1).
inline Vector4::Vector4(identity_t) : m_x(0.0f), m_y(0.0f), m_z(0.0f), m_w(1.0f) {}
// Component-wise constructor.
inline Vector4::Vector4(scalar x, scalar y, scalar z, scalar w) : m_x(x), m_y(y), m_z(z), m_w(w) {}
// x and y come from `v`; z and w are given explicitly.
inline Vector4::Vector4(Vector2::Arg v, scalar z, scalar w) : m_x(v.x()), m_y(v.y()), m_z(z), m_w(w) {}
// x, y and z come from `v`; w is given explicitly.
inline Vector4::Vector4(Vector3::Arg v, scalar w) : m_x(v.x()), m_y(v.y()), m_z(v.z()), m_w(w) {}
@ -387,6 +385,7 @@ inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b)
return sub(a, b);
}
inline Vector2 scale(Vector2::Arg v, scalar s)
{
return Vector2(v.x() * s, v.y() * s);
@ -454,19 +453,35 @@ inline Vector3 add(Vector3::Arg a, Vector3::Arg b)
{
return Vector3(a.x() + b.x(), a.y() + b.y(), a.z() + b.z());
}
// Add the scalar `b` to every component of `a`.
inline Vector3 add(Vector3::Arg a, float b)
{
	const scalar x = a.x() + b;
	const scalar y = a.y() + b;
	const scalar z = a.z() + b;
	return Vector3(x, y, z);
}
// Component-wise sum of two vectors.
inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b)
{
	return Vector3(a.x() + b.x(), a.y() + b.y(), a.z() + b.z());
}
// Vector plus scalar: `b` is added to every component of `a`.
inline Vector3 operator+(Vector3::Arg a, float b)
{
	return Vector3(a.x() + b, a.y() + b, a.z() + b);
}
// Component-wise difference a - b.
inline Vector3 sub(Vector3::Arg a, Vector3::Arg b)
{
	const scalar x = a.x() - b.x();
	const scalar y = a.y() - b.y();
	const scalar z = a.z() - b.z();
	return Vector3(x, y, z);
}
// Subtract the scalar `b` from every component of `a`.
inline Vector3 sub(Vector3::Arg a, float b)
{
	const scalar x = a.x() - b;
	const scalar y = a.y() - b;
	const scalar z = a.z() - b;
	return Vector3(x, y, z);
}
// Component-wise difference of two vectors.
inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b)
{
	return Vector3(a.x() - b.x(), a.y() - b.y(), a.z() - b.z());
}
// Vector minus scalar: `b` is subtracted from every component of `a`.
inline Vector3 operator-(Vector3::Arg a, float b)
{
	return Vector3(a.x() - b, a.y() - b, a.z() - b);
}
inline Vector3 cross(Vector3::Arg a, Vector3::Arg b)
{
@ -562,6 +577,11 @@ inline Vector3 max(Vector3::Arg a, Vector3::Arg b)
return Vector3(max(a.x(), b.x()), max(a.y(), b.y()), max(a.z(), b.z()));
}
// Clamp every component of `v` to the range [min, max].
// The parameter names hide the min/max functions inside this body; only the
// scalar clamp overload is called here.
inline Vector3 clamp(Vector3::Arg v, float min, float max)
{
	const scalar x = clamp(v.x(), min, max);
	const scalar y = clamp(v.y(), min, max);
	const scalar z = clamp(v.z(), min, max);
	return Vector3(x, y, z);
}
inline bool isValid(Vector3::Arg v)
{
return isFinite(v.x()) && isFinite(v.y()) && isFinite(v.z());

Loading…
Cancel
Save