flatten tree.

This commit is contained in:
castano
2007-10-14 08:58:24 +00:00
parent b553bb11b8
commit e34a12a46f
77 changed files with 6 additions and 9 deletions

62
src/nvtt/CMakeLists.txt Normal file
View File

@ -0,0 +1,62 @@
PROJECT(nvtt)

ADD_SUBDIRECTORY(squish)

# Source files of the nvtt library.
SET(NVTT_SRCS
	nvtt.h
	CompressDXT.h
	CompressDXT.cpp
	CompressRGB.h
	CompressRGB.cpp
	FastCompressDXT.h
	FastCompressDXT.cpp
	QuickCompressDXT.h
	QuickCompressDXT.cpp
	dxtlib.cpp
	CompressionOptions.h
	CompressionOptions.cpp
	InputOptions.h
	InputOptions.cpp
	OutputOptions.cpp
	cuda/CudaUtils.h
	cuda/CudaUtils.cpp
	cuda/CudaCompressDXT.h
	cuda/CudaCompressDXT.cpp)

# When CUDA is available: define HAVE_CUDA, add the wrapped kernel to the
# library sources and pull in the CUDA libraries and include path.
IF(CUDA_FOUND)
	ADD_DEFINITIONS(-DHAVE_CUDA)
	WRAP_CUDA(CUDA_SRCS cuda/CompressKernel.cu)
	SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS})
	SET(LIBS ${LIBS} ${CUDA_LIBRARIES})
	INCLUDE_DIRECTORIES(${CUDA_INCLUDE_PATH})
ENDIF(CUDA_FOUND)

INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

ADD_DEFINITIONS(-DNVTT_EXPORTS)

# Both the shared and the default build use the same source list. NVTT_SRCS is
# the only source variable defined in this file, so the shared branch must use
# it as well (an undefined variable would give the target no sources).
IF(NVTT_SHARED)
	ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED)
	ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED)

TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish)

# test executables
ADD_EXECUTABLE(nvcompress tools/compress.cpp)
TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)

ADD_EXECUTABLE(nvdecompress tools/decompress.cpp)
TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)

ADD_EXECUTABLE(nvddsinfo tools/ddsinfo.cpp)
TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)

ADD_EXECUTABLE(nvimgdiff tools/imgdiff.cpp)
TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)

INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff DESTINATION bin)

551
src/nvtt/CompressDXT.cpp Normal file
View File

@ -0,0 +1,551 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Memory.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "nvtt.h"
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "QuickCompressDXT.h"
#include "CompressionOptions.h"
// squish
#include "squish/colourset.h"
//#include "squish/clusterfit.h"
#include "squish/fastclusterfit.h"
#include "squish/weightedclusterfit.h"
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
#include "atitc/ATI_Compress.h"
#endif
//#include <time.h>
using namespace nv;
using namespace nvtt;
void nv::fastCompressDXT1(const Image * image, const OutputOptions & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
QuickCompress::compressDXT1(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
}
}
}
}
void nv::fastCompressDXT1a(const Image * image, const OutputOptions & outputOptions)
{
const uint w = image->width();
const uint h = image->height();
ColorBlock rgba;
BlockDXT1 block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
QuickCompress::compressDXT1a(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
}
}
}
}
// Fast DXT3 compressor. Every block is encoded with the BlockDXT3 overload of
// compressBlock_BoundsRange and forwarded to the output handler, if any.
void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions & outputOptions)
{
	const uint width = image->width();
	const uint height = image->height();

	ColorBlock tile;
	BlockDXT3 encoded;

	for (uint by = 0; by < height; by += 4)
	{
		for (uint bx = 0; bx < width; bx += 4)
		{
			tile.init(image, bx, by);
			compressBlock_BoundsRange(tile, &encoded);

			// Without a handler the encoded block is simply discarded.
			if (outputOptions.outputHandler == NULL) continue;

			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
		}
	}
}
// Fast DXT5 compressor. Every block is encoded with the BlockDXT5 overload of
// compressBlock_BoundsRange and forwarded to the output handler, if any.
void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions & outputOptions)
{
	const uint width = image->width();
	const uint height = image->height();

	ColorBlock tile;
	BlockDXT5 encoded;

	for (uint by = 0; by < height; by += 4)
	{
		for (uint bx = 0; bx < width; bx += 4)
		{
			tile.init(image, bx, by);
			compressBlock_BoundsRange(tile, &encoded);

			// Without a handler the encoded block is simply discarded.
			if (outputOptions.outputHandler == NULL) continue;

			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
		}
	}
}
// Fast DXT5n compressor. Each block is swizzled with swizzleDXT5n() and then
// encoded like a regular DXT5 block with compressBlock_BoundsRange.
void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions & outputOptions)
{
	const uint width = image->width();
	const uint height = image->height();

	ColorBlock tile;
	BlockDXT5 encoded;

	for (uint by = 0; by < height; by += 4)
	{
		for (uint bx = 0; bx < width; bx += 4)
		{
			tile.init(image, bx, by);

			// Move the X coordinate into the alpha channel and the Y coordinate
			// into the green channel.
			tile.swizzleDXT5n();

			compressBlock_BoundsRange(tile, &encoded);

			// Without a handler the encoded block is simply discarded.
			if (outputOptions.outputHandler == NULL) continue;

			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
		}
	}
}
// Fast BC4 compressor. Not implemented yet: the body is empty, so neither
// argument is used and nothing is written to the output handler.
void nv::fastCompressBC4(const Image * image, const nvtt::OutputOptions & outputOptions)
{
// @@ TODO
// compress red channel (X)
}
// Fast BC5 compressor. Not implemented yet: the body is empty, so neither
// argument is used and nothing is written to the output handler.
void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions & outputOptions)
{
// @@ TODO
// compress red, green channels (X,Y)
}
// Runs squish::FastClusterFit::doPrecomputation() the first time it is called;
// later calls return immediately. The guard is a plain function-local flag
// with no locking.
void nv::doPrecomputation()
{
	static bool alreadyRun = false;

	if (alreadyRun) return;

	// Raise the flag before doing the work, as the guard has always done.
	alreadyRun = true;
	squish::FastClusterFit::doPrecomputation();
}
// DXT1 compressor based on squish's FastClusterFit. The colour metric comes
// from compressionOptions.colorWeight; each encoded block is handed to the
// output handler when one is set.
void nv::compressDXT1(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
	const uint width = image->width();
	const uint height = image->height();

	ColorBlock tile;
	BlockDXT1 encoded;

	// FastClusterFit needs its one-time precomputation before the first fit.
	doPrecomputation();

	for (uint by = 0; by < height; by += 4)
	{
		for (uint bx = 0; bx < width; bx += 4)
		{
			tile.init(image, bx, by);

			// Compress color.
			squish::ColourSet colourSet((uint8 *)tile.colors(), 0);
			squish::FastClusterFit fitter(&colourSet, squish::kDxt1);
			//squish::WeightedClusterFit fitter(&colourSet, squish::kDxt1);
			//squish::ClusterFit fitter(&colourSet, squish::kDxt1);
			fitter.setMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
			fitter.Compress(&encoded);

			// @@ Use iterative cluster fit algorithm to improve error in highest quality mode.

			if (outputOptions.outputHandler == NULL) continue;

			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
		}
	}
}
// DXT3 compressor. The explicit alpha part is produced by compressBlock();
// the colour part by squish's WeightedClusterFit on an alpha-weighted
// colour set.
void nv::compressDXT3(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
	const uint width = image->width();
	const uint height = image->height();

	ColorBlock tile;
	BlockDXT3 encoded;

	for (uint by = 0; by < height; by += 4)
	{
		for (uint bx = 0; bx < width; bx += 4)
		{
			tile.init(image, bx, by);

			// Explicit alpha first.
			compressBlock(tile, &encoded.alpha);

			// Then the colour part.
			squish::ColourSet colourSet((uint8 *)tile.colors(), squish::kWeightColourByAlpha);
			squish::WeightedClusterFit fitter(&colourSet, 0);
			fitter.setMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
			fitter.Compress(&encoded.color);

			if (outputOptions.outputHandler == NULL) continue;

			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
		}
	}
}
// DXT5 compressor. Alpha uses the brute force search in Quality_Highest and
// the iterative search otherwise; colour uses squish's WeightedClusterFit on
// an alpha-weighted colour set.
void nv::compressDXT5(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
	const uint width = image->width();
	const uint height = image->height();

	ColorBlock tile;
	BlockDXT5 encoded;

	for (uint by = 0; by < height; by += 4)
	{
		for (uint bx = 0; bx < width; bx += 4)
		{
			tile.init(image, bx, by);

			// Compress alpha. The error returned by the alpha compressors is
			// not used here.
			if (compressionOptions.quality != Quality_Highest)
			{
				compressBlock_Iterative(tile, &encoded.alpha);
			}
			else
			{
				compressBlock_BruteForce(tile, &encoded.alpha);
			}

			// Compress color.
			squish::ColourSet colourSet((uint8 *)tile.colors(), squish::kWeightColourByAlpha);
			squish::WeightedClusterFit fitter(&colourSet, 0);
			fitter.setMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
			fitter.Compress(&encoded.color);

			if (outputOptions.outputHandler == NULL) continue;

			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
		}
	}
}
// DXT5n compressor. After swizzleDXT5n() the X coordinate is encoded through
// the alpha block and the Y coordinate through the green channel of the
// colour block.
void nv::compressDXT5n(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
	const uint width = image->width();
	const uint height = image->height();

	ColorBlock tile;
	BlockDXT5 encoded;

	doPrecomputation();

	for (uint by = 0; by < height; by += 4)
	{
		for (uint bx = 0; bx < width; bx += 4)
		{
			tile.init(image, bx, by);

			// Move the X coordinate into the alpha channel and the Y coordinate
			// into the green channel.
			tile.swizzleDXT5n();

			// Compress X (alpha). The iterative search always runs; in
			// Quality_Highest the block is then encoded again with the brute
			// force search. The returned errors are not used.
			compressBlock_Iterative(tile, &encoded.alpha);
			if (compressionOptions.quality == Quality_Highest)
			{
				compressBlock_BruteForce(tile, &encoded.alpha);
			}

			// Compress Y (green).
			compressGreenBlock_BruteForce(tile, &encoded.color);

			if (outputOptions.outputHandler == NULL) continue;

			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
		}
	}
}
// BC4 compressor. Every block is first encoded with the iterative search.
// The brute force search replaces that result in Quality_Highest, and in
// Quality_Production when the iterative error exceeds the threshold derived
// from compressionOptions.errorThreshold.
void nv::compressBC4(const Image * image, const nvtt::OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
	const uint width = image->width();
	const uint height = image->height();

	// threshold should be from [0 - 1] but may also be higher...
	const uint threshold = uint(compressionOptions.errorThreshold * 256);

	ColorBlock tile;
	AlphaBlockDXT5 encoded;

	// Accumulated for the (disabled) diagnostic printf at the end.
	uint totalError = 0;

	for (uint by = 0; by < height; by += 4)
	{
		for (uint bx = 0; bx < width; bx += 4)
		{
			tile.init(image, bx, by);

			//error = compressBlock_BoundsRange(tile, &encoded);
			uint error = compressBlock_Iterative(tile, &encoded);

			bool useBruteForce = (compressionOptions.quality == Quality_Highest);
			if (!useBruteForce && compressionOptions.quality == Quality_Production)
			{
				useBruteForce = (error > threshold);
			}

			if (useBruteForce)
			{
				// Try brute force algorithm.
				error = compressBlock_BruteForce(tile, &encoded);
			}

			totalError += error;

			if (outputOptions.outputHandler == NULL) continue;

			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
		}
	}

	// @@ All the compressors should work like this.
	// Effect of adjusting threshold:
	// (threshold: error - time)
	// 0: 4.29 - 1.83
	// 32: 4.32 - 1.77
	// 48: 4.37 - 1.72
	// 64: 4.43 - 1.45
	// 74: 4.45 - 1.35
	// 92: 4.54 - 1.15
	// 128: 4.67 - 0.79
	// 256: 4.92 - 0.20
	// inf: 4.98 - 0.09
	//printf("Alpha error: %f\n", float(totalError) / (width*height));
}
// BC5 compressor. The block is loaded twice, once splatted with splatX() and
// once with splatY(), and the two copies are encoded into block.x and
// block.y. Quality_Highest uses the brute force search, everything else the
// iterative one.
void nv::compressBC5(const Image * image, const nvtt::OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
	const uint width = image->width();
	const uint height = image->height();

	ColorBlock xTile;
	ColorBlock yTile;
	BlockATI2 encoded;

	for (uint by = 0; by < height; by += 4)
	{
		for (uint bx = 0; bx < width; bx += 4)
		{
			xTile.init(image, bx, by);
			xTile.splatX();

			yTile.init(image, bx, by);
			yTile.splatY();

			// @@ Compute normal error, instead of separate xy errors.
			// The per-channel errors returned by the compressors are not used.
			if (compressionOptions.quality != Quality_Highest)
			{
				compressBlock_Iterative(xTile, &encoded.x);
				compressBlock_Iterative(yTile, &encoded.y);
			}
			else
			{
				compressBlock_BruteForce(xTile, &encoded.x);
				compressBlock_BruteForce(yTile, &encoded.y);
			}

			if (outputOptions.outputHandler == NULL) continue;

			outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
		}
	}
}
#if defined(HAVE_S3QUANT)
// DXT1 compressor that uses the external s3_quant coder (CodeRGBBlock).
// Every block is coded twice, with inLevel 4 and with inLevel 3, and the
// candidate with the smaller blockError() is written to the output handler.
// The average error per block is printed to stdout when the image is done.
void nv::s3CompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions)
{
	const uint w = image->width();
	const uint h = image->height();

	float error = 0.0f;

	BlockDXT1 dxtBlock3;
	BlockDXT1 dxtBlock4;
	ColorBlock block;

	for (uint y = 0; y < h; y += 4) {
		for (uint x = 0; x < w; x += 4) {
			block.init(image, x, y);

			// Init rgb block: 16 colors with channels scaled to [0, 1].
			RGBBlock rgbBlock;
			rgbBlock.n = 16;
			for (uint i = 0; i < 16; i++) {
				rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f);
				rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f);
				rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f);
			}
			rgbBlock.weight[0] = 1.0f;
			rgbBlock.weight[1] = 1.0f;
			rgbBlock.weight[2] = 1.0f;

			// First candidate: inLevel 4.
			rgbBlock.inLevel = 4;
			CodeRGBBlock(&rgbBlock);

			// Copy results to DXT block.
			dxtBlock4.col0.r = rgbBlock.endPoint[0][0];
			dxtBlock4.col0.g = rgbBlock.endPoint[0][1];
			dxtBlock4.col0.b = rgbBlock.endPoint[0][2];
			dxtBlock4.col1.r = rgbBlock.endPoint[1][0];
			dxtBlock4.col1.g = rgbBlock.endPoint[1][1];
			dxtBlock4.col1.b = rgbBlock.endPoint[1][2];
			dxtBlock4.setIndices(rgbBlock.index);

			// Keep col0 >= col1 for this candidate. After swapping the
			// endpoints, the low bit of every 2-bit index is flipped
			// (0 <-> 1, 2 <-> 3) so each texel still selects the same color.
			if (dxtBlock4.col0.u < dxtBlock4.col1.u) {
				swap(dxtBlock4.col0.u, dxtBlock4.col1.u);
				dxtBlock4.indices ^= 0x55555555;
			}

			uint error4 = blockError(block, dxtBlock4);

			// Second candidate: inLevel 3.
			rgbBlock.inLevel = 3;
			CodeRGBBlock(&rgbBlock);

			// Copy results to DXT block.
			dxtBlock3.col0.r = rgbBlock.endPoint[0][0];
			dxtBlock3.col0.g = rgbBlock.endPoint[0][1];
			dxtBlock3.col0.b = rgbBlock.endPoint[0][2];
			dxtBlock3.col1.r = rgbBlock.endPoint[1][0];
			dxtBlock3.col1.g = rgbBlock.endPoint[1][1];
			dxtBlock3.col1.b = rgbBlock.endPoint[1][2];
			dxtBlock3.setIndices(rgbBlock.index);

			// Keep col0 <= col1 for this candidate. After swapping the
			// endpoints, only indices whose high bit is clear (0 and 1) get
			// their low bit flipped; indices 2 and 3 are left alone.
			if (dxtBlock3.col0.u > dxtBlock3.col1.u) {
				swap(dxtBlock3.col0.u, dxtBlock3.col1.u);
				dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555;
			}

			uint error3 = blockError(block, dxtBlock3);

			// Emit whichever candidate has the smaller error.
			if (error3 < error4) {
				error += error3;
				if (outputOptions.outputHandler != NULL) {
					outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3));
				}
			}
			else {
				error += error4;
				if (outputOptions.outputHandler != NULL) {
					outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4));
				}
			}
		}
	}

	// Number of blocks visited by the loops above. Each factor is
	// parenthesized on its own: `(w+3)/4 * (h+3)/4` would be evaluated as
	// `(((w+3)/4) * (h+3)) / 4`, which is not the block count.
	const uint blockCount = ((w + 3) / 4) * ((h + 3) / 4);

	// An empty image has no blocks; report 0 instead of dividing by zero.
	const float averageError = (blockCount > 0) ? error / blockCount : 0.0f;

	printf("error = %f\n", averageError);
}
#endif // defined(HAVE_S3QUANT)
#if defined(HAVE_ATITC)
void nv::atiCompressDXT1(const Image * image, const OutputOptions & outputOptions)
{
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = image->width();
srcTexture.dwHeight = image->height();
srcTexture.dwPitch = image->width() * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) image->pixels();
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = image->width();
destTexture.dwHeight = image->height();
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT1;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
// Compress
ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
}
#endif // defined(HAVE_ATITC)

66
src/nvtt/CompressDXT.h Normal file
View File

@ -0,0 +1,66 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSDXT_H
#define NV_TT_COMPRESSDXT_H
#include <nvimage/nvimage.h>
#include "nvtt.h"
// Image-level DXT/BC compressors. Every function takes the source image and
// the output options; the "normal" compressors additionally take the private
// compression options.
namespace nv
{
class Image;
class FloatImage;
// One-time setup used by the compressors that rely on squish's fast cluster fit.
void doPrecomputation();
// Fast compressors. These take no compression options.
void fastCompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT1a(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT3(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT5(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressDXT5n(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressBC4(const Image * image, const nvtt::OutputOptions & outputOptions);
void fastCompressBC5(const Image * image, const nvtt::OutputOptions & outputOptions);
// Normal compressors. These also receive the compression options (quality, color weights, ...).
void compressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5n(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC4(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC5(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
// External compressors. Each one is only declared when the matching HAVE_* macro is defined.
#if defined(HAVE_S3QUANT)
void s3CompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions);
#endif
#if defined(HAVE_ATITC)
void atiCompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions);
#endif
} // nv namespace
#endif // NV_TT_COMPRESSDXT_H

134
src/nvtt/CompressRGB.cpp Normal file
View File

@ -0,0 +1,134 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvimage/Image.h>
#include <nvimage/PixelFormat.h>
#include <nvmath/Color.h>
#include "CompressRGB.h"
#include "CompressionOptions.h"
using namespace nv;
using namespace nvtt;
namespace
{
	// Size in bytes of one scanline holding `w` pixels of `bitsize` bits each.
	// The pixel size is rounded up to whole bytes and the row to a multiple
	// of four bytes.
	inline uint computePitch(uint w, uint bitsize)
	{
		const uint bytesPerPixel = (bitsize + 7) >> 3;
		const uint unpadded = w * bytesPerPixel;

		// Align to 32 bits.
		return (unpadded + 3) & ~3u;
	}

	// Copies `w` 4-byte pixels from src to dst without changing them.
	inline void convert_to_a8r8g8b8(const void * src, void * dst, uint w)
	{
		const uint byteCount = 4 * w;
		memcpy(dst, src, byteCount);
	}

	// Copies `w` 4-byte pixels from src to dst without changing them; the
	// fourth byte of every pixel is copied along as-is.
	inline void convert_to_x8r8g8b8(const void * src, void * dst, uint w)
	{
		const uint byteCount = 4 * w;
		memcpy(dst, src, byteCount);
	}

} // namespace
// Pixel format converter.
// Converts the image one scanline at a time into the pixel format described
// by compressionOptions (bit count plus one mask per channel) and passes each
// converted row, padded to the pitch, to the output handler when one is set.
void nv::compressRGB(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
	nvCheck(image != NULL);

	const uint w = image->width();
	const uint h = image->height();

	const uint bitCount = compressionOptions.bitcount;
	nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);

	const uint byteCount = bitCount / 8;

	// Shift and size of every channel, derived from its mask.
	const uint rmask = compressionOptions.rmask;
	uint rshift, rsize;
	PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);

	const uint gmask = compressionOptions.gmask;
	uint gshift, gsize;
	PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);

	const uint bmask = compressionOptions.bmask;
	uint bshift, bsize;
	PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);

	const uint amask = compressionOptions.amask;
	uint ashift, asize;
	PixelFormat::maskShiftAndSize(amask, &ashift, &asize);

	// Determine pitch.
	uint pitch = computePitch(w, compressionOptions.bitcount);

	uint8 * dst = (uint8 *)mem::malloc(pitch + 4);

	// Clear the row buffer once. The converters below only write the pixel
	// bytes, so without this the padding at the end of every row would be
	// emitted as uninitialized heap contents.
	for (uint i = 0; i < pitch + 4; i++)
	{
		dst[i] = 0;
	}

	for (uint y = 0; y < h; y++)
	{
		const Color32 * src = image->scanline(y);

		if (bitCount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF && amask == 0xFF000000)
		{
			convert_to_a8r8g8b8(src, dst, w);
		}
		else if (bitCount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF && amask == 0)
		{
			convert_to_x8r8g8b8(src, dst, w);
		}
		else
		{
			// Generic pixel format conversion.
			for (uint x = 0; x < w; x++)
			{
				uint c = 0;
				c |= PixelFormat::convert(src[x].r, 8, rsize) << rshift;
				c |= PixelFormat::convert(src[x].g, 8, gsize) << gshift;
				c |= PixelFormat::convert(src[x].b, 8, bsize) << bshift;
				c |= PixelFormat::convert(src[x].a, 8, asize) << ashift;

				// Output one byte at a time, least significant byte first.
				// Each byte goes to its own offset inside the pixel; writing
				// them all to the pixel's first byte would leave only the
				// last one behind. Because the bytes are extracted by
				// shifting, this layout does not depend on the host byte
				// order. @@ Not tested.
				uint8 * pixel = dst + x * byteCount;
				for (uint i = 0; i < byteCount; i++)
				{
					pixel[i] = (c >> (i * 8)) & 0xFF;
				}
			}
		}

		if (outputOptions.outputHandler != NULL)
		{
			outputOptions.outputHandler->writeData(dst, pitch);
		}
	}

	mem::free(dst);
}

39
src/nvtt/CompressRGB.h Normal file
View File

@ -0,0 +1,39 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSRGB_H
#define NV_TT_COMPRESSRGB_H
#include "nvtt.h"
namespace nv
{
class Image;
// Pixel format converter.
// Writes the image through the output handler in the uncompressed pixel
// format (bit count and channel masks) given by the compression options.
void compressRGB(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
} // nv namespace
#endif // NV_TT_COMPRESSRGB_H

View File

@ -0,0 +1,132 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "nvtt.h"
#include "CompressionOptions.h"
using namespace nv;
using namespace nvtt;
/// Constructor. Allocates the private state and fills it with the defaults
/// applied by reset().
CompressionOptions::CompressionOptions()
	: m(*new Private())
{
	reset();
}
/// Destructor. Releases the private state allocated by the constructor.
CompressionOptions::~CompressionOptions()
{
	// `m` is a reference to a heap object, so it is deleted through its address.
	Private * state = &m;
	delete state;
}
/// Set default compression options: DXT1 format, normal quality with an error
/// threshold of 0.5, uniform color weights, hardware compression enabled and
/// a 32 bit pixel format with 8 bit masks ordered A, R, G, B from the most
/// significant byte.
/// Note that the external compressor name is not touched by this call.
void CompressionOptions::reset()
{
	m.format = Format_DXT1;
	m.quality = Quality_Normal;

	// Same value that setQuality() documents as the default for its
	// errorThreshold argument. compressBC4 reads this field, so it must have
	// a defined value even when setQuality() is never called, and reset()
	// must not leave a previously configured threshold behind.
	m.errorThreshold = 0.5f;

	m.colorWeight.set(1.0f, 1.0f, 1.0f);
	m.useCuda = true;

	m.bitcount = 32;
	m.bmask = 0x000000FF;
	m.gmask = 0x0000FF00;
	m.rmask = 0x00FF0000;
	m.amask = 0xFF000000;
}
/// Set desired compression format.
/// The value is only stored here; no validation is done.
void CompressionOptions::setFormat(Format format)
{
m.format = format;
}
/// Set compression quality settings.
/// @param quality Quality level to store.
/// @param errorThreshold Stored as given, without range checking. The inline
/// comment in the signature indicates a declared default of 0.5.
void CompressionOptions::setQuality(Quality quality, float errorThreshold /*= 0.5f*/)
{
m.quality = quality;
m.errorThreshold = errorThreshold;
}
/// Set the weights of each color channel.
/// The weights are normalized so that they add up to one before being stored.
/// The choice for these values is subjective. In many cases uniform color weights
/// (1.0, 1.0, 1.0) work very well. A popular choice is to use the Rec. 709 luma
/// weights (0.2126, 0.7152, 0.0722), but I think that blue contributes to our
/// perception more than 7%. A better choice in my opinion is (3, 4, 2). Ideally
/// the compressor should use a non linear colour metric as described here:
/// http://www.compuphase.com/cmetric.htm
/// If the three weights add up to zero they cannot be normalized, and the
/// current weights are left unchanged.
void CompressionOptions::setColorWeights(float red, float green, float blue)
{
	const float total = red + green + blue;

	// Avoid storing NaN/inf weights when there is nothing to normalize by.
	if (total == 0.0f)
	{
		return;
	}

	// The stored vector is (red, green, blue): x must come from red so that
	// the remainder, 1 - x - y, is the blue share. This matches the order in
	// which the default weights are set and in which the compressors read
	// x(), y() and z() back.
	const float x = red / total;
	const float y = green / total;

	m.colorWeight.set(x, y, 1.0f - x - y);
}
/// Enable or disable hardware compression.
/// The flag is stored in the private `useCuda` field.
void CompressionOptions::enableHardwareCompression(bool enable)
{
m.useCuda = enable;
}
/// Set color mask to describe the RGB/RGBA format.
/// @param bitcount Bits per pixel; checked to be 8, 16, 24 or 32.
/// @param rmask,gmask,bmask,amask Bit mask of each channel. The masks are
/// checked not to overlap and, for pixels narrower than 32 bits, to fit
/// inside `bitcount` bits.
/// All checks are made with nvCheck; the values are stored afterwards.
void CompressionOptions::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask)
{
// Validate arguments.
nvCheck(bitcount == 8 || bitcount == 16 || bitcount == 24 || bitcount == 32);
// No two channels may share a bit.
nvCheck((rmask & gmask) == 0);
nvCheck((rmask & bmask) == 0);
nvCheck((rmask & amask) == 0);
nvCheck((gmask & bmask) == 0);
nvCheck((gmask & amask) == 0);
nvCheck((bmask & amask) == 0);
// Every 32 bit mask fits a 32 bit pixel, and shifting a 32 bit int by 32 is
// not valid, so the range check only runs for narrower pixels.
if (bitcount != 32)
{
// First value that no longer fits in `bitcount` bits.
uint maxMask = (1 << bitcount);
nvCheck(maxMask > rmask);
nvCheck(maxMask > gmask);
nvCheck(maxMask > bmask);
nvCheck(maxMask > amask);
}
m.bitcount = bitcount;
m.rmask = rmask;
m.gmask = gmask;
m.bmask = bmask;
m.amask = amask;
}
/// Use external compressor.
/// Stores the given name in the private `externalCompressor` string; the name
/// is not interpreted here.
void CompressionOptions::setExternalCompressor(const char * name)
{
m.externalCompressor = name;
}

View File

@ -0,0 +1,57 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSIONOPTIONS_H
#define NV_TT_COMPRESSIONOPTIONS_H
#include <nvcore/StrLib.h>
#include <nvmath/Vector.h>
#include "nvtt.h"
namespace nvtt
{
// Private state behind the public CompressionOptions class. The fields are
// filled in by the CompressionOptions setters.
struct CompressionOptions::Private
{
// Target compression format.
Format format;
// Requested quality level.
Quality quality;
// Error threshold passed to setQuality().
float errorThreshold;
// Per-channel color weights, accessed through x(), y() and z().
nv::Vector3 colorWeight;
// Pixel format description for uncompressed output: bits per pixel and one
// bit mask per channel.
uint bitcount;
uint rmask;
uint gmask;
uint bmask;
uint amask;
// Whether hardware (CUDA) compression is enabled.
bool useCuda;
// Name given to setExternalCompressor().
nv::String externalCompressor;
};
} // nvtt namespace
#endif // NV_TT_COMPRESSIONOPTIONS_H

1438
src/nvtt/FastCompressDXT.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,87 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_FASTCOMPRESSDXT_H
#define NV_TT_FASTCOMPRESSDXT_H
#include <nvimage/nvimage.h>
// Block-level compressors. Every function reads one ColorBlock and writes (or
// refines) one compressed block of the type given by its last parameter.
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
// Color compression:
// Compressor that uses the extremes of the diameter axis.
// NOTE(review): this comment used to repeat the luminance-axis description
// below; the wording here is inferred from the function name only. Confirm
// against the implementation.
void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the extremes of the luminance axis.
void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box and takes alpha into account.
void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the best fit axis.
void compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Simple, but slow compressor that tests all color pairs.
void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force 6d search along the best fit axis.
void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Spatial greedy search.
void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force compressor for DXT5n
void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// Minimize error of the endpoints.
void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
// Error of a compressed block with respect to the source colors.
uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// Alpha compression:
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block);
// The AlphaBlockDXT5 compressors return an error value for the encoded block.
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
} // nv namespace
#endif // NV_TT_FASTCOMPRESSDXT_H

260
src/nvtt/InputOptions.cpp Normal file
View File

@ -0,0 +1,260 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <string.h> // memcpy
#include <nvcore/Memory.h>
#include "nvtt.h"
#include "InputOptions.h"
using namespace nv;
using namespace nvtt;
namespace
{
	/// Count the levels of a complete mipmap chain.
	///
	/// Every level halves each extent (rounding down, never below 1) until all
	/// three extents have reached 1. The chain therefore has as many levels as
	/// the largest extent requires, which matters for non-square textures such
	/// as 256x1 (9 levels, not 1).
	///
	/// @param w Width of the top level. Values below 1 are treated as 1.
	/// @param h Height of the top level. Values below 1 are treated as 1.
	/// @param d Depth of the top level. Values below 1 are treated as 1.
	/// @return Number of levels including the top level; always at least 1.
	static int countMipmaps(int w, int h, int d)
	{
		int mipmap = 0;

		// Keep going while ANY extent can still be halved. Stopping as soon as
		// one extent reaches 1 would truncate the chain of non-square textures.
		while (w > 1 || h > 1 || d > 1) {
			w = (w > 1) ? w / 2 : 1;
			h = (h > 1) ? h / 2 : 1;
			d = (d > 1) ? d / 2 : 1;
			mipmap++;
		}

		// Account for the top level itself.
		return mipmap + 1;
	}
} // namespace
/// Constructor.
/// Allocates the private implementation object on the heap, binds the
/// reference member `m` to it, and loads the default option values via reset().
/// The matching `delete &m` is in the destructor.
InputOptions::InputOptions() : m(*new InputOptions::Private())
{
reset();
}
// Delete images.
// The image array is released first through resetTextureLayout(); only then is
// the private implementation object that `m` refers to destroyed.
InputOptions::~InputOptions()
{
resetTextureLayout();
delete &m;
}
// Reset input options.
// Restores every option to its default value. The texture layout (image array
// and its face/mipmap counts) is not touched; use resetTextureLayout() for that.
void InputOptions::reset()
{
	m.wrapMode = WrapMode_Repeat;
	m.textureType = TextureType_2D;
	m.inputFormat = InputFormat_BGRA_8UB;

	// Quantization.
	m.enableColorDithering = false;
	m.enableAlphaDithering = false;
	m.binaryAlpha = false;
	m.alphaThreshold = 127;
	m.alphaTransparency = true;

	// Gamma conversion.
	m.inputGamma = 2.2f;
	m.outputGamma = 2.2f;

	// Mipmap generation.
	m.generateMipmaps = false;
	m.maxLevel = -1;
	m.mipmapFilter = MipmapFilter_Box;

	// Normal map options. normalMap was previously left uninitialized, so its
	// value was indeterminate unless setNormalMap() had been called.
	m.normalMap = false;
	m.normalizeMipmaps = false;
	m.convertToNormalMap = false;
	m.heightFactors.set(0.0f, 0.0f, 0.0f, 1.0f);
	// Default filter weights, normalized so that they add up to one.
	m.bumpFrequencyScale = Vector4(1.0f, 0.5f, 0.25f, 0.125f) / (1.0f + 0.5f + 0.25f + 0.125f);

	// Cone map options. Also previously left uninitialized.
	m.convertToConeMap = false;
}
// Setup the input image.
// Discards any previous layout and allocates one image descriptor per face and
// mipmap level. Each descriptor records its extents, level and face; its pixel
// data stays empty until setMipmapData() is called for it.
void InputOptions::setTextureLayout(TextureType type, int width, int height, int depth /*= 1*/)
{
	// Negative extents are rejected.
	nvCheck(width >= 0);
	nvCheck(height >= 0);
	nvCheck(depth >= 0);

	// A zero extent is promoted to one.
	width = (width == 0) ? 1 : width;
	height = (height == 0) ? 1 : height;
	depth = (depth == 0) ? 1 : depth;

	// Release whatever layout was set up before.
	resetTextureLayout();

	m.textureType = type;

	// Cube maps have six faces, every other type has one.
	m.mipmapCount = countMipmaps(width, height, depth);
	m.faceCount = (type == TextureType_Cube) ? 6 : 1;
	m.imageCount = m.mipmapCount * m.faceCount;
	m.images = new Private::Image[m.imageCount];

	// Images are stored face by face, each face holding its full mipmap chain.
	for (int face = 0; face < m.faceCount; face++)
	{
		Private::Image * level = m.images + face * m.mipmapCount;

		int levelWidth = width;
		int levelHeight = height;
		int levelDepth = depth;

		for (int i = 0; i < m.mipmapCount; i++, level++)
		{
			level->width = levelWidth;
			level->height = levelHeight;
			level->depth = levelDepth;
			level->mipLevel = i;
			level->face = face;
			level->data = NULL;

			// Extents of the next level: halved, but never below one.
			levelWidth = max(1, levelWidth / 2);
			levelHeight = max(1, levelHeight / 2);
			levelDepth = max(1, levelDepth / 2);
		}
	}
}
// Release the image array, if any, and zero the layout counters.
// Does nothing when no layout has been set.
void InputOptions::resetTextureLayout()
{
	if (m.images == NULL)
	{
		return;
	}

	// Delete image array.
	delete [] m.images;
	m.images = NULL;

	m.imageCount = 0;
	m.mipmapCount = 0;
	m.faceCount = 0;
}
// Copies the data to our internal structures.
bool InputOptions::setMipmapData(const void * data, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/)
{
nvCheck(depth == 1);
const int idx = face * m.mipmapCount + mipLevel;
if (m.images[idx].width != width || m.images[idx].height != height || m.images[idx].depth != depth || m.images[idx].mipLevel != mipLevel || m.images[idx].face != face)
{
// Invalid dimension or index.
return false;
}
m.images[idx].data = new nv::Image();
m.images[idx].data->allocate(width, height);
memcpy(m.images[idx].data->pixels(), data, width * height * 4);
return true;
}
/// Describe the format of the input.
/// @param format            Pixel format of the data passed to setMipmapData().
/// @param alphaTransparency Whether the alpha channel is used for transparency.
void InputOptions::setFormat(InputFormat format, bool alphaTransparency)
{
	m.alphaTransparency = alphaTransparency;
	m.inputFormat = format;
}
/// Set gamma settings.
/// @param inputGamma  Gamma of the input data.
/// @param outputGamma Gamma of the output data.
void InputOptions::setGamma(float inputGamma, float outputGamma)
{
	m.outputGamma = outputGamma;
	m.inputGamma = inputGamma;
}
/// Set texture wrapping mode.
/// Only stores the mode; how it is consumed is up to the code reading the options.
void InputOptions::setWrapMode(WrapMode mode)
{
m.wrapMode = mode;
}
/// Set mipmapping options.
/// @param generateMipmaps Whether mipmaps should be generated.
/// @param filter          Filter used to generate them.
/// @param maxLevel        Stored as the maximum level; reset() uses -1 as the default.
void InputOptions::setMipmapping(bool generateMipmaps, MipmapFilter filter/*= MipmapFilter_Kaiser*/, int maxLevel/*= -1*/)
{
	m.maxLevel = maxLevel;
	m.mipmapFilter = filter;
	m.generateMipmaps = generateMipmaps;
}
/// Set quantization options.
/// @param colorDithering Enable dithering of the color channels.
/// @param alphaDithering Enable dithering of the alpha channel.
/// @param binaryAlpha    Enable binary alpha quantization.
/// @param alphaThreshold Reference value used for binary alpha quantization.
/// @warning Do not enable dithering unless you know what you are doing. Quantization
/// introduces errors. It's better to let the compressor quantize the result to
/// minimize the error, instead of quantizing the data before handing it to
/// the compressor.
void InputOptions::setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold/*= 127*/)
{
	// Alpha quantization.
	m.binaryAlpha = binaryAlpha;
	m.alphaThreshold = alphaThreshold;

	// Dithering.
	m.enableAlphaDithering = alphaDithering;
	m.enableColorDithering = colorDithering;
}
/// Indicate whether input is a normal map or not.
/// Only records the flag in the private options.
void InputOptions::setNormalMap(bool b)
{
m.normalMap = b;
}
/// Enable normal map conversion.
/// Only records the flag; the conversion itself happens elsewhere.
void InputOptions::setConvertToNormalMap(bool convert)
{
m.convertToNormalMap = convert;
}
/// Set height evaluation factors.
/// The four scales are stored as given, one per channel in red, green, blue,
/// alpha order. Unlike setNormalFilter(), they are not divided by their sum.
void InputOptions::setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale)
{
// Do not normalize height factors.
// float total = redScale + greenScale + blueScale + alphaScale;
m.heightFactors = Vector4(redScale, greenScale, blueScale, alphaScale);
}
/// Set normal map conversion filter.
void InputOptions::setNormalFilter(float small, float medium, float big, float large)
{
float total = small + medium + big + large;
m.bumpFrequencyScale = Vector4(small, medium, big, large) / total;
}
/// Enable mipmap normalization.
/// Only records the flag in the private options.
void InputOptions::setNormalizeMipmaps(bool normalize)
{
m.normalizeMipmaps = normalize;
}

95
src/nvtt/InputOptions.h Normal file
View File

@ -0,0 +1,95 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_INPUTOPTIONS_H
#define NV_TT_INPUTOPTIONS_H
#include <nvcore/Ptr.h>
#include <nvmath/Vector.h>
#include <nvimage/Image.h>
#include "nvtt.h"
namespace nvtt
{
/// Private implementation data of InputOptions.
struct InputOptions::Private
{
	// Start without a texture layout and with the flags that
	// InputOptions::reset() does not assign in a defined state. The remaining
	// options are assigned by InputOptions::reset().
	Private() :
		faceCount(0),
		mipmapCount(0),
		imageCount(0),
		images(NULL),
		normalMap(false),
		convertToConeMap(false)
	{
	}

	WrapMode wrapMode;
	TextureType textureType;
	InputFormat inputFormat;

	// Texture layout: imageCount == faceCount * mipmapCount image descriptors,
	// stored face by face.
	int faceCount;
	int mipmapCount;
	int imageCount;

	struct Image;
	Image * images;	// NULL while no layout has been set.

	// Quantization.
	bool enableColorDithering;
	bool enableAlphaDithering;
	bool binaryAlpha;
	int alphaThreshold;	// reference value used for binary alpha quantization.
	bool alphaTransparency;	// set to true if alpha is used for transparency.

	// Gamma conversion.
	float inputGamma;
	float outputGamma;

	// Mipmap generation options.
	bool generateMipmaps;
	int maxLevel;
	MipmapFilter mipmapFilter;

	// Normal map options.
	bool normalMap;
	bool normalizeMipmaps;
	bool convertToNormalMap;
	nv::Vector4 heightFactors;	// Used for cone mapping too.
	nv::Vector4 bumpFrequencyScale;

	// Cone map options.
	bool convertToConeMap;
};
// Internal image structure.
// One instance describes a single face / mipmap level of the input texture.
struct InputOptions::Private::Image
{
// The integer members are left uninitialized here; InputOptions::setTextureLayout()
// assigns all of them right after allocating the array.
Image() {}
int mipLevel;
int face;
int width;
int height;
int depth;
// Pixel data of this image. Assigned NULL by setTextureLayout() and replaced
// with a newly allocated nv::Image by setMipmapData().
nv::AutoPtr<nv::Image> data;
};
} // nvtt namespace
#endif // NV_TT_INPUTOPTIONS_H

View File

@ -0,0 +1,32 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "nvtt.h"
using namespace nvtt;
/// Set default output options.
/// Currently a placeholder: the body does not assign anything.
void OutputOptions::reset()
{
// endianness = native...
}

View File

@ -0,0 +1,360 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "QuickCompressDXT.h"
using namespace nv;
using namespace QuickCompress;
inline static void extractColorBlockRGB(const ColorBlock & rgba, Vector3 block[16])
{
for (int i = 0; i < 16; i++)
{
const Color32 c = rgba.color(i);
block[i] = Vector3(c.r, c.g, c.b);
}
}
// Collect the RGB values of the colors whose alpha is above 127, packed at the
// start of 'block' in their original order.
// Returns the number of colors written; entries past that count are not touched.
inline static uint extractColorBlockRGBA(const ColorBlock & rgba, Vector3 block[16])
{
	int count = 0;
	for (int texel = 0; texel < 16; texel++)
	{
		const Color32 src = rgba.color(texel);
		if (src.a <= 127)
		{
			// Skip colors at or below the alpha threshold.
			continue;
		}
		block[count] = Vector3(src.r, src.g, src.b);
		count++;
	}
	return count;
}
// find minimum and maximum colors based on bounding box in color space
inline static void findMinMaxColorsBox(const Vector3 * block, uint num, Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
{
*maxColor = Vector3(0, 0, 0);
*minColor = Vector3(255, 255, 255);
for (uint i = 0; i < num; i++)
{
*maxColor = max(*maxColor, block[i]);
*minColor = min(*minColor, block[i]);
}
}
inline static void selectDiagonal(const Vector3 * block, uint num, Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
{
Vector3 center = (*maxColor + *minColor) * 0.5;
Vector2 covariance = Vector2(zero);
for (uint i = 0; i < num; i++)
{
Vector3 t = block[i] - center;
covariance += t.xy() * t.z();
}
float x0 = maxColor->x();
float y0 = maxColor->y();
float x1 = minColor->x();
float y1 = minColor->y();
if (covariance.x() < 0) {
swap(x0, x1);
}
if (covariance.y() < 0) {
swap(y0, y1);
}
maxColor->set(x0, y0, maxColor->z());
minColor->set(x1, y1, minColor->z());
}
// Move both endpoints towards each other by 1/16 of their difference minus a
// small constant, then clamp the results to [0, 255].
// NOTE(review): the colors handled here are in the 0..255 range (see the clamp
// below), while the constant (8.0f / 255.0f) / 16.0f looks like it was written
// for colors in the 0..1 range -- confirm the intended scale.
inline static void insetBBox(Vector3 * __restrict maxColor, Vector3 * __restrict minColor)
{
Vector3 inset = (*maxColor - *minColor) / 16.0f - (8.0f / 255.0f) / 16.0f;
*maxColor = clamp(*maxColor - inset, 0.0f, 255.0f);
*minColor = clamp(*minColor + inset, 0.0f, 255.0f);
}
// Quantize a color in the 0..255 range to 5:6:5 bits.
// Returns the packed 16 bit value (red in the top 5 bits, green in the middle
// 6, blue in the low 5) and replaces *v with that quantized color expanded
// back to 8 bits per channel by replicating the high bits into the low ones.
inline static uint16 roundAndExpand(Vector3 * v)
{
	// Scale to the target bit depth, clamp, and round to nearest.
	uint red = uint(clamp(v->x() * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);
	uint green = uint(clamp(v->y() * (63.0f / 255.0f), 0.0f, 63.0f) + 0.5f);
	uint blue = uint(clamp(v->z() * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);

	const uint16 packed = (red << 11) | (green << 5) | blue;

	// Bit replication back to 8 bits.
	red = (red << 3) | (red >> 2);
	green = (green << 2) | (green >> 4);
	blue = (blue << 3) | (blue >> 2);

	*v = Vector3(red, green, blue);
	return packed;
}
// Squared euclidean distance between two colors.
inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
{
	const Vector3 delta = c0 - c1;
	return dot(delta, delta);
}
// Compute the 2 bit palette index of each of the 16 colors for the four color
// mode and return them packed, color i occupying bits 2*i and 2*i+1.
// Palette: 0 = maxColor, 1 = minColor, 2 and 3 = the points at 1/3 and 2/3 of
// the way from maxColor to minColor.
inline static uint computeIndices4(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
Vector3 palette[4];
palette[0] = maxColor;
palette[1] = minColor;
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
uint indices = 0;
for(int i = 0; i < 16; i++)
{
// Squared distance from the color to each palette entry.
float d0 = colorDistance(palette[0], block[i]);
float d1 = colorDistance(palette[1], block[i]);
float d2 = colorDistance(palette[2], block[i]);
float d3 = colorDistance(palette[3], block[i]);
// Pairwise comparisons, combined below without branching into the index;
// presumably selecting the nearest palette entry -- verify before changing.
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
// x2 is the low bit of the index, (x0 | x1) the high bit.
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
// Compute the 2 bit palette index of each of the 16 colors for the three color
// mode and return them packed, color i occupying bits 2*i and 2*i+1.
// Palette: 0 = minColor, 1 = maxColor, 2 = their midpoint. Index 3 is given to
// every color whose alpha is below 128.
inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColor, Vector3::Arg minColor)
{
	const Vector3 first = minColor;
	const Vector3 second = maxColor;
	const Vector3 middle = (first + second) * 0.5f;

	uint packed = 0;
	for (int texel = 0; texel < 16; texel++)
	{
		const Color32 src = rgba.color(texel);
		const Vector3 rgb = Vector3(src.r, src.g, src.b);

		const float toFirst = colorDistance(first, rgb);
		const float toSecond = colorDistance(second, rgb);
		const float toMiddle = colorDistance(middle, rgb);

		// Ties fall through to the later entries, the midpoint being the last resort.
		uint selected = 2;
		if (src.a < 128)
		{
			selected = 3;
		}
		else if (toFirst < toSecond && toFirst < toMiddle)
		{
			selected = 0;
		}
		else if (toSecond < toMiddle)
		{
			selected = 1;
		}

		packed |= selected << (2 * texel);
	}
	return packed;
}
// Recompute the endpoints of a four color block by least squares, keeping the
// indices currently stored in dxtBlock fixed, then re-quantize the endpoints
// and recompute the indices for them.
// The block is left untouched when the least squares system is singular.
static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
{
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
Vector3 alphax_sum(zero);
Vector3 betax_sum(zero);
for( int i = 0; i < 16; ++i )
{
const uint bits = dxtBlock->indices >> (2 * i);
// beta is the weight of the second endpoint for this index:
// index 0 -> 0, index 1 -> 1, index 2 -> 1/3, index 3 -> 2/3.
float beta = (bits & 1);
if (bits & 2) beta = (1 + beta) / 3.0f;
float alpha = 1.0f - beta;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * block[i];
betax_sum += beta * block[i];
}
// Determinant of the 2x2 normal equations.
float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
if (equal(denom, 0.0f)) return;
float factor = 1.0f / denom;
Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
a = clamp(a, 0, 255);
b = clamp(b, 0, 255);
// roundAndExpand also replaces a and b with their quantized values.
uint16 color0 = roundAndExpand(&a);
uint16 color1 = roundAndExpand(&b);
// Keep the larger packed value in col0.
if (color0 < color1)
{
swap(a, b);
swap(color0, color1);
}
dxtBlock->col0 = Color16(color0);
dxtBlock->col1 = Color16(color1);
dxtBlock->indices = computeIndices4(block, a, b);
}
/*static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock)
{
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
Vector3 alphax_sum(zero);
Vector3 betax_sum(zero);
for( int i = 0; i < 16; ++i )
{
const uint bits = dxtBlock->indices >> (2 * i);
float beta = (bits & 1);
if (bits & 2) beta = 0.5f;
float alpha = 1.0f - beta;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * block[i];
betax_sum += beta * block[i];
}
float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
if (equal(denom, 0.0f)) return;
float factor = 1.0f / denom;
Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
a = clamp(a, 0, 255);
b = clamp(b, 0, 255);
uint16 color0 = roundAndExpand(&a);
uint16 color1 = roundAndExpand(&b);
if (color0 < color1)
{
swap(a, b);
swap(color0, color1);
}
dxtBlock->col0 = Color16(color1);
dxtBlock->col1 = Color16(color0);
dxtBlock->indices = computeIndices3(block, a, b);
}*/
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
// read block
Vector3 block[16];
extractColorBlockRGB(rgba, block);
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
selectDiagonal(block, 16, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);
if (color0 < color1)
{
swap(maxColor, minColor);
swap(color0, color1);
}
dxtBlock->col0 = Color16(color0);
dxtBlock->col1 = Color16(color1);
dxtBlock->indices = computeIndices4(block, maxColor, minColor);
optimizeEndPoints4(block, dxtBlock);
}
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
if (!rgba.hasAlpha())
{
compressDXT1(rgba, dxtBlock);
}
else
{
// read block
Vector3 block[16];
uint num = extractColorBlockRGBA(rgba, block);
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, num, &maxColor, &minColor);
selectDiagonal(block, num, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);
if (color0 < color1)
{
swap(maxColor, minColor);
swap(color0, color1);
}
dxtBlock->col0 = Color16(color1);
dxtBlock->col1 = Color16(color0);
dxtBlock->indices = computeIndices3(rgba, maxColor, minColor);
// optimizeEndPoints(block, dxtBlock);
}
}

View File

@ -0,0 +1,41 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_QUICKCOMPRESSDXT_H
#define NV_TT_QUICKCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
// Forward declarations; only references and pointers to these are used here.
struct ColorBlock;
struct BlockDXT1;
// Fast DXT1 block compressors.
namespace QuickCompress
{
// Compress the 16 colors of 'rgba' into 'dxtBlock', ignoring alpha.
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
// Same, with support for 1 bit alpha.
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
}
} // nv namespace
#endif // NV_TT_QUICKCOMPRESSDXT_H

View File

@ -0,0 +1,979 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "CudaMath.h"
#define NUM_THREADS 64 // Number of threads per block.
// __debugsync() expands to __syncthreads() only when building for device
// emulation; in every other build it expands to nothing.
#if __DEVICE_EMULATION__
#define __debugsync() __syncthreads()
#else
#define __debugsync()
#endif
// Shorthand names for the unsigned integer types used in this file.
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
// Exchange the values of two objects by copy.
template <class T>
__device__ inline void swap(T & a, T & b)
{
	const T saved = b;
	b = a;
	a = saved;
}
// Per channel weights applied to the error by the evalPermutation functions
// (through dot(e, kColorMetric)). The default weights all channels equally.
__constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f };
////////////////////////////////////////////////////////////////////////////////
// Sort colors
////////////////////////////////////////////////////////////////////////////////
// Compute the rank of values[threadIdx.x] among the 16 entries of 'values' and
// store it in cmp[threadIdx.x].
// The rank starts as the number of entries that are strictly smaller; the second
// pass then bumps it whenever it matches the rank of a lower numbered thread.
// Callers in this file invoke it with threadIdx.x < 16.
// NOTE(review): the second pass reads cmp[] entries written by other threads and
// contains no synchronization; presumably it relies on the 16 threads running in
// lockstep within one warp -- confirm before restructuring.
__device__ void sortColors(const float * values, int * cmp)
{
int tid = threadIdx.x;
// Count the entries that are strictly smaller than this thread's value.
cmp[tid] = (values[0] < values[tid]);
cmp[tid] += (values[1] < values[tid]);
cmp[tid] += (values[2] < values[tid]);
cmp[tid] += (values[3] < values[tid]);
cmp[tid] += (values[4] < values[tid]);
cmp[tid] += (values[5] < values[tid]);
cmp[tid] += (values[6] < values[tid]);
cmp[tid] += (values[7] < values[tid]);
cmp[tid] += (values[8] < values[tid]);
cmp[tid] += (values[9] < values[tid]);
cmp[tid] += (values[10] < values[tid]);
cmp[tid] += (values[11] < values[tid]);
cmp[tid] += (values[12] < values[tid]);
cmp[tid] += (values[13] < values[tid]);
cmp[tid] += (values[14] < values[tid]);
cmp[tid] += (values[15] < values[tid]);
// Resolve elements with the same index.
if (tid > 0 && cmp[tid] == cmp[0]) ++cmp[tid];
if (tid > 1 && cmp[tid] == cmp[1]) ++cmp[tid];
if (tid > 2 && cmp[tid] == cmp[2]) ++cmp[tid];
if (tid > 3 && cmp[tid] == cmp[3]) ++cmp[tid];
if (tid > 4 && cmp[tid] == cmp[4]) ++cmp[tid];
if (tid > 5 && cmp[tid] == cmp[5]) ++cmp[tid];
if (tid > 6 && cmp[tid] == cmp[6]) ++cmp[tid];
if (tid > 7 && cmp[tid] == cmp[7]) ++cmp[tid];
if (tid > 8 && cmp[tid] == cmp[8]) ++cmp[tid];
if (tid > 9 && cmp[tid] == cmp[9]) ++cmp[tid];
if (tid > 10 && cmp[tid] == cmp[10]) ++cmp[tid];
if (tid > 11 && cmp[tid] == cmp[11]) ++cmp[tid];
if (tid > 12 && cmp[tid] == cmp[12]) ++cmp[tid];
if (tid > 13 && cmp[tid] == cmp[13]) ++cmp[tid];
if (tid > 14 && cmp[tid] == cmp[14]) ++cmp[tid];
}
////////////////////////////////////////////////////////////////////////////////
// Load color block to shared mem
////////////////////////////////////////////////////////////////////////////////
// Load the 16 colors of block blockIdx.x from 'image' and store them in 'colors'
// ordered by their projection onto the best fit line of the block.
// Only threads with threadIdx.x < 16 do any work, one color each.
//   image:  packed 32 bit colors, 16 consecutive entries per block.
//   colors: receives the colors as floats in [0, 1]; bits 16-23 go to x,
//           bits 8-15 to y and bits 0-7 to z.
//   sums:   filled in by colorSums(); sums[0] is passed on to bestFitLine().
//   xrefs:  xrefs[i] receives the position in 'colors' that color i was moved to.
// colorSums() and bestFitLine() are defined outside of this chunk.
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16])
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
// Projection of each color onto the best fit axis.
__shared__ float dps[16];
if (idx < 16)
{
// Read color and copy to shared mem.
uint c = image[(bid) * 16 + idx];
colors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f);
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
// No need to synchronize, 16 < warp size.
// In device emulation builds the if block is closed, all threads are
// synchronized, and the block is reopened.
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0]);
dps[idx] = dot(colors[idx], axis);
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, xrefs);
// Scatter each color to its sorted position.
float3 tmp = colors[idx];
colors[xrefs[idx]] = tmp;
}
}
// Weighted variant of loadColorBlock: also derives a weight from the alpha
// channel of each color and stores the colors premultiplied by that weight.
// Only threads with threadIdx.x < 16 do any work, one color each.
//   image:   packed 32 bit colors, 16 consecutive entries per block.
//   colors:  receives color * weight, in sorted order.
//   sums:    filled in by colorSums(); sums[0] is passed on to bestFitLine().
//   weights: receives (alpha + 1) / 256 per color, in the same sorted order.
//   xrefs:   xrefs[i] receives the position that color i was moved to.
// colorSums() and bestFitLine() are defined outside of this chunk.
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16])
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
// Unweighted colors, used only to compute the sort key.
__shared__ float3 rawColors[16];
// Projection of each unweighted color onto the best fit axis.
__shared__ float dps[16];
if (idx < 16)
{
// Read color and copy to shared mem.
uint c = image[(bid) * 16 + idx];
rawColors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f);
rawColors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
rawColors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
// Bits 24-31 hold the alpha; the +1 keeps the weight above zero.
weights[idx] = (((c >> 24) & 0xFF) + 1) * (1.0f / 256.0f);
colors[idx] = rawColors[idx] * weights[idx];
// No need to synchronize, 16 < warp size.
// In device emulation builds the if block is closed, all threads are
// synchronized, and the block is reopened.
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
// The axis is fitted to the weighted colors, the projection uses the raw ones.
colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0]);
dps[idx] = dot(rawColors[idx], axis);
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, xrefs);
// Scatter each color and its weight to the sorted position.
float3 tmp = colors[idx];
colors[xrefs[idx]] = tmp;
float w = weights[idx];
weights[xrefs[idx]] = w;
}
}
////////////////////////////////////////////////////////////////////////////////
// Round color to RGB565 and expand
////////////////////////////////////////////////////////////////////////////////
// Quantize a color to 5:6:5 bits.
// Each component is saturated to [0, 1], scaled to 31 (x, z) or 63 (y) and
// rounded with rintf. The packed value (x in the top 5 bits, y in the middle 6,
// z in the low 5) is written to *w, and the quantized color scaled back to
// floats is returned.
inline __device__ float3 roundAndExpand(float3 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 31.0f);
v.y = rintf(__saturatef(v.y) * 63.0f);
v.z = rintf(__saturatef(v.z) * 31.0f);
*w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z;
// The scale factors are close to, but not exactly, 1/31 and 1/63.
v.x *= 0.03227752766457f; // approximate integer bit expansion.
v.y *= 0.01583151765563f;
v.z *= 0.03227752766457f;
return v;
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate permutations
////////////////////////////////////////////////////////////////////////////////
// Evaluate one index assignment ('permutation', 2 bits per color) in four color
// mode: fit both endpoints by least squares, quantize them to 5:6:5, and return
// the resulting error weighted by kColorMetric.
// The packed endpoints are written to *start and *end.
// The returned value leaves out the sum of the squared colors, which does not
// depend on the permutation, so it is only meaningful for comparing
// permutations of the same block.
// NOTE(review): nothing guards against a zero determinant in 'factor'.
__device__ float evalPermutation4(const float3 * colors, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f);
// Compute alpha & beta for this permutation.
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
// Weight of the second endpoint: index 0 -> 0, 1 -> 1, 2 -> 1/3, 3 -> 2/3.
float beta = (bits & 1);
if (bits & 2) beta = (1 + beta) / 3.0f;
float alpha = 1.0f - beta;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * colors[i];
betax_sum += beta * colors[i];
}
// Solve the 2x2 normal equations.
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6-5 color and expand...
a = roundAndExpand(a, start);
b = roundAndExpand(b, end);
// compute the error
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return dot(e, kColorMetric);
}
// Evaluate one index assignment ('permutation', 2 bits per color) in three
// color mode: fit both endpoints by least squares, quantize them to 5:6:5, and
// return the resulting error weighted by kColorMetric.
// The packed endpoints are written to *start and *end.
// The returned value leaves out the sum of the squared colors, which does not
// depend on the permutation, so it is only meaningful for comparing
// permutations of the same block.
// NOTE(review): nothing guards against a zero determinant in 'factor'.
__device__ float evalPermutation3(const float3 * colors, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f);
// Compute alpha & beta for this permutation.
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
// Weight of the second endpoint: index 0 -> 0, 1 -> 1, 2 and 3 -> 1/2.
float beta = (bits & 1);
if (bits & 2) beta = 0.5f;
float alpha = 1.0f - beta;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * colors[i];
betax_sum += beta * colors[i];
}
// Solve the 2x2 normal equations.
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6-5 color and expand...
a = roundAndExpand(a, start);
b = roundAndExpand(b, end);
// compute the error
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return dot(e, kColorMetric);
}
// Lookup tables indexed by the 2 bit palette index, used by the color_sum
// variants of evalPermutation4 / evalPermutation3.
// alphaTable4 / alphaTable3: weight of the first endpoint, scaled by 9 (four
// color mode) or by 4 (three color mode) so that every entry is an integer.
__constant__ float alphaTable4[4] = { 9.0f, 0.0f, 6.0f, 3.0f };
__constant__ float alphaTable3[4] = { 4.0f, 0.0f, 2.0f, 2.0f };
// prods4 / prods3: three terms packed into one integer, so that the sums over
// all 16 colors can be accumulated with a single addition per color. They are
// unpacked as: bits 16 and up -> alpha2_sum, bits 8-15 -> beta2_sum,
// bits 0-7 -> alphabeta_sum.
// NOTE(review): alphaTable3[2] == alphaTable3[3] == 2 (alpha = 1/2 scaled by 4),
// which suggests 0x010101 for prods3[2] and prods3[3]; the values below differ
// and resemble the prods4 entries -- confirm whether this is intended.
__constant__ const uint prods4[4] = { 0x090000,0x000900,0x040102,0x010402 };
__constant__ const uint prods3[4] = { 0x040000,0x000400,0x040101,0x010401 };
// Table driven variant of evalPermutation4 for four color mode.
// All sums are kept scaled by 9 so that the per index terms are integers:
// alphaTable4 provides the scaled endpoint weight and prods4 the packed scaled
// products. 'color_sum' is used as the sum of the 16 colors to derive betax_sum.
// The packed endpoints are written to *start and *end; the returned error is
// scaled back by 1/9 and, as in the other variants, leaves out the permutation
// independent sum of the squared colors.
// NOTE(review): nothing guards against a zero determinant in 'factor'.
__device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
// Packed accumulator for alpha2_sum, beta2_sum and alphabeta_sum.
uint akku = 0;
// Compute alpha & beta for this permutation.
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
// Unpack the three accumulated sums.
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
// alpha + beta == 1 for every index, so the scaled beta sum follows from the total.
float3 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6-5 color and expand...
a = roundAndExpand(a, start);
b = roundAndExpand(b, end);
// compute the error
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * dot(e, kColorMetric);
}
// Table driven variant of evalPermutation3 for three color mode.
// All sums are kept scaled by 4: alphaTable3 provides the scaled endpoint weight
// and prods3 the packed scaled products. 'color_sum' is used as the sum of the
// 16 colors to derive betax_sum.
// The packed endpoints are written to *start and *end; the returned error is
// scaled back by 1/4 and, as in the other variants, leaves out the permutation
// independent sum of the squared colors.
// NOTE(review): nothing guards against a zero determinant in 'factor'.
__device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
// Packed accumulator for alpha2_sum, beta2_sum and alphabeta_sum.
uint akku = 0;
// Compute alpha & beta for this permutation.
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable3[bits & 3] * colors[i];
akku += prods3[bits & 3];
}
// Unpack the three accumulated sums.
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
// alpha + beta == 1 for every index, so the scaled beta sum follows from the total.
float3 betax_sum = 4.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6-5 color and expand...
a = roundAndExpand(a, start);
b = roundAndExpand(b, end);
// compute the error
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 4.0f) * dot(e, kColorMetric);
}
// Weighted variant of evalPermutation4 for four color mode.
// The squared weight sums are scaled by weights[i], while colors[i] is used as
// is; presumably the colors were already multiplied by their weights, as the
// weighted loadColorBlock does, and color_sum is the sum of those weighted
// colors -- confirm against the caller.
// The packed endpoints are written to *start and *end; the returned error is
// weighted by kColorMetric and leaves out the permutation independent term.
// NOTE(review): nothing guards against a zero determinant in 'factor'.
__device__ float evalPermutation4(const float3 * colors, const float * weights, float3 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
// Compute alpha & beta for this permutation.
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
// Weight of the second endpoint: index 0 -> 0, 1 -> 1, 2 -> 1/3, 3 -> 2/3.
float beta = (bits & 1);
if (bits & 2) beta = (1 + beta) / 3.0f;
float alpha = 1.0f - beta;
alpha2_sum += alpha * alpha * weights[i];
beta2_sum += beta * beta * weights[i];
alphabeta_sum += alpha * beta * weights[i];
alphax_sum += alpha * colors[i];
}
// alpha + beta == 1 for every index, so the beta sum follows from the total.
float3 betax_sum = color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6-5 color and expand...
a = roundAndExpand(a, start);
b = roundAndExpand(b, end);
// compute the error
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return dot(e, kColorMetric);
}
/*
__device__ float evalPermutation3(const float3 * colors, const float * weights, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f);
// Compute alpha & beta for this permutation.
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
float beta = (bits & 1);
if (bits & 2) beta = 0.5f;
float alpha = 1.0f - beta;
alpha2_sum += alpha * alpha * weights[i];
beta2_sum += beta * beta * weights[i];
alphabeta_sum += alpha * beta * weights[i];
alphax_sum += alpha * colors[i];
}
float3 betax_sum = color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6-5 color and expand...
a = roundAndExpand(a, start);
b = roundAndExpand(b, end);
// compute the error
float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return dot(e, kColorMetric);
}
*/
////////////////////////////////////////////////////////////////////////////////
// Evaluate all permutations
////////////////////////////////////////////////////////////////////////////////
// Per-thread search over the permutation table, in both palette modes.
//
// Each thread visits table entries idx, idx + NUM_THREADS, ... (at most 16 of
// them, and only entries below 992) with the 4-color evaluator, then revisits
// the entries below 160 (at most 3 of them) with the 3-color evaluator.
// The thread's best endpoints / permutation are returned through bestStart,
// bestEnd and bestPermutation, and its best error is stored in errors[idx].
//
// NOTE(review): the outputs are only assigned once some candidate beats
// FLT_MAX; this presumably always happens for the configured NUM_THREADS --
// confirm.
__device__ void evalAllPermutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
    const int idx = threadIdx.x;

    float lowestError = FLT_MAX;

    // Entries below 160 are cached here during the first pass. A thread reads
    // back exactly the slots it wrote itself (same pidx sequence in both passes).
    __shared__ uint s_permutations[160];

    // Pass 1: 4-color mode.
    for (int step = 0, pidx = idx; step < 16 && pidx < 992; step++, pidx += NUM_THREADS)
    {
        ushort candStart, candEnd;
        const uint candidate = permutations[pidx];
        if (pidx < 160) s_permutations[pidx] = candidate;

        const float candError = evalPermutation4(colors, colorSum, candidate, &candStart, &candEnd);
        if (candError < lowestError)
        {
            lowestError = candError;
            bestPermutation = candidate;
            bestStart = candStart;
            bestEnd = candEnd;
        }
    }

    // Keep start >= end for the 4-color result; exchanging the endpoints
    // requires toggling the low bit of every selector.
    if (bestStart < bestEnd)
    {
        swap(bestEnd, bestStart);
        bestPermutation ^= 0x55555555; // Flip indices.
    }

    // Pass 2: 3-color mode over the cached entries.
    for (int step = 0, pidx = idx; step < 3 && pidx < 160; step++, pidx += NUM_THREADS)
    {
        ushort candStart, candEnd;
        const uint candidate = s_permutations[pidx];

        const float candError = evalPermutation3(colors, colorSum, candidate, &candStart, &candEnd);
        if (candError < lowestError)
        {
            lowestError = candError;
            bestPermutation = candidate;
            bestStart = candStart;
            bestEnd = candEnd;

            // Keep start <= end for a 3-color result; only selectors whose
            // high bit is clear get their low bit toggled.
            if (bestStart > bestEnd)
            {
                swap(bestEnd, bestStart);
                bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
            }
        }
    }

    errors[idx] = lowestError;
}
/*
__device__ void evalAllPermutations(const float3 * colors, const float * weights, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
__shared__ uint s_permutations[160];
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
if (pidx < 160) s_permutations[pidx] = permutation;
float error = evalPermutation4(colors, weights, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
for(int i = 0; i < 3; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 160) break;
ushort start, end;
uint permutation = s_permutations[pidx];
float error = evalPermutation3(colors, weights, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
if (bestStart > bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
}
}
}
errors[idx] = bestError;
}
*/
// Per-thread search over the permutation table using only the weighted
// 4-color evaluator.
//
// Each thread visits table entries idx, idx + NUM_THREADS, ... (at most 16 of
// them, and only entries below 992). The thread's best endpoints and
// permutation are returned through bestStart, bestEnd and bestPermutation, and
// its best error is stored in errors[idx].
__device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
    const int idx = threadIdx.x;

    float lowestError = FLT_MAX;

    for (int step = 0, pidx = idx; step < 16 && pidx < 992; step++, pidx += NUM_THREADS)
    {
        ushort candStart, candEnd;
        const uint candidate = permutations[pidx];

        const float candError = evalPermutation4(colors, weights, colorSum, candidate, &candStart, &candEnd);
        if (candError < lowestError)
        {
            lowestError = candError;
            bestPermutation = candidate;
            bestStart = candStart;
            bestEnd = candEnd;
        }
    }

    // Keep start >= end; exchanging the endpoints requires toggling the low
    // bit of every selector.
    if (bestStart < bestEnd)
    {
        swap(bestEnd, bestStart);
        bestPermutation ^= 0x55555555; // Flip indices.
    }

    errors[idx] = lowestError;
}
////////////////////////////////////////////////////////////////////////////////
// Find index with minimum error
////////////////////////////////////////////////////////////////////////////////
// Parallel reduction that finds which thread holds the smallest error.
//
// errors - one entry per thread (indexed by threadIdx.x). The array is
//          overwritten while reducing, so its contents are not preserved.
//
// Returns the index of the thread whose error was the minimum; every thread
// receives the same value (read from indices[0] after the final barrier).
__device__ int findMinError(float * errors)
{
const int idx = threadIdx.x;
// indices[i] records which thread originally produced the value now held in errors[i].
__shared__ int indices[NUM_THREADS];
indices[idx] = idx;
#if __DEVICE_EMULATION__
// Emulation path: plain tree reduction with a barrier before every step.
for(int d = NUM_THREADS/2; d > 0; d >>= 1)
{
__syncthreads();
if (idx < d)
{
float err0 = errors[idx];
float err1 = errors[idx + d];
if (err1 < err0) {
errors[idx] = err1;
indices[idx] = indices[idx + d];
}
}
}
#else
// Device path: barrier-synchronised steps only while the stride is above 32.
for(int d = NUM_THREADS/2; d > 32; d >>= 1)
{
__syncthreads();
if (idx < d)
{
float err0 = errors[idx];
float err1 = errors[idx + d];
if (err1 < err0) {
errors[idx] = err1;
indices[idx] = indices[idx + d];
}
}
}
__syncthreads();
// unroll last 6 iterations
// NOTE(review): no barriers between the steps below; this presumably relies on
// threads 0..31 executing in lockstep as a single warp -- confirm for the
// target hardware/compiler.
// NOTE(review): reads errors[idx + 32], so this assumes NUM_THREADS >= 64 -- TODO confirm.
if (idx < 32)
{
if (errors[idx + 32] < errors[idx]) {
errors[idx] = errors[idx + 32];
indices[idx] = indices[idx + 32];
}
if (errors[idx + 16] < errors[idx]) {
errors[idx] = errors[idx + 16];
indices[idx] = indices[idx + 16];
}
if (errors[idx + 8] < errors[idx]) {
errors[idx] = errors[idx + 8];
indices[idx] = indices[idx + 8];
}
if (errors[idx + 4] < errors[idx]) {
errors[idx] = errors[idx + 4];
indices[idx] = indices[idx + 4];
}
if (errors[idx + 2] < errors[idx]) {
errors[idx] = errors[idx + 2];
indices[idx] = indices[idx + 2];
}
if (errors[idx + 1] < errors[idx]) {
errors[idx] = errors[idx + 1];
indices[idx] = indices[idx + 1];
}
}
#endif
// Make the final indices[0] visible to every thread before returning it.
__syncthreads();
return indices[0];
}
////////////////////////////////////////////////////////////////////////////////
// Save DXT block
////////////////////////////////////////////////////////////////////////////////
// Writes one compressed block to result[blockIdx.x].
//
// start, end  - packed 16-bit endpoint colors.
// permutation - 16 two-bit palette selectors in the order the colors were
//               evaluated.
// xrefs       - for each output texel i, xrefs[i] is the position of that
//               texel's selector inside permutation.
// result      - output array; .x receives the endpoints (end in the upper
//               16 bits, start in the lower 16), .y receives the selectors.
__device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result)
{
    const int bid = blockIdx.x;

    // With identical endpoints every selector is forced to zero.
    if (start == end)
    {
        permutation = 0;
    }

    // Reorder permutation.
    uint indices = 0;
    for (int i = 0; i < 16; i++)
    {
        const int ref = xrefs[i];
        indices |= ((permutation >> (2 * ref)) & 3) << (2 * i);
    }

    // Write endpoints. The shift is done on an unsigned value: `end` would
    // otherwise be promoted to a signed int, and shifting a value with bit 15
    // set left by 16 overflows it.
    result[bid].x = (uint(end) << 16) | uint(start);

    // Write palette indices.
    result[bid].y = indices;
}
////////////////////////////////////////////////////////////////////////////////
// Compress color block
////////////////////////////////////////////////////////////////////////////////
// Kernel: compresses one color block per thread block.
//
// permutations - table of candidate index assignments.
// image        - input colors, read by loadColorBlock.
// result       - output array, one uint2 per thread block.
__global__ void compress(const uint * permutations, const uint * image, uint2 * result)
{
    __shared__ float3 colors[16];
    __shared__ float3 sums[16];
    __shared__ int xrefs[16];
    __shared__ float errors[NUM_THREADS];

    loadColorBlock(image, colors, sums, xrefs);

    // Everything loaded above must be visible to all threads before searching.
    __syncthreads();

    // Each thread searches its share of the permutation table.
    ushort bestStart, bestEnd;
    uint bestPermutation;
    evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);

    // Parallel reduction picks the thread with the smallest error.
    const int winner = findMinError(errors);

    // Every other thread is done; only the winner emits the block.
    if (threadIdx.x != winner)
    {
        return;
    }
    saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
// Kernel: compresses one color block per thread block using per-texel weights
// and the 4-color evaluator only.
//
// permutations - table of candidate index assignments.
// image        - input colors, read by loadColorBlock.
// result       - output array, one uint2 per thread block.
__global__ void compressWeighted(const uint * permutations, const uint * image, uint2 * result)
{
    __shared__ float3 colors[16];
    __shared__ float3 sums[16];
    __shared__ float weights[16];
    __shared__ int xrefs[16];
    __shared__ float errors[NUM_THREADS];

    loadColorBlock(image, colors, sums, weights, xrefs);

    // Everything loaded above must be visible to all threads before searching.
    __syncthreads();

    // Each thread searches its share of the permutation table.
    ushort bestStart, bestEnd;
    uint bestPermutation;
    evalLevel4Permutations(colors, weights, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);

    // Parallel reduction picks the thread with the smallest error.
    int winner = findMinError(errors);

    // Every other thread is done; only the winner emits the block.
    if (threadIdx.x != winner)
    {
        return;
    }
    saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
/*
__device__ float computeError(const float weights[16], uchar a0, uchar a1)
{
float palette[6];
palette[0] = (6.0f/7.0f * a0 + 1.0f/7.0f * a1);
palette[1] = (5.0f/7.0f * a0 + 2.0f/7.0f * a1);
palette[2] = (4.0f/7.0f * a0 + 3.0f/7.0f * a1);
palette[3] = (3.0f/7.0f * a0 + 4.0f/7.0f * a1);
palette[4] = (2.0f/7.0f * a0 + 5.0f/7.0f * a1);
palette[5] = (1.0f/7.0f * a0 + 6.0f/7.0f * a1);
float total = 0.0f;
for (uint i = 0; i < 16; i++)
{
float alpha = weights[i];
float error = a0 - alpha;
error = min(error, palette[0] - alpha);
error = min(error, palette[1] - alpha);
error = min(error, palette[2] - alpha);
error = min(error, palette[3] - alpha);
error = min(error, palette[4] - alpha);
error = min(error, palette[5] - alpha);
error = min(error, a1 - alpha);
total += error;
}
return total;
}
inline __device__ uchar roundAndExpand(float a)
{
return rintf(__saturatef(a) * 255.0f);
}
*/
/*
__device__ void optimizeAlpha8(const float alphas[16], uchar & a0, uchar & a1)
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
{
uint idx = index[i];
float alpha;
if (idx < 2) alpha = 1.0f - idx;
else alpha = (8.0f - idx) / 7.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * alphas[i];
betax_sum += beta * alphas[i];
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
a0 = roundAndExpand(a);
a1 = roundAndExpand(b);
}
*/
/*
__device__ void compressAlpha(const float alphas[16], uint4 * result)
{
const int tid = threadIdx.x;
// Compress alpha block!
// Brute force approach:
// Try all color pairs: 256*256/2 = 32768, 32768/64 = 512 iterations?
// Determine min & max alphas
float A0, A1;
if (tid < 16)
{
__shared__ uint s_alphas[16];
s_alphas[tid] = alphas[tid];
s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^8]);
s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^4]);
s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^2]);
s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^1]);
A0 = s_alphas[tid];
s_alphas[tid] = alphas[tid];
s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^8]);
s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^4]);
s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^2]);
s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^1]);
A1 = s_alphas[tid];
}
__syncthreads();
int minIdx = 0;
if (A1 - A0 > 8)
{
float bestError = FLT_MAX;
// 64 threads -> 8x8
// divide [A1-A0] in partitions.
// test endpoints
for (int i = 0; i < 128; i++)
{
uint idx = (i * NUM_THREADS + tid) * 4;
uchar a0 = idx & 255;
uchar a1 = idx >> 8;
float error = computeError(alphas, a0, a1);
if (error < bestError)
{
bestError = error;
A0 = a0;
A1 = a1;
}
}
__shared__ float errors[NUM_THREADS];
errors[tid] = bestError;
// Minimize error.
minIdx = findMinError(errors);
}
if (minIdx == tid)
{
// @@ Compute indices.
// @@ Write alpha block.
}
}
__global__ void compressDXT5(const uint * permutations, const uint * image, uint4 * result)
{
__shared__ float3 colors[16];
__shared__ float3 sums[16];
__shared__ float weights[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, weights, xrefs);
__syncthreads();
compressAlpha(weights, result);
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalLevel4Permutations(colors, weights, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, (uint2 *)result);
}
}
*/
////////////////////////////////////////////////////////////////////////////////
// Setup kernel
////////////////////////////////////////////////////////////////////////////////
extern "C" void setupCompressKernel(const float weights[3])
{
// Set constants.
cudaMemcpyToSymbol(kColorMetric, weights, sizeof(float) * 3, 0);
}
////////////////////////////////////////////////////////////////////////////////
// Launch kernel
////////////////////////////////////////////////////////////////////////////////
// Host-side launcher for the compress kernel: one thread block of NUM_THREADS
// threads per color block.
//
// blockNum  - number of color blocks to process.
// d_data    - device pointer to the input colors.
// d_result  - device pointer to the output; reinterpreted as uint2 entries.
// d_bitmaps - device pointer to the permutation table.
extern "C" void compressKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
    uint2 * d_blocks = (uint2 *)d_result;
    compress<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, d_blocks);
}
// Host-side launcher for the compressWeighted kernel: one thread block of
// NUM_THREADS threads per color block.
//
// blockNum  - number of color blocks to process.
// d_data    - device pointer to the input colors.
// d_result  - device pointer to the output; reinterpreted as uint2 entries.
// d_bitmaps - device pointer to the permutation table.
extern "C" void compressWeightedKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
    uint2 * d_blocks = (uint2 *)d_result;
    compressWeighted<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, d_blocks);
}

View File

@ -0,0 +1,194 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "CudaMath.h"
// Tile width and height in threads; THREAD_COUNT is the number of threads per tile.
#define TW 16
#define TH 16
#define THREAD_COUNT (TW * TH)
// Capacity of the constant-memory filter kernel below; setupConvolveKernel clamps to it.
#define MAX_KERNEL_WIDTH 32
// NOTE(review): KW is not referenced by the code visible in this file.
#define KW 4
// __debugsync() is a barrier under device emulation and expands to nothing otherwise.
#if __DEVICE_EMULATION__
#define __debugsync() __syncthreads()
#else
#define __debugsync()
#endif
// NOTE(review): the gamma constants are declared but not read by the visible code.
__constant__ float inputGamma, outputInverseGamma;
// Filter weights, uploaded by setupConvolveKernel.
__constant__ float kernel[MAX_KERNEL_WIDTH];
// Use texture to access input?
// That's the most simple approach.
texture<> image;
////////////////////////////////////////////////////////////////////////////////
// Combined convolution filter
////////////////////////////////////////////////////////////////////////////////
// Combined horizontal + vertical convolution of the `image` texture, with the
// horizontal results staged in a shared-memory tile.
//
// output - destination buffer for the filtered colors.
//
// NOTE(review): this kernel is unfinished and cannot compile as written:
//  - HW is not defined anywhere in the visible source (presumably the
//    half-width of the filter);
//  - the texture fetches use `tc`, which is never declared (presumably the
//    coordinate `t`);
//  - the final bounds check and store use `w` and `h`, which are not in scope
//    there (presumably the output dimensions), and the store indexes with `h`
//    where a column (`x`) would be expected.
// Only the unambiguous `it` -> `if` keyword typo is corrected here; the items
// above need the author's intent and are left as-is.
__global__ void convolve(float4 * output)
{
    // @@ Use morton order to assign threads.
    int x = threadIdx.x;
    int y = threadIdx.y;

    float4 color = make_float4(0.0f, 0.0f, 0.0f, 0.0f);

    // texture coordinate.
    int2 t;
    t.x = 2 * (blockIdx.x * TW + x) - HW;
    t.y = blockIdx.y * TH + y;

    // @@ We might want to loop and process strips, to reuse the results of the horizontal convolutions.

    // Horizontal convolution. @@ Unroll loops.
    // The weights are applied mirrored: kernel[HW-1..0] first, then kernel[0..HW-1].
    for (int e = HW; e > 0; e--)
    {
        t.x++;
        float w = kernel[e-1];
        color += w * tex2D(image, tc);
    }
    for (int e = 0; e < HW; e++)
    {
        t.x++;
        float w = kernel[e];
        color += w * tex2D(image, tc);
    }

    // Write color to shared memory, one plane of THREAD_COUNT floats per channel.
    __shared__ float tile[4 * THREAD_COUNT];
    int tileIdx = y * TW + x;
    tile[tileIdx + 0 * THREAD_COUNT] = color.x;
    tile[tileIdx + 1 * THREAD_COUNT] = color.y;
    tile[tileIdx + 2 * THREAD_COUNT] = color.z;
    tile[tileIdx + 3 * THREAD_COUNT] = color.w;

    // All horizontal results must be in the tile before any thread reads its neighbours.
    __syncthreads();

    // tile coordinate.
    t.x = x;
    t.y = y - HW;

    // Vertical convolution. @@ Unroll loops.
    // NOTE(review): `color` still holds the horizontal result here and is
    // accumulated into, and t.y can fall outside [0, TH) for rows near the tile
    // border -- confirm both are intended.
    for (int i = HW; i > 0; i--)
    {
        float w = kernel[i-1];
        t.y++;
        int idx = t.y * TW + t.x;
        color.x += w * tile[idx + 0 * THREAD_COUNT];
        color.y += w * tile[idx + 1 * THREAD_COUNT];
        color.z += w * tile[idx + 2 * THREAD_COUNT];
        color.w += w * tile[idx + 3 * THREAD_COUNT];
    }
    for (int i = 0; i < HW; i++)
    {
        float w = kernel[i];
        t.y++;
        int idx = t.y * TW + t.x;
        color.x += w * tile[idx + 0 * THREAD_COUNT];
        color.y += w * tile[idx + 1 * THREAD_COUNT];
        color.z += w * tile[idx + 2 * THREAD_COUNT];
        color.w += w * tile[idx + 3 * THREAD_COUNT];
    }

    if (x < w && y < h)
    {
        // @@ Prevent unaligned writes.
        output[y * w + h] = color;
    }
}
////////////////////////////////////////////////////////////////////////////////
// Monophase X convolution filter
////////////////////////////////////////////////////////////////////////////////
// Placeholder: the body is empty, so calling this does nothing.
__device__ void convolveY()
{
}
////////////////////////////////////////////////////////////////////////////////
// Mipmap convolution filter
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Gamma correction
////////////////////////////////////////////////////////////////////////////////
/*
__device__ float toLinear(float f, float gamma = 2.2f)
{
return __pow(f, gamma);
}
__device__ float toGamma(float f, float gamma = 2.2f)
{
return pow(f, 1.0f / gamma);
}
*/
////////////////////////////////////////////////////////////////////////////////
// Setup kernel
////////////////////////////////////////////////////////////////////////////////
// Uploads filter weights into the constant-memory `kernel` array.
//
// k - host pointer to the weights.
// w - number of weights; values above MAX_KERNEL_WIDTH are truncated to it.
//
// Nothing is uploaded when k is NULL or w is not positive.
extern "C" void setupConvolveKernel(const float * k, int w)
{
    // A negative width would pass an upper-bound clamp untouched and then wrap
    // to an enormous size_t in the byte count below, so reject it up front.
    if (k == NULL || w <= 0)
    {
        return;
    }

    if (w > MAX_KERNEL_WIDTH)
    {
        w = MAX_KERNEL_WIDTH;
    }

    cudaMemcpyToSymbol(kernel, k, sizeof(float) * w, 0);
}
////////////////////////////////////////////////////////////////////////////////
// Launch kernel
////////////////////////////////////////////////////////////////////////////////

View File

@ -0,0 +1,676 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Containers.h>
#include <nvmath/Color.h>
#include <nvmath/Fitting.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvtt/CompressionOptions.h>
#include <nvtt/FastCompressDXT.h>
#include "CudaCompressDXT.h"
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda_runtime.h>
#endif
#include <time.h>
#include <stdio.h>
using namespace nv;
using namespace nvtt;
#if defined HAVE_CUDA
// Kernel setup/launch entry points with C linkage; they are defined in another translation unit.
extern "C" void setupCompressKernel(const float weights[3]);
extern "C" void compressKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressWeightedKernel(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
// Device copy of the permutation table; NULL until doPrecomputation() has uploaded it.
static uint * d_bitmaps = NULL;
static void doPrecomputation()
{
if (d_bitmaps != NULL) {
return;
}
uint bitmaps[1024];
int indices[16];
int num = 0;
// Compute bitmaps with 3 clusters:
// first cluster [0,i) is at the start
for( int m = 0; m < 16; ++m )
{
indices[m] = 0;
}
const int imax = 15;
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is half along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// last cluster [j,k) is at the end
if( j < 16 )
{
indices[j] = 1;
}
uint bitmap = 0;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
}
bitmaps[num] = bitmap;
num++;
}
}
nvDebugCheck(num == 151);
// Align to 160.
for(int i = 0; i < 9; i++)
{
bitmaps[num] = 0x555AA000;
num++;
}
nvDebugCheck(num == 160);
// Append bitmaps with 4 clusters:
// first cluster [0,i) is at the start
for( int m = 0; m < 16; ++m )
{
indices[m] = 0;
}
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < 16; ++m )
{
indices[m] = 3;
}
int kmax = ( j == 0 ) ? 15 : 16;
for( int k = kmax; k >= j; --k )
{
// last cluster [k,n) is at the end
if( k < 16 )
{
indices[k] = 1;
}
uint bitmap = 0;
bool hasThree = false;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
if (indices[p] == 3) hasThree = true;
}
if (hasThree) {
bitmaps[num] = bitmap;
num++;
}
}
}
}
nvDebugCheck(num == 975);
// Align to 1024.
for(int i = 0; i < 49; i++)
{
bitmaps[num] = 0x555AA000;
num++;
}
nvDebugCheck(num == 1024);
/*
printf("uint bitmaps[1024] = {\n");
for (int i = 0; i < 1024; i++)
{
printf("\t0x%.8X,\n", bitmaps[i]);
}
printf("};\n");
*/
// Upload bitmaps.
cudaMalloc((void**) &d_bitmaps, 1024 * sizeof(uint));
cudaMemcpy(d_bitmaps, bitmaps, 1024 * sizeof(uint), cudaMemcpyHostToDevice);
// @@ Check for errors.
// @@ Free allocated memory.
}
#endif
// Convert linear image to block linear.
static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
{
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
for(uint by = 0; by < h; by++) {
for(uint bx = 0; bx < w; bx++) {
const uint bw = min(image->width() - bx * 4, 4U);
const uint bh = min(image->height() - by * 4, 4U);
for (uint i = 0; i < 16; i++) {
const int x = (i % 4) % bw;
const int y = (i / 4) % bh;
blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
}
}
}
}
// @@ This code is very repetitive and needs to be cleaned up.
/// Compress image using CUDA.
/// Compress an image to DXT1 on the GPU.
///
/// The image is converted to block-linear order, uploaded in batches of at
/// most 32768 blocks, compressed by compressKernel, and each batch of results
/// (8 bytes per block) is passed to the output handler. CUDA errors detected
/// after a launch are reported through the error handler. When built without
/// HAVE_CUDA the function only reports Error_CudaError.
void nv::cudaCompressDXT1(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    nvDebugCheck(cuda::isHardwarePresent());

#if defined HAVE_CUDA

    doPrecomputation();

    // Image size in blocks.
    const uint blocksWide = (image->width() + 3) / 4;
    const uint blocksHigh = (image->height() + 3) / 4;

    // Host staging buffer holding the whole image in block-linear order.
    uint inputBytes = blocksWide * blocksHigh * 16 * sizeof(Color32);
    uint * hostBlocks = (uint *) malloc(inputBytes);
    convertToBlockLinear(image, hostBlocks);

    const uint totalBlocks = blocksWide * blocksHigh;
    const uint outputBytes = totalBlocks * 8;
    const uint batchLimit = 32768; // 49152, 65535

    // Device buffers sized for one batch: 64 bytes in and 8 bytes out per block.
    uint * d_data = NULL;
    cudaMalloc((void**) &d_data, min(inputBytes, batchLimit * 64U));

    uint * d_result = NULL;
    cudaMalloc((void**) &d_result, min(outputBytes, batchLimit * 8U));

    setupCompressKernel(compressionOptions.colorWeight.ptr());

    clock_t timeBegin = clock();

    // TODO: Add support for multiple GPUs.
    for (uint done = 0; done != totalBlocks; )
    {
        uint count = min(totalBlocks - done, batchLimit);

        cudaMemcpy(d_data, hostBlocks + done * 16, count * 64, cudaMemcpyHostToDevice);

        // Launch kernel.
        compressKernel(count, d_data, d_result, d_bitmaps);

        // Check for errors.
        cudaError_t status = cudaGetLastError();
        if (status != cudaSuccess)
        {
            nvDebug("CUDA Error: %s\n", cudaGetErrorString(status));

            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // The results are copied over the start of the staging buffer; that
        // region belongs to input that has already been uploaded.
        cudaMemcpy(hostBlocks, d_result, count * 8, cudaMemcpyDeviceToHost);

        // Output result.
        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(hostBlocks, count * 8);
        }

        done += count;
    }

    clock_t timeEnd = clock();
    printf("\rCUDA time taken: %.3f seconds\n", float(timeEnd-timeBegin) / CLOCKS_PER_SEC);

    free(hostBlocks);
    cudaFree(d_data);
    cudaFree(d_result);

#else
    if (outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_CudaError);
    }
#endif
}
/// Compress image using CUDA.
/// Compress an image to DXT3, with the color part done on the GPU.
///
/// The image is converted to block-linear order and processed in batches of
/// at most 32768 blocks. For each batch the color blocks are compressed by
/// compressWeightedKernel while the alpha blocks are compressed on the host
/// with compressBlock. The output handler then receives, per block, 8 bytes
/// of alpha followed by 8 bytes of color. CUDA errors detected after a launch
/// are reported through the error handler. When built without HAVE_CUDA the
/// function only reports Error_CudaError.
void nv::cudaCompressDXT3(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    nvDebugCheck(cuda::isHardwarePresent());

#if defined HAVE_CUDA

    doPrecomputation();

    // Image size in blocks.
    const uint blocksWide = (image->width() + 3) / 4;
    const uint blocksHigh = (image->height() + 3) / 4;

    // Host staging buffer holding the whole image in block-linear order.
    uint inputBytes = blocksWide * blocksHigh * 16 * sizeof(Color32);
    uint * hostBlocks = (uint *) malloc(inputBytes);
    convertToBlockLinear(image, hostBlocks);

    const uint totalBlocks = blocksWide * blocksHigh;
    const uint outputBytes = totalBlocks * 8;
    const uint batchLimit = 32768; // 49152, 65535

    // Device buffers sized for one batch: 64 bytes in and 8 bytes out per block.
    uint * d_data = NULL;
    cudaMalloc((void**) &d_data, min(inputBytes, batchLimit * 64U));

    uint * d_result = NULL;
    cudaMalloc((void**) &d_result, min(outputBytes, batchLimit * 8U));

    // Host buffer for one batch of alpha blocks.
    AlphaBlockDXT3 * alphaBlocks = NULL;
    alphaBlocks = (AlphaBlockDXT3 *)malloc(min(outputBytes, batchLimit * 8U));

    setupCompressKernel(compressionOptions.colorWeight.ptr());

    clock_t timeBegin = clock();

    for (uint done = 0; done != totalBlocks; )
    {
        uint count = min(totalBlocks - done, batchLimit);

        cudaMemcpy(d_data, hostBlocks + done * 16, count * 64, cudaMemcpyHostToDevice);

        // Launch kernel.
        compressWeightedKernel(count, d_data, d_result, d_bitmaps);

        // Compress alpha in parallel with the GPU.
        for (uint b = 0; b < count; b++)
        {
            ColorBlock rgba(hostBlocks + (done + b) * 16);
            compressBlock(rgba, alphaBlocks + b);
        }

        // Check for errors.
        cudaError_t status = cudaGetLastError();
        if (status != cudaSuccess)
        {
            nvDebug("CUDA Error: %s\n", cudaGetErrorString(status));

            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // The results are copied over the start of the staging buffer; that
        // region belongs to input that has already been consumed.
        cudaMemcpy(hostBlocks, d_result, count * 8, cudaMemcpyDeviceToHost);

        // Output result: alpha block first, then the matching color block
        // (two uints per color block).
        if (outputOptions.outputHandler != NULL)
        {
            for (uint b = 0; b < count; b++)
            {
                outputOptions.outputHandler->writeData(alphaBlocks + b, 8);
                outputOptions.outputHandler->writeData(hostBlocks + b * 2, 8);
            }
        }

        done += count;
    }

    clock_t timeEnd = clock();
    printf("\rCUDA time taken: %.3f seconds\n", float(timeEnd-timeBegin) / CLOCKS_PER_SEC);

    free(alphaBlocks);
    free(hostBlocks);
    cudaFree(d_data);
    cudaFree(d_result);

#else
    if (outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_CudaError);
    }
#endif
}
/// Compress image using CUDA.
/// Compress an image to DXT5, with the color part done on the GPU.
///
/// The image is converted to block-linear order and processed in batches of
/// at most 32768 blocks. For each batch the color blocks are compressed by
/// compressWeightedKernel while the alpha blocks are compressed on the host
/// with compressBlock_Iterative. The output handler then receives, per block,
/// 8 bytes of alpha followed by 8 bytes of color. CUDA errors detected after a
/// launch are reported through the error handler. When built without
/// HAVE_CUDA the function only reports Error_CudaError.
void nv::cudaCompressDXT5(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    nvDebugCheck(cuda::isHardwarePresent());

#if defined HAVE_CUDA

    doPrecomputation();

    // Image size in blocks.
    const uint blocksWide = (image->width() + 3) / 4;
    const uint blocksHigh = (image->height() + 3) / 4;

    // Host staging buffer holding the whole image in block-linear order.
    uint inputBytes = blocksWide * blocksHigh * 16 * sizeof(Color32);
    uint * hostBlocks = (uint *) malloc(inputBytes);
    convertToBlockLinear(image, hostBlocks);

    const uint totalBlocks = blocksWide * blocksHigh;
    const uint outputBytes = totalBlocks * 8;
    const uint batchLimit = 32768; // 49152, 65535

    // Device buffers sized for one batch: 64 bytes in and 8 bytes out per block.
    uint * d_data = NULL;
    cudaMalloc((void**) &d_data, min(inputBytes, batchLimit * 64U));

    uint * d_result = NULL;
    cudaMalloc((void**) &d_result, min(outputBytes, batchLimit * 8U));

    // Host buffer for one batch of alpha blocks.
    AlphaBlockDXT5 * alphaBlocks = NULL;
    alphaBlocks = (AlphaBlockDXT5 *)malloc(min(outputBytes, batchLimit * 8U));

    setupCompressKernel(compressionOptions.colorWeight.ptr());

    clock_t timeBegin = clock();

    for (uint done = 0; done != totalBlocks; )
    {
        uint count = min(totalBlocks - done, batchLimit);

        cudaMemcpy(d_data, hostBlocks + done * 16, count * 64, cudaMemcpyHostToDevice);

        // Launch kernel.
        compressWeightedKernel(count, d_data, d_result, d_bitmaps);

        // Compress alpha in parallel with the GPU.
        for (uint b = 0; b < count; b++)
        {
            ColorBlock rgba(hostBlocks + (done + b) * 16);
            compressBlock_Iterative(rgba, alphaBlocks + b);
        }

        // Check for errors.
        cudaError_t status = cudaGetLastError();
        if (status != cudaSuccess)
        {
            nvDebug("CUDA Error: %s\n", cudaGetErrorString(status));

            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // The results are copied over the start of the staging buffer; that
        // region belongs to input that has already been consumed.
        cudaMemcpy(hostBlocks, d_result, count * 8, cudaMemcpyDeviceToHost);

        // Output result: alpha block first, then the matching color block
        // (two uints per color block).
        if (outputOptions.outputHandler != NULL)
        {
            for (uint b = 0; b < count; b++)
            {
                outputOptions.outputHandler->writeData(alphaBlocks + b, 8);
                outputOptions.outputHandler->writeData(hostBlocks + b * 2, 8);
            }
        }

        done += count;
    }

    clock_t timeEnd = clock();
    printf("\rCUDA time taken: %.3f seconds\n", float(timeEnd-timeBegin) / CLOCKS_PER_SEC);

    free(alphaBlocks);
    free(hostBlocks);
    cudaFree(d_data);
    cudaFree(d_result);

#else
    if (outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_CudaError);
    }
#endif
}
#if defined HAVE_CUDA
// Accumulates color blocks on the host and compresses them on the device in
// one batch when flushed.
//
// The object owns two host arrays and two device buffers, all sized for
// `numBlocks` blocks, and releases them in its destructor. Because of that
// ownership it must not be copied; copying is disabled below.
class Task
{
public:
    // Allocates host and device storage for up to numBlocks blocks.
    explicit Task(uint numBlocks) :
        blockMaxCount(numBlocks),
        blockCount(0),
        blockLinearImage(NULL),
        xrefs(NULL),
        d_blockLinearImage(NULL),
        d_compressedImage(NULL)
    {
        // System memory allocations.
        blockLinearImage = new uint[blockMaxCount * 16];
        xrefs = new uint[blockMaxCount * 16];

        // Device memory allocations. The pointers start out NULL so that the
        // cudaFree calls in the destructor stay well-defined if an allocation
        // fails.
        cudaMalloc((void**) &d_blockLinearImage, blockMaxCount * 16 * sizeof(uint));
        cudaMalloc((void**) &d_compressedImage, blockMaxCount * 8U);

        // @@ Check for allocation errors.
    }

    ~Task()
    {
        delete [] blockLinearImage;
        delete [] xrefs;

        cudaFree(d_blockLinearImage);
        cudaFree(d_compressedImage);
    }

    // Appends the 16 colors of one block to the pending batch.
    // The task must not be full.
    void addColorBlock(const ColorBlock & rgba)
    {
        nvDebugCheck(!isFull());

        // @@ Count unique colors?
        /*
        // Convert colors to vectors.
        Array<Vector3> pointArray(16);
        for(int i = 0; i < 16; i++) {
        const Color32 color = rgba.color(i);
        pointArray.append(Vector3(color.r, color.g, color.b));
        }
        // Find best fit line.
        const Vector3 axis = Fit::bestLine(pointArray).direction();
        // Project points to axis.
        float dps[16];
        uint * order = &xrefs[blockCount * 16];
        for (uint i = 0; i < 16; ++i)
        {
        dps[i] = dot(pointArray[i], axis);
        order[i] = i;
        }
        // Sort them.
        for (uint i = 0; i < 16; ++i)
        {
        for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
        {
        swap(dps[j], dps[j - 1]);
        swap(order[j], order[j - 1]);
        }
        }
        */

        // Write sorted colors to blockLinearImage.
        // (The sort above is disabled, so colors are stored in their original order.)
        for(uint i = 0; i < 16; ++i)
        {
            // blockLinearImage[blockCount * 16 + i] = rgba.color(order[i]);
            blockLinearImage[blockCount * 16 + i] = rgba.color(i);
        }

        ++blockCount;
    }

    // True once blockMaxCount blocks are pending.
    bool isFull() const
    {
        nvDebugCheck(blockCount <= blockMaxCount);
        return blockCount == blockMaxCount;
    }

    // Uploads the pending blocks, runs the compression kernel, copies the
    // results back and resets the batch. CUDA errors detected after the launch
    // are reported through outputOptions.errorHandler.
    //
    // NOTE(review): the call that would hand the compressed data to the output
    // handler is commented out, so nothing is emitted at present.
    void flush(const OutputOptions & outputOptions)
    {
        if (blockCount == 0)
        {
            // Nothing to do.
            return;
        }

        // Copy input color blocks.
        cudaMemcpy(d_blockLinearImage, blockLinearImage, blockCount * 64, cudaMemcpyHostToDevice);

        // Launch kernel.
        compressKernel(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);

        // Check for errors.
        cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));

            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // Copy result to host, overwrite swizzled image.
        uint * compressedImage = blockLinearImage;
        cudaMemcpy(compressedImage, d_compressedImage, blockCount * 8, cudaMemcpyDeviceToHost);

        // @@ Sort block indices.

        // Output result.
        if (outputOptions.outputHandler != NULL)
        {
            // outputOptions.outputHandler->writeData(compressedImage, blockCount * 8);
        }

        blockCount = 0;
    }

private:
    // Not copyable: a copy would release the same host and device buffers
    // twice. Declared but intentionally left undefined.
    Task(const Task &);
    Task & operator=(const Task &);

    const uint blockMaxCount;   // Capacity of the batch, in blocks.
    uint blockCount;            // Blocks currently pending.

    uint * blockLinearImage;    // Host staging buffer (16 values per block); reused for results.
    uint * xrefs;               // Host buffer reserved for the (disabled) color sort order.

    uint * d_blockLinearImage;  // Device copy of the pending blocks.
    uint * d_compressedImage;   // Device output, 8 bytes per block.
};
#endif // defined HAVE_CUDA
/// Alternative DXT1 path: walks the image in 4x4 blocks, stages them in a Task
/// and submits them to the GPU in batches. Builds without HAVE_CUDA only report
/// Error_CudaError to the error handler, if one is set.
void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
#if !defined HAVE_CUDA
    if (outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_CudaError);
    }
#else
    const uint width = image->width();
    const uint height = image->height();

    // Total number of 4x4 blocks (partial blocks round up) and the batch limit.
    const uint totalBlocks = ((width + 3) / 4) * ((height + 3) / 4);
    const uint batchLimit = 32768; // 49152, 65535

    doPrecomputation();
    setupCompressKernel(compressionOptions.colorWeight.ptr());

    ColorBlock block;
    Task task(min(totalBlocks, batchLimit));

    const clock_t begin = clock();

    for (uint by = 0; by < height; by += 4)
    {
        for (uint bx = 0; bx < width; bx += 4)
        {
            block.init(image, bx, by);
            task.addColorBlock(block);

            // Submit the batch as soon as the staging buffer is full.
            if (task.isFull())
            {
                task.flush(outputOptions);
            }
        }
    }

    // Submit whatever is left over.
    task.flush(outputOptions);

    const clock_t finish = clock();
    printf("\rCUDA time taken: %.3f seconds\n", float(finish-begin) / CLOCKS_PER_SEC);
#endif
}

View File

@ -0,0 +1,43 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_CUDACOMPRESSDXT_H
#define NV_TT_CUDACOMPRESSDXT_H
#include <nvimage/nvimage.h>
#include <nvtt/nvtt.h>
namespace nv
{
class Image;
// CUDA-backed block compressors. Each takes the image to compress, the output
// options (output and error handlers) and the private compression settings.
// NOTE(review): judging by the visible definitions, builds without HAVE_CUDA
// report Error_CudaError through the error handler instead of compressing --
// confirm for every entry point.
void cudaCompressDXT1(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void cudaCompressDXT3(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void cudaCompressDXT5(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
// Alternative DXT1 entry point that submits blocks to the GPU in batches.
void cudaCompressDXT1_2(const Image * image, const nvtt::OutputOptions & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
} // nv namespace
#endif // NV_TT_CUDACOMPRESSDXT_H

221
src/nvtt/cuda/CudaMath.h Normal file
View File

@ -0,0 +1,221 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// Math functions and operators to be used with vector types.
#ifndef CUDAMATH_H
#define CUDAMATH_H
#include <float.h>
// Component-wise product of two vectors.
inline __device__ __host__ float3 operator *(float3 a, float3 b)
{
    const float px = a.x * b.x;
    const float py = a.y * b.y;
    const float pz = a.z * b.z;
    return make_float3(px, py, pz);
}

// Vector scaled by a scalar.
inline __device__ __host__ float3 operator *(float3 v, float f)
{
    const float sx = v.x * f;
    const float sy = v.y * f;
    const float sz = v.z * f;
    return make_float3(sx, sy, sz);
}

// Scalar times vector; same result as vector times scalar.
inline __device__ __host__ float3 operator *(float f, float3 v)
{
    return v * f;
}
// In-place component-wise addition: b += a.
inline __device__ __host__ void operator +=(float3 & b, float3 a)
{
    b.x += a.x;
    b.y += a.y;
    b.z += a.z;
}

// Component-wise sum, expressed through the compound operator on the by-value copy.
inline __device__ __host__ float3 operator +(float3 a, float3 b)
{
    a += b;
    return a;
}
// In-place component-wise subtraction: b -= a.
inline __device__ __host__ void operator -=(float3 & b, float3 a)
{
    b.x -= a.x;
    b.y -= a.y;
    b.z -= a.z;
}

// Component-wise difference a - b, expressed through the compound operator.
inline __device__ __host__ float3 operator -(float3 a, float3 b)
{
    a -= b;
    return a;
}
// In-place division by a scalar. The reciprocal is computed once and each
// component is multiplied by it.
inline __device__ __host__ void operator /=(float3 & b, float f)
{
    const float reciprocal = 1.0f / f;
    b.x *= reciprocal;
    b.y *= reciprocal;
    b.z *= reciprocal;
}

// Vector divided by a scalar (multiplies by the reciprocal, as above).
inline __device__ __host__ float3 operator /(float3 v, float f)
{
    v /= f;
    return v;
}
// Dot product of two 3-component vectors.
inline __device__ __host__ float dot(float3 a, float3 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}

// Dot product of two 4-component vectors.
inline __device__ __host__ float dot(float4 a, float4 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    sum += a.w * b.w;
    return sum;
}
// Clamp f to the range [a, b]: first cap at b, then raise to at least a.
inline __device__ __host__ float clamp(float f, float a, float b)
{
    const float capped = min(f, b);
    return max(a, capped);
}

// Clamp each component of v to the matching component range [a, b].
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
    const float cx = clamp(v.x, a.x, b.x);
    const float cy = clamp(v.y, a.y, b.y);
    const float cz = clamp(v.z, a.z, b.z);
    return make_float3(cx, cy, cz);
}

// Clamp each component of v to the same scalar range [a, b].
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
    const float cx = clamp(v.x, a, b);
    const float cy = clamp(v.y, a, b);
    const float cz = clamp(v.z, a, b);
    return make_float3(cx, cy, cz);
}
// Scale v by the reciprocal of its length. No guard for a zero-length input.
inline __device__ __host__ float3 normalize(float3 v)
{
    const float invLength = 1.0f / sqrtf(dot(v, v));
    const float nx = v.x * invLength;
    const float ny = v.y * invLength;
    const float nz = v.z * invLength;
    return make_float3(nx, ny, nz);
}
// Estimate the first eigenvector of a symmetric 3x3 matrix with the power method.
// The matrix is given by its six unique entries, read as
//   [0]=xx [1]=xy [2]=xz [3]=yy [4]=yz [5]=zz.
// After every multiplication the vector is rescaled by its largest component
// (plain max, not absolute value). Only device-emulation builds guard against
// that component being zero.
// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html
inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{
    float3 estimate = make_float3(1.0f, 1.0f, 1.0f);

    // 8 iterations seems to be more than enough.
    for (int iteration = 0; iteration < 8; ++iteration)
    {
        const float rx = estimate.x * matrix[0] + estimate.y * matrix[1] + estimate.z * matrix[2];
        const float ry = estimate.x * matrix[1] + estimate.y * matrix[3] + estimate.z * matrix[4];
        const float rz = estimate.x * matrix[2] + estimate.y * matrix[4] + estimate.z * matrix[5];

        const float largest = max(max(rx, ry), rz);
        float scale = 1.0f / largest;
#if __DEVICE_EMULATION__
        if (largest == 0.0f) scale = 0.0f;
#endif

        estimate = make_float3(rx*scale, ry*scale, rz*scale);
    }

    return estimate;
}
// Sum the 16 block colors into `sums`.
// The emulation path adds all 16 colors and writes the total to every one of
// the 16 entries. The device path has each thread (indexed by threadIdx.x)
// seed sums[idx] with its own color and then add the entry whose index differs
// in one bit, for bits 8, 4, 2 and 1 in that order.
// NOTE(review): there is no barrier between those steps, so the device path
// presumably relies on the 16 threads running in lockstep and is meant to leave
// the same total in every entry as the emulation path -- confirm against the caller.
inline __device__ void colorSums(const float3 * colors, float3 * sums)
{
#if __DEVICE_EMULATION__
float3 color_sum = make_float3(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
// Pairwise exchange: statement order matters, do not reorder.
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
// Return the direction of the best-fit line through the 16 block colors.
// The products of each color's offset from the mean (color_sum / 16) are
// accumulated as six unique entries [xx, xy, xz, yy, yz, zz] and handed to
// firstEigenVector().
// colors:    the 16 colors of the block.
// color_sum: sum of those 16 colors (see colorSums).
// NOTE(review): the device path reduces per-thread products in shared memory
// without a barrier between steps; presumably it assumes the 16 threads run in
// lockstep -- confirm.
inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[6] = {0, 0, 0, 0, 0, 0};
for (int i = 0; i < 16; i++)
{
float3 a = colors[i] - color_sum * (1.0f / 16.0f);
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[2] += a.x * a.z;
covariance[3] += a.y * a.y;
covariance[4] += a.y * a.z;
covariance[5] += a.z * a.z;
}
#else
const int idx = threadIdx.x;
float3 diff = colors[idx] - color_sum * (1.0f / 16.0f);
// @@ Eliminate two-way bank conflicts here.
// @@ It seems that doing that and unrolling the reduction doesn't help...
// One 6-entry slot per thread; slot 0 ends up holding the totals.
__shared__ float covariance[16*6];
covariance[6 * idx + 0] = diff.x * diff.x; // 0, 6, 12, 2, 8, 14, 4, 10, 0
covariance[6 * idx + 1] = diff.x * diff.y;
covariance[6 * idx + 2] = diff.x * diff.z;
covariance[6 * idx + 3] = diff.y * diff.y;
covariance[6 * idx + 4] = diff.y * diff.z;
covariance[6 * idx + 5] = diff.z * diff.z;
// Halve the number of active slots each step: slot idx absorbs slot idx+d.
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[6 * idx + 0] += covariance[6 * (idx+d) + 0];
covariance[6 * idx + 1] += covariance[6 * (idx+d) + 1];
covariance[6 * idx + 2] += covariance[6 * (idx+d) + 2];
covariance[6 * idx + 3] += covariance[6 * (idx+d) + 3];
covariance[6 * idx + 4] += covariance[6 * (idx+d) + 4];
covariance[6 * idx + 5] += covariance[6 * (idx+d) + 5];
}
}
#endif
// Compute first eigen vector.
// Only the first six entries of `covariance` are read by firstEigenVector.
return firstEigenVector(covariance);
}
#endif // CUDAMATH_H

113
src/nvtt/cuda/CudaUtils.cpp Normal file
View File

@ -0,0 +1,113 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda_runtime.h>
#endif
using namespace nv;
using namespace cuda;
#if NV_OS_WIN32
// Trim rarely used APIs from <windows.h>. The macro the header recognises is
// WIN32_LEAN_AND_MEAN; the previous spelling (WINDOWS_LEAN_AND_MEAN) had no effect.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
/// Return true when the running Windows reports a major version of 6 or higher.
/// If the version query fails the function returns false instead of reading an
/// uninitialised version structure.
static bool isWindowsVista()
{
    OSVERSIONINFO osvi;
    osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);

    if (!::GetVersionEx(&osvi))
    {
        // Version unknown: do not claim Vista.
        return false;
    }

    return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
/// Determine if CUDA is available.
/// Always false in builds without HAVE_CUDA. On Windows the answer is also
/// false when isWindowsVista() is true; otherwise at least one CUDA device
/// must be reported.
bool nv::cuda::isHardwarePresent()
{
#if !defined HAVE_CUDA
    return false;
#else
#if NV_OS_WIN32
    // The additional isWow32() requirement is currently disabled.
    if (isWindowsVista())
    {
        return false;
    }
#endif
    return deviceCount() > 0;
#endif
}
/// Get the number of CUDA enabled devices.
/// Returns 0 when the query fails or when built without HAVE_CUDA.
int nv::cuda::deviceCount()
{
#if defined HAVE_CUDA
    int count = 0;
    if (cudaGetDeviceCount(&count) != cudaSuccess)
    {
        return 0;
    }
    return count;
#else
    return 0;
#endif
}
/// Activate the given device.
/// Returns true when cudaSetDevice succeeds; always false without HAVE_CUDA.
/// The index is checked against deviceCount() first.
bool nv::cuda::setDevice(int i)
{
    nvCheck(i < deviceCount());

#if !defined HAVE_CUDA
    return false;
#else
    return cudaSetDevice(i) == cudaSuccess;
#endif
}

40
src/nvtt/cuda/CudaUtils.h Normal file
View File

@ -0,0 +1,40 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_CUDAUTILS_H
#define NV_TT_CUDAUTILS_H
namespace nv
{
    namespace cuda
    {
        /// True when CUDA compression can be used on this machine.
        bool isHardwarePresent();

        /// Number of CUDA enabled devices.
        int deviceCount();

        /// Select device @a i; returns true on success.
        bool setDevice(int i);
    } // cuda namespace (a namespace takes no trailing semicolon)
} // nv namespace
#endif // NV_TT_CUDAUTILS_H

530
src/nvtt/dxtlib.cpp Normal file
View File

@ -0,0 +1,530 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Memory.h>
#include <nvcore/Ptr.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Filter.h>
#include <nvimage/Quantize.h>
#include <nvimage/NormalMap.h>
#include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "CompressRGB.h"
#include "InputOptions.h"
#include "CompressionOptions.h"
#include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h"
using namespace nv;
using namespace nvtt;
namespace
{
// Size in bytes of one compressed 4x4 block for the given format.
// Returns 0 for formats that are not block compressed.
static int blockSize(Format format)
{
    switch (format)
    {
        case Format_DXT1:
        case Format_DXT1a:
        case Format_BC4:
            return 8;

        case Format_DXT3:
        case Format_DXT5:
        case Format_DXT5n:
        case Format_BC5:
            return 16;

        default:
            return 0;
    }
}
// Bytes per row for w pixels of `bitsize` bits each. The per-pixel size is
// rounded up to whole bytes and the row is padded to a multiple of 4 bytes.
inline uint computePitch(uint w, uint bitsize)
{
    const uint bytesPerPixel = (bitsize + 7) / 8;
    const uint rowBytes = w * bytesPerPixel;

    // Align to 32 bits.
    return (rowBytes + 3) & ~3u;
}
// Size in bytes of one w x h image: whole 4x4 blocks for the compressed
// formats, padded rows for the uncompressed one.
static int computeImageSize(uint w, uint h, uint bitCount, Format format)
{
    if (format != Format_RGBA)
    {
        // Partial blocks at the right and bottom edges count as full blocks.
        const uint blocksWide = (w + 3) / 4;
        const uint blocksHigh = (h + 3) / 4;
        return blocksWide * blocksHigh * blockSize(format);
    }

    return h * computePitch(w, bitCount);
}
} // namespace
//
// compress
//
// Write the DDS header for the texture described by the options. Nothing is
// written when there is no output handler or when the header was disabled.
// The dimensions come from the first input image.
static void outputHeader(const InputOptions::Private & inputOptions, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    // Output DDS header.
    if (outputOptions.outputHandler == NULL || !outputOptions.outputHeader)
    {
        return;
    }

    DDSHeader header;

    InputOptions::Private::Image * img = inputOptions.images;
    nvCheck(img != NULL);

    header.setWidth(img->width);
    header.setHeight(img->height);

    // Keep the advertised count in step with what compress() emits: with a
    // maximum level set, levels 0..maxLevel are written, i.e. maxLevel + 1 images.
    // Without mipmap generation the count stays 0, as before.
    int mipmapCount = inputOptions.mipmapCount;
    if (!inputOptions.generateMipmaps) mipmapCount = 0;
    else if (inputOptions.maxLevel >= 0 && inputOptions.maxLevel + 1 < mipmapCount) mipmapCount = inputOptions.maxLevel + 1;

    header.setMipmapCount(mipmapCount);

    if (inputOptions.textureType == TextureType_2D) {
        header.setTexture2D();
    }
    else if (inputOptions.textureType == TextureType_Cube) {
        header.setTextureCube();
    }
    /*else if (inputOptions.textureType == TextureType_3D) {
        header.setTexture3D();
        header.setDepth(img->depth);
    }*/

    if (compressionOptions.format == Format_RGBA)
    {
        // The pitch follows the configured pixel size and the same row padding
        // used by computeImageSize(), instead of assuming 32 bits per pixel.
        header.setPitch(computePitch(img->width, compressionOptions.bitcount));
        header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
    }
    else
    {
        header.setLinearSize(computeImageSize(img->width, img->height, compressionOptions.bitcount, compressionOptions.format));

        if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
            header.setFourCC('D', 'X', 'T', '1');
        }
        else if (compressionOptions.format == Format_DXT3) {
            header.setFourCC('D', 'X', 'T', '3');
        }
        else if (compressionOptions.format == Format_DXT5) {
            header.setFourCC('D', 'X', 'T', '5');
        }
        else if (compressionOptions.format == Format_DXT5n) {
            header.setFourCC('D', 'X', 'T', '5');
            header.setNormalFlag(true);
        }
        else if (compressionOptions.format == Format_BC4) {
            header.setFourCC('A', 'T', 'I', '1');
        }
        else if (compressionOptions.format == Format_BC5) {
            header.setFourCC('A', 'T', 'I', '2');
            header.setNormalFlag(true);
        }
    }

    // Swap bytes if necessary.
    header.swapBytes();

    // 128 bytes of base header, plus 20 more when the DX10 extension is present.
    nvStaticCheck(sizeof(DDSHeader) == 128 + 20);
    if (header.hasDX10Header())
    {
        outputOptions.outputHandler->writeData(&header, 128 + 20);
    }
    else
    {
        outputOptions.outputHandler->writeData(&header, 128);
    }

    // Revert swap.
    header.swapBytes();
}
// Compress one image with the compressor selected by the compression options
// and send the result through outputOptions. Always returns true.
// Selection rules visible here:
//  - RGB/RGBA goes to compressRGB.
//  - DXT1, DXT3, DXT5 and DXT5n use the fast* compressors at Quality_Fastest.
//  - At other quality levels DXT1/DXT3/DXT5 use the CUDA compressors when
//    useCuda is set and CUDA hardware is present, else the CPU compressors.
//  - DXT1a always uses the fast compressor.
//  - BC4 and BC5 have a single compressor each.
static bool compressMipmap(const Image * image, const OutputOptions & outputOptions, const CompressionOptions::Private & compressionOptions)
{
nvDebugCheck(image != NULL);
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{
compressRGB(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_DXT1)
{
// The optional external compressors below each end in a dangling `else`, so
// whichever blocks are compiled in chain onto the built-in quality selection.
#if defined(HAVE_S3QUANT)
if (compressionOptions.externalCompressor == "s3")
{
s3CompressDXT1(image, outputOptions);
}
else
#endif
#if defined(HAVE_ATITC)
if (compressionOptions.externalCompressor == "ati")
{
printf("ATI\n");
atiCompressDXT1(image, outputOptions);
}
else
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT1(image, outputOptions);
}
else
{
if (compressionOptions.useCuda && nv::cuda::isHardwarePresent())
{
cudaCompressDXT1(image, outputOptions, compressionOptions);
}
else
{
compressDXT1(image, outputOptions, compressionOptions);
}
}
}
else if (compressionOptions.format == Format_DXT1a)
{
// @@ Only fast compression mode for now.
fastCompressDXT1a(image, outputOptions);
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT3(image, outputOptions);
}
else
{
if (compressionOptions.useCuda && nv::cuda::isHardwarePresent())
{
cudaCompressDXT3(image, outputOptions, compressionOptions);
}
else
{
compressDXT3(image, outputOptions, compressionOptions);
}
}
}
else if (compressionOptions.format == Format_DXT5)
{
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT5(image, outputOptions);
}
else
{
if (compressionOptions.useCuda && nv::cuda::isHardwarePresent())
{
cudaCompressDXT5(image, outputOptions, compressionOptions);
}
else
{
compressDXT5(image, outputOptions, compressionOptions);
}
}
}
else if (compressionOptions.format == Format_DXT5n)
{
// No CUDA path for DXT5n.
if (compressionOptions.quality == Quality_Fastest)
{
fastCompressDXT5n(image, outputOptions);
}
else
{
compressDXT5n(image, outputOptions, compressionOptions);
}
}
else if (compressionOptions.format == Format_BC4)
{
compressBC4(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_BC5)
{
compressBC5(image, outputOptions, compressionOptions);
}
return true;
}
// Build a float image from the input image. Unless the input is flagged as a
// normal map, channels 0..2 are converted to linear space when the input gamma
// is not 1. The caller owns the returned image.
static FloatImage * toFloatImage(const Image * image, const InputOptions::Private & inputOptions)
{
    nvDebugCheck(image != NULL);

    FloatImage * linearImage = new FloatImage(image);

    // Normal maps are left untouched; expanding them to the [-1, 1] range
    // (expandNormals) is currently disabled.
    const bool needsLinearization = !inputOptions.normalMap && inputOptions.inputGamma != 1.0f;
    if (needsLinearization)
    {
        linearImage->toLinear(0, 3, inputOptions.inputGamma);
    }

    return linearImage;
}
// Create the fixed-point image to compress from a float image, applying the
// configured output gamma.
static Image * toFixedImage(const FloatImage * floatImage, const InputOptions::Private & inputOptions)
{
    nvDebugCheck(floatImage != NULL);

    Image * fixedImage = floatImage->createImageGammaCorrect(inputOptions.outputGamma);
    return fixedImage;
}
// Downsample the given image with the configured mipmap filter and, when
// requested, normalize the result. Any filter other than Box or Triangle is
// treated as Kaiser.
static FloatImage * createMipmap(const FloatImage * floatImage, const InputOptions::Private & inputOptions)
{
    const FloatImage::WrapMode wrap = (FloatImage::WrapMode)inputOptions.wrapMode;

    FloatImage * mip = NULL;

    switch (inputOptions.mipmapFilter)
    {
        case MipmapFilter_Box:
        {
            // Use fast downsample.
            mip = floatImage->fastDownSample();
            break;
        }
        case MipmapFilter_Triangle:
        {
            Kernel1 triangle(4);
            triangle.initFilter(Filter::Triangle);
            mip = floatImage->downSample(triangle, wrap);
            break;
        }
        default: // MipmapFilter_Kaiser
        {
            Kernel1 kaiser(10);
            kaiser.initKaiser(8.0, 0.75f);
            mip = floatImage->downSample(kaiser, wrap);
            break;
        }
    }

    // Normalize mipmap.
    if (inputOptions.normalizeMipmaps)
    {
        normalize(mip);
    }

    return mip;
}
// Quantize the image in place to the precision of the output format.
// Color dithering applies to the formats from Format_DXT1 up to Format_DXT5.
// Alpha is either made binary (optionally dithered) or, when alpha dithering is
// enabled, reduced for DXT3 (4 bit) and DXT1a (binary).
static void quantize(Image * img, const InputOptions::Private & inputOptions, Format format)
{
    const bool ditherColor = inputOptions.enableColorDithering;
    const bool ditherAlpha = inputOptions.enableAlphaDithering;

    if (ditherColor && format >= Format_DXT1 && format <= Format_DXT5)
    {
        Quantize::FloydSteinberg_RGB16(img);
    }

    if (inputOptions.binaryAlpha)
    {
        if (ditherAlpha)
        {
            Quantize::FloydSteinberg_BinaryAlpha(img, inputOptions.alphaThreshold);
        }
        else
        {
            Quantize::BinaryAlpha(img, inputOptions.alphaThreshold);
        }
        return;
    }

    if (!ditherAlpha)
    {
        return;
    }

    if (format == Format_DXT3)
    {
        Quantize::Alpha4(img);
    }
    else if (format == Format_DXT1a)
    {
        Quantize::BinaryAlpha(img, inputOptions.alphaThreshold);
    }
}
/// Compress the input texture with the given compression options.
/// Writes the DDS header first, then every face and mipmap level in order.
/// A level supplied by the caller is used as is (or turned into a normal map);
/// a missing level is generated by downsampling the previous one.
/// Returns false only when the first level of a face has no data; that case is
/// also reported to the error handler as Error_InvalidInput.
bool nvtt::compress(const InputOptions & inputOptions, const OutputOptions & outputOptions, const CompressionOptions & compressionOptions)
{
// Make sure enums match.
nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);
// Output DDS header.
outputHeader(inputOptions.m, outputOptions, compressionOptions.m);
Format format = compressionOptions.m.format;
const uint bitCount = compressionOptions.m.bitcount;
for (int f = 0; f < inputOptions.m.faceCount; f++)
{
// lastImage: most recent caller-supplied level of this face (not owned here).
// floatImage: float version of the current level, used as the source for the next.
Image * lastImage = NULL;
AutoPtr<FloatImage> floatImage(NULL);
for (int m = 0; m < inputOptions.m.mipmapCount; m++)
{
// Images are stored face by face, mipmapCount entries per face.
int idx = f * inputOptions.m.mipmapCount + m;
InputOptions::Private::Image & mipmap = inputOptions.m.images[idx];
if (outputOptions.outputHandler)
{
// Announce the level (and its compressed size) before any of its data is written.
int size = computeImageSize(mipmap.width, mipmap.height, bitCount, format);
outputOptions.outputHandler->mipmap(size, mipmap.width, mipmap.height, mipmap.depth, mipmap.face, mipmap.mipLevel);
}
Image * img; // Image to compress.
if (mipmap.data != NULL) // Mipmap provided.
{
// Convert to normal map.
if (inputOptions.m.convertToNormalMap)
{
floatImage = createNormalMap(mipmap.data.ptr(), (FloatImage::WrapMode)inputOptions.m.wrapMode, inputOptions.m.heightFactors, inputOptions.m.bumpFrequencyScale);
}
/*else if (inputOptions.m.convertToConeMap)
{
floatImage = createConeMap(mipmap.data, inputOptions.m.heightFactors);
}*/
else
{
lastImage = img = mipmap.data.ptr();
// Delete float image.
floatImage = NULL;
}
}
else // Create mipmap from last.
{
if (m == 0) {
// First mipmap missing.
if (outputOptions.errorHandler != NULL) outputOptions.errorHandler->error(Error_InvalidInput);
return false;
}
if (floatImage == NULL)
{
nvDebugCheck(lastImage != NULL);
floatImage = toFloatImage(lastImage, inputOptions.m);
}
// Create mipmap.
floatImage = createMipmap(floatImage.ptr(), inputOptions.m);
}
// A non-null float image means this level was generated or converted, so a
// fresh fixed-point image is created for it (and deleted below).
if (floatImage != NULL)
{
// Convert to fixed.
img = toFixedImage(floatImage.ptr(), inputOptions.m);
}
// NOTE(review): when the level was supplied by the caller, img aliases
// mipmap.data, so quantize() presumably modifies the stored input in place -- confirm this is intended.
quantize(img, inputOptions.m, format);
compressMipmap(img, outputOptions, compressionOptions.m);
// Only images created in this iteration are deleted; caller-supplied data is kept.
if (img != mipmap.data)
{
delete img;
}
// Stop after this level when mipmaps are disabled or the maximum level is
// reached, i.e. levels 0..maxLevel are emitted.
if (!inputOptions.m.generateMipmaps || (inputOptions.m.maxLevel >= 0 && m >= inputOptions.m.maxLevel)) {
// continue with next face.
break;
}
}
}
return true;
}
/// Estimate the size of compressing the input with the given options.
int nvtt::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions)
{
Format format = compressionOptions.m.format;
const uint bitCount = compressionOptions.m.bitcount;
int size = 0;
for (int f = 0; f < inputOptions.m.faceCount; f++)
{
for (int m = 0; m < inputOptions.m.mipmapCount; m++)
{
int idx = f * inputOptions.m.mipmapCount + m;
const InputOptions::Private::Image & img = inputOptions.m.images[idx];
size += computeImageSize(img.width, img.height, bitCount, format);
if (!inputOptions.m.generateMipmaps || (inputOptions.m.maxLevel >= 0 && m >= inputOptions.m.maxLevel)) {
// continue with next face.
break;
}
}
}
return size;
}
/// Return a human readable string for the given error code.
/// Error_Unknown and any unrecognized value map to "Unknown error".
const char * nvtt::errorString(Error e)
{
    if (e == Error_InvalidInput)
    {
        return "Invalid input";
    }
    if (e == Error_UserInterruption)
    {
        return "User interruption";
    }
    if (e == Error_UnsupportedFeature)
    {
        return "Unsupported feature";
    }
    if (e == Error_CudaError)
    {
        return "CUDA error";
    }

    return "Unknown error";
}

247
src/nvtt/nvtt.h Normal file
View File

@ -0,0 +1,247 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_H
#define NV_TT_H
#include <nvcore/nvcore.h>
//#define HAVE_S3QUANT
//#define HAVE_ATITC
// Function linkage
// NVTT_API and NVTT_CLASS decorate the public functions and classes. They are
// empty unless NVTT_SHARED evaluates to non-zero. In that case they select the
// export macros while NVTT_EXPORTS is defined, and the import macros otherwise.
// NOTE(review): the import branch maps NVTT_CLASS to DLL_IMPORT while the export
// branch uses DLL_EXPORT_CLASS -- confirm that the asymmetry is intended.
#if NVTT_SHARED
#ifdef NVTT_EXPORTS
#define NVTT_API DLL_EXPORT
#define NVTT_CLASS DLL_EXPORT_CLASS
#else
#define NVTT_API DLL_IMPORT
#define NVTT_CLASS DLL_IMPORT
#endif
#else
#define NVTT_API
#define NVTT_CLASS
#endif
// Public interface.
namespace nvtt
{
/// Supported compression formats.
/// Several enumerators are aliases that share a value: Format_RGBA equals
/// Format_RGB, each BCn name equals its DXT counterpart, and Format_LATC
/// equals Format_BC5. Code that compares ranges of formats depends on the
/// declaration order, so new values should not be inserted in the middle.
enum Format
{
// No compression.
Format_RGB,
Format_RGBA = Format_RGB,
// DX9 formats.
Format_DXT1,
Format_DXT1a, // DXT1 with binary alpha.
Format_DXT3,
Format_DXT5,
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
// DX10 formats.
Format_BC1 = Format_DXT1,
Format_BC1a = Format_DXT1a,
Format_BC2 = Format_DXT3,
Format_BC3 = Format_DXT5,
Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2
// OpenGL formats.
Format_LATC = Format_BC5,
};
/// Quality modes.
/// Quality_Fastest selects the fast compressors for DXT1, DXT3, DXT5 and DXT5n.
/// NOTE(review): how the remaining levels differ is decided inside the
/// individual compressors -- see their implementations.
enum Quality
{
Quality_Fastest,
Quality_Normal,
Quality_Production,
Quality_Highest,
};
/// Compression options. This class describes the desired compression format and other compression settings.
/// The settings themselves live in the Private structure referenced by `m`.
class CompressionOptions
{
public:
NVTT_API CompressionOptions();
NVTT_API ~CompressionOptions();
// Set default options.
NVTT_API void reset();
// Select the output format.
NVTT_API void setFormat(Format format);
// Select the quality mode; errorThreshold defaults to 0.5.
NVTT_API void setQuality(Quality quality, float errorThreshold = 0.5f);
// Set the per-channel weights used when compressing colors.
NVTT_API void setColorWeights(float red, float green, float blue);
// Request hardware compression where available.
NVTT_API void enableHardwareCompression(bool enable);
// Select an external compressor by name.
NVTT_API void setExternalCompressor(const char * name);
// Set color mask to describe the RGB/RGBA format.
NVTT_API void setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask);
// `private:` is commented out on purpose: the library reads `m` directly.
// Because `m` is a reference member, objects of this class cannot be assigned.
//private:
struct Private;
Private & m;
};
/// Wrap modes. // This matches FloatImage::WrapMode.
/// The values are cast directly to FloatImage::WrapMode by the library, so the
/// order of the enumerators must stay in sync with that enum.
enum WrapMode
{
WrapMode_Clamp,
WrapMode_Repeat,
WrapMode_Mirror,
};
/// Texture types.
/// 3D textures are not supported yet; the enumerator is kept commented out.
enum TextureType
{
TextureType_2D,
TextureType_Cube,
// TextureType_3D,
};
/// Input formats.
/// Only one format is currently enabled; the others are kept commented out.
enum InputFormat
{
InputFormat_BGRA_8UB,
// InputFormat_RGBE_8UB,
// InputFormat_BGRA_32F,
};
/// Mipmap downsampling filters.
/// InputOptions::setMipmapping uses MipmapFilter_Kaiser as its default.
enum MipmapFilter
{
MipmapFilter_Box, ///< Box filter is quite good and very fast.
MipmapFilter_Triangle, ///< Triangle filter blurs the results too much, but that might be what you want.
MipmapFilter_Kaiser, ///< Kaiser-windowed Sinc filter is the best downsampling filter.
};
/// Input options. Specify format and layout of the input texture.
/// The settings themselves live in the Private structure referenced by `m`.
struct InputOptions
{
    NVTT_API InputOptions();
    NVTT_API ~InputOptions();

    // Set default options.
    NVTT_API void reset();

    // Setup input layout.
    NVTT_API void setTextureLayout(TextureType type, int w, int h, int d = 1);
    NVTT_API void resetTextureLayout();

    // Set mipmap data. Copies the data.
    NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0);

    // Describe the format of the input.
    NVTT_API void setFormat(InputFormat fmt, bool alphaTransparency);

    // Set gamma settings.
    NVTT_API void setGamma(float inputGamma, float outputGamma);

    // Set texture wrapping mode.
    NVTT_API void setWrapMode(WrapMode mode);

    // Set mipmapping options.
    NVTT_API void setMipmapping(bool generateMipmaps, MipmapFilter filter = MipmapFilter_Kaiser, int maxLevel = -1);

    // Set quantization options.
    NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);

    // Set normal map options.
    NVTT_API void setNormalMap(bool b);
    NVTT_API void setConvertToNormalMap(bool convert);
    NVTT_API void setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale);
    // The first parameter is named `sm` rather than `small`: the Windows RPC
    // headers define `small` as a macro, which breaks this declaration when
    // <windows.h> is included before this header.
    NVTT_API void setNormalFilter(float sm, float medium, float big, float large);
    NVTT_API void setNormalizeMipmaps(bool b);

    // `private:` is commented out on purpose: the library reads `m` directly.
    //private:
    struct Private;
    Private & m;
};
/// Output handler.
/// Interface implemented by the user to receive the compressed output. The
/// library calls mipmap() before the data of each image and writeData() for
/// the header and for the image data.
struct OutputHandler
{
virtual ~OutputHandler() {}
/// Indicate the start of a new compressed image that's part of the final texture.
virtual void mipmap(int size, int width, int height, int depth, int face, int miplevel) = 0;
/// Output data. Compressed data is output as soon as it's generated to minimize memory allocations.
virtual void writeData(const void * data, int size) = 0;
};
/// Error codes.
/// Reported through ErrorHandler::error(); errorString() returns a readable
/// description for each value.
enum Error
{
Error_InvalidInput,
Error_UserInterruption,
Error_UnsupportedFeature,
Error_CudaError,
Error_Unknown,
};
/// Error handler.
/// Interface implemented by the user to be notified of errors during compression.
struct ErrorHandler
{
virtual ~ErrorHandler() {}
// Signal error.
virtual void error(Error e) = 0;
};
/// Output Options. This class holds pointers to the interfaces that are used to report the output of
/// the compressor to the user.
struct OutputOptions
{
OutputOptions() : outputHandler(NULL), outputHeader(true) { reset(); }
OutputOptions(OutputHandler * oh, ErrorHandler * eh) : outputHandler(oh), errorHandler(eh), outputHeader(true) { reset(); }
// Set default options.
NVTT_API void reset();
OutputHandler * outputHandler;
ErrorHandler * errorHandler;
bool outputHeader;
};
// Main entrypoint of the compression library.
// Takes the input description, the output/error callbacks and the compression settings.
// The bool result presumably reports success -- TODO confirm against the implementation.
NVTT_API bool compress(const InputOptions & inputOptions, const OutputOptions & outputOptions, const CompressionOptions & compressionOptions);
// Estimate the size of compressing the input with the given options.
// No OutputOptions are needed; only the input and compression settings are consulted.
NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions);
// Return string for the given error.
NVTT_API const char * errorString(Error e);
} // nvtt namespace
#endif // NV_TT_H

View File

@ -0,0 +1,52 @@
# Build script for the bundled squish DXT compression library.
PROJECT(squish)
# Needed so that the ADD_TEST() call at the end of this file registers a test.
ENABLE_TESTING()
# Make the headers in this directory available to #include directives.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# Sources and headers that make up the library.
SET(SQUISH_SRCS
alpha.cpp
alpha.h
clusterfit.cpp
clusterfit.h
fastclusterfit.cpp
fastclusterfit.h
weightedclusterfit.cpp
weightedclusterfit.h
colourblock.cpp
colourblock.h
colourfit.cpp
colourfit.h
colourset.cpp
colourset.h
config.h
maths.cpp
maths.h
rangefit.cpp
rangefit.h
singlecolourfit.cpp
singlecolourfit.h
singlecolourlookup.inl
squish.cpp
squish.h
simd.h
simd_sse.h
simd_ve.h)
# squish is always built as a static library.
ADD_LIBRARY(squish STATIC ${SQUISH_SRCS})
# libpng
# The squishpng tool is only built when libpng is found.
FIND_PACKAGE(PNG)
IF(PNG_FOUND)
INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
ADD_EXECUTABLE(squishpng extra/squishpng.cpp)
TARGET_LINK_LIBRARIES(squishpng squish ${PNG_LIBRARY})
ENDIF(PNG_FOUND)
# The squishgen tool is currently disabled.
#ADD_EXECUTABLE(squishgen extra/squishgen.cpp)
# Test executable, built unconditionally and registered with CTest as SQUISHTEST.
ADD_EXECUTABLE(squishtest extra/squishtest.cpp)
TARGET_LINK_LIBRARIES(squishtest squish)
ADD_TEST(SQUISHTEST squishtest)

38
src/nvtt/squish/ChangeLog Normal file
View File

@ -0,0 +1,38 @@
1.7
* Fixed floating-point equality issue in clusterfit sort (x86 affected only)
* Implemented proper SSE(2) floor function for 50% speedup on SSE builds
* The range fit implementation now uses the correct colour metric
1.6
* Fixed bug in CompressImage where masked pixels were not skipped over
* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels
* Fixed major DXT1 bug that can generate unexpected transparent pixels
1.5
* Added CompressMasked function to handle incomplete DXT blocks more cleanly
* Added kWeightColourByAlpha flag for better quality images when alpha blending
1.4
* Fixed stack overflow in rangefit
1.3
* Worked around SSE floor implementation bug, proper fix needed!
* This release has visual studio and makefile builds that work
1.2
* Added provably optimal single colour compressor
* Added extra/squishgen.cpp that generates single colour lookup tables
1.1
* Fixed a DXT1 colour output bug
* Changed argument order for Decompress function to match Compress
* Added GetStorageRequirements function
* Added CompressImage function
* Added DecompressImage function
* Moved squishtool.cpp to extra/squishpng.cpp
* Added extra/squishtest.cpp
1.0
* Initial release

223
src/nvtt/squish/Doxyfile Normal file
View File

@ -0,0 +1,223 @@
# Doxyfile 1.4.6
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = squish
PROJECT_NUMBER = 1.1
OUTPUT_DIRECTORY = docs
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
USE_WINDOWS_ENCODING = NO
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = YES
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
DETAILS_AT_TOP = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 4
ALIASES =
OPTIMIZE_OUTPUT_FOR_C = NO
OPTIMIZE_OUTPUT_JAVA = NO
BUILTIN_STL_SUPPORT = NO
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = YES
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = NO
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_DIRECTORIES = NO
FILE_VERSION_FILTER =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = squish.h
FILE_PATTERNS =
RECURSIVE = NO
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = NO
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_ALIGN_MEMBERS = YES
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
PDF_HYPERLINKS = NO
USE_PDFLATEX = NO
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = YES
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
DOT_PATH = /Applications/Graphviz.app/Contents/MacOS
DOTFILE_DIRS =
MAX_DOT_GRAPH_WIDTH = 1024
MAX_DOT_GRAPH_HEIGHT = 1024
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = NO

31
src/nvtt/squish/Makefile Normal file
View File

@ -0,0 +1,31 @@
# Makefile for the static squish library (libsquish.a).
# NOTE(review): make requires every recipe line to start with a tab; confirm that the
# indentation of the recipe lines in this file is intact.
# Settings such as INSTALL_DIR and the compiler flags presumably come from the 'config'
# file included here -- confirm against that file.
include config
# Sources compiled into the library, and one object file per source.
SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp
OBJ = $(SRC:%.cpp=%.o)
LIB = libsquish.a
# Default target: build the library.
all : $(LIB)
# Copy the public header and the library under $(INSTALL_DIR).
install : $(LIB)
install squish.h $(INSTALL_DIR)/include
install libsquish.a $(INSTALL_DIR)/lib
# Remove the files copied by 'install'.
uninstall:
$(RM) $(INSTALL_DIR)/include/squish.h
$(RM) $(INSTALL_DIR)/lib/libsquish.a
# Archive the objects; $? expands to the prerequisites that are newer than the archive.
$(LIB) : $(OBJ)
$(AR) cr $@ $?
ranlib $@
# Compile each .cpp into its .o, with the current directory on the include path.
%.o : %.cpp
$(CXX) $(CPPFLAGS) -I. $(CXXFLAGS) -o$@ -c $<
# Delete the objects and the library.
clean :
$(RM) $(OBJ) $(LIB)

35
src/nvtt/squish/README Normal file
View File

@ -0,0 +1,35 @@
LICENSE
-------
The squish library is distributed under the terms and conditions of the MIT
license. This license is specified at the top of each source file and must be
preserved in its entirety.
BUILDING AND INSTALLING THE LIBRARY
-----------------------------------
If you are using Visual Studio 2003 or above under Windows then load the Visual
Studio 2003 project in the vs7 folder. By default, the library is built using
SSE optimisations. To change this either change or remove the SQUISH_USE_SSE=1
from the preprocessor symbols.
If you are using a Mac then load the Xcode 2.2 project in the distribution. By
default, the library is built using Altivec optimisations. To change this
either change or remove SQUISH_USE_ALTIVEC=1 from the preprocessor symbols. I
guess I'll have to think about changing this for the new Intel Macs that are
rolling out...
If you are using unix then first edit the config file in the base directory of
the distribution, enabling Altivec or SSE with the USE_ALTIVEC or USE_SSE
variables, and editing the optimisation flags passed to the C++ compiler if
necessary. Then make can be used to build the library, and make install (from
the superuser account) can be used to install (into /usr/local by default).
REPORTING BUGS OR FEATURE REQUESTS
----------------------------------
Feedback can be sent to Simon Brown (the developer) at si@sjbrown.co.uk
New releases are announced on the squish library homepage at
http://sjbrown.co.uk/?code=squish

326
src/nvtt/squish/alpha.cpp Normal file
View File

@ -0,0 +1,326 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "alpha.h"
#include <algorithm>
#include <climits>
namespace squish {
// Rounds a to the nearest integer and clamps the result to the range [0, limit].
static int FloatToInt( float a, int limit )
{
    // adding a half before the truncating cast turns round-to-zero into round-to-nearest
    int const rounded = static_cast< int >( a + 0.5f );

    // anything below zero collapses to zero
    if( rounded < 0 )
        return 0;

    // otherwise cap at the upper limit
    return ( rounded > limit ) ? limit : rounded;
}
// Packs the alpha channel of 16 RGBA pixels into 8 bytes of explicit 4-bit alpha.
// rgba: 64 bytes, alpha is every fourth byte (offset 3); block: receives 8 bytes.
void CompressAlphaDxt3( u8 const* rgba, void* block )
{
    u8* out = reinterpret_cast< u8* >( block );

    // each output byte stores two neighbouring pixels, first pixel in the low nibble
    for( int pair = 0; pair < 8; ++pair )
    {
        u8 const* px = rgba + 8*pair;

        // rescale 0..255 to 0..15
        float const first = ( float )px[3] * ( 15.0f/255.0f );
        float const second = ( float )px[7] * ( 15.0f/255.0f );

        // round and clamp to 4 bits
        int const lo = FloatToInt( first, 15 );
        int const hi = FloatToInt( second, 15 );

        out[pair] = ( u8 )( lo | ( hi << 4 ) );
    }
}
// Expands 8 bytes of explicit 4-bit alpha into the alpha channel of 16 RGBA pixels.
// Only the alpha bytes (offset 3 of every pixel) of rgba are written.
void DecompressAlphaDxt3( u8* rgba, void const* block )
{
    u8 const* in = reinterpret_cast< u8 const* >( block );

    // every input byte carries two pixels
    for( int pair = 0; pair < 8; ++pair )
    {
        u8 const packed = in[pair];

        // low nibble is the first pixel, high nibble (left in place) the second
        u8 const lowNibble = packed & 0x0f;
        u8 const highNibble = packed & 0xf0;

        // replicate each nibble into both halves of the byte to get back to 8 bits
        u8* px = rgba + 8*pair;
        px[3] = lowNibble | ( lowNibble << 4 );
        px[7] = highNibble | ( highNibble >> 4 );
    }
}
// Widens [min, max] until it spans at least `steps`, staying inside [0, 255]:
// the upper bound is raised first, and the lower bound is lowered only if that was not enough.
static void FixRange( int& min, int& max, int steps )
{
    // try to grow upwards, capped at 255
    if( max - min < steps )
    {
        int const raised = min + steps;
        max = ( raised < 255 ) ? raised : 255;
    }

    // still too narrow (the cap was hit): grow downwards, floored at 0
    if( max - min < steps )
    {
        int const lowered = max - steps;
        min = ( lowered > 0 ) ? lowered : 0;
    }
}
// Maps the alpha of each of the 16 pixels onto the closest entry of an 8-entry codebook.
// Writes the chosen entry per pixel to indices and returns the summed squared error.
static int FitCodes( u8 const* rgba, u8 const* codes, u8* indices )
{
    int total = 0;
    for( int pixel = 0; pixel < 16; ++pixel )
    {
        int const alpha = rgba[4*pixel + 3];

        // linear search for the code with the smallest squared distance;
        // the strict comparison keeps the first of several equally good codes
        int bestDist = INT_MAX;
        int bestCode = 0;
        for( int c = 0; c < 8; ++c )
        {
            int const delta = alpha - ( int )codes[c];
            int const squared = delta*delta;
            if( squared < bestDist )
            {
                bestDist = squared;
                bestCode = c;
            }
        }

        // record the winner and add its error to the total
        indices[pixel] = ( u8 )bestCode;
        total += bestDist;
    }
    return total;
}
static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
{
u8* bytes = reinterpret_cast< u8* >( block );
// write the first two bytes
bytes[0] = ( u8 )alpha0;
bytes[1] = ( u8 )alpha1;
// pack the indices with 3 bits each
u8* dest = bytes + 2;
u8 const* src = indices;
for( int i = 0; i < 2; ++i )
{
// pack 8 3-bit values
int value = 0;
for( int j = 0; j < 8; ++j )
{
int index = *src++;
value |= ( index << 3*j );
}
// store in 3 bytes
for( int j = 0; j < 3; ++j )
{
int byte = ( value >> 8*j ) & 0xff;
*dest++ = ( u8 )byte;
}
}
}
static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
{
// check the relative values of the endpoints
if( alpha0 > alpha1 )
{
// swap the indices
u8 swapped[16];
for( int i = 0; i < 16; ++i )
{
u8 index = indices[i];
if( index == 0 )
swapped[i] = 1;
else if( index == 1 )
swapped[i] = 0;
else if( index <= 5 )
swapped[i] = 7 - index;
else
swapped[i] = index;
}
// write the block
WriteAlphaBlock( alpha1, alpha0, swapped, block );
}
else
{
// write the block
WriteAlphaBlock( alpha0, alpha1, indices, block );
}
}
static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
{
// check the relative values of the endpoints
if( alpha0 < alpha1 )
{
// swap the indices
u8 swapped[16];
for( int i = 0; i < 16; ++i )
{
u8 index = indices[i];
if( index == 0 )
swapped[i] = 1;
else if( index == 1 )
swapped[i] = 0;
else
swapped[i] = 9 - index;
}
// write the block
WriteAlphaBlock( alpha1, alpha0, swapped, block );
}
else
{
// write the block
WriteAlphaBlock( alpha0, alpha1, indices, block );
}
}
// Compresses the alpha channel of 16 RGBA pixels into an 8-byte interpolated-alpha block.
// Both the 5-alpha codebook (4 interpolated values plus explicit 0 and 255) and the
// 7-alpha codebook (6 interpolated values) are tried; the one with less error is written.
void CompressAlphaDxt5( u8 const* rgba, void* block )
{
    // the 7-alpha range covers every value; the 5-alpha range leaves out 0 (for its
    // minimum) and 255 (for its maximum) because that codebook has them as fixed entries
    int min5 = 255, max5 = 0;
    int min7 = 255, max7 = 0;
    for( int i = 0; i < 16; ++i )
    {
        int const value = rgba[4*i + 3];
        min7 = ( value < min7 ) ? value : min7;
        max7 = ( value > max7 ) ? value : max7;
        if( value != 0 && value < min5 )
            min5 = value;
        if( value != 255 && value > max5 )
            max5 = value;
    }

    // an inverted range means no usable value was seen: collapse it onto the maximum
    if( min5 > max5 )
        min5 = max5;
    if( min7 > max7 )
        min7 = max7;

    // make each range at least as wide as its number of interpolation steps
    FixRange( min5, max5, 5 );
    FixRange( min7, max7, 7 );

    // 5-alpha codebook: endpoints, four interpolated values, then the fixed 0 and 255
    u8 codes5[8];
    codes5[0] = ( u8 )min5;
    codes5[1] = ( u8 )max5;
    for( int slot = 2; slot < 6; ++slot )
    {
        int const t = slot - 1;
        codes5[slot] = ( u8 )( ( ( 5 - t )*min5 + t*max5 )/5 );
    }
    codes5[6] = 0;
    codes5[7] = 255;

    // 7-alpha codebook: endpoints and six interpolated values
    u8 codes7[8];
    codes7[0] = ( u8 )min7;
    codes7[1] = ( u8 )max7;
    for( int slot = 2; slot < 8; ++slot )
    {
        int const t = slot - 1;
        codes7[slot] = ( u8 )( ( ( 7 - t )*min7 + t*max7 )/7 );
    }

    // quantise the pixels against both codebooks
    u8 indices5[16];
    u8 indices7[16];
    int const err5 = FitCodes( rgba, codes5, indices5 );
    int const err7 = FitCodes( rgba, codes7, indices7 );

    // the 7-alpha block is used only when strictly better; ties go to 5-alpha
    if( err7 < err5 )
        WriteAlphaBlock7( min7, max7, indices7, block );
    else
        WriteAlphaBlock5( min5, max5, indices5, block );
}
// Decodes an 8-byte interpolated-alpha block into the alpha channel of 16 RGBA pixels.
// Only the alpha bytes (offset 3 of every pixel) of rgba are written.
void DecompressAlphaDxt5( u8* rgba, void const* block )
{
    u8 const* in = reinterpret_cast< u8 const* >( block );

    // the first two bytes are the endpoints
    int const alpha0 = in[0];
    int const alpha1 = in[1];

    // the ordering of the endpoints selects the codebook
    u8 codes[8];
    codes[0] = ( u8 )alpha0;
    codes[1] = ( u8 )alpha1;
    if( alpha0 > alpha1 )
    {
        // 7-alpha codebook: six interpolated values
        for( int slot = 2; slot < 8; ++slot )
        {
            int const t = slot - 1;
            codes[slot] = ( u8 )( ( ( 7 - t )*alpha0 + t*alpha1 )/7 );
        }
    }
    else
    {
        // 5-alpha codebook: four interpolated values plus the fixed 0 and 255
        for( int slot = 2; slot < 6; ++slot )
        {
            int const t = slot - 1;
            codes[slot] = ( u8 )( ( ( 5 - t )*alpha0 + t*alpha1 )/5 );
        }
        codes[6] = 0;
        codes[7] = 255;
    }

    // the remaining six bytes hold sixteen 3-bit indices, in two groups of three bytes
    u8 indices[16];
    for( int group = 0; group < 2; ++group )
    {
        // reassemble 24 bits, least significant byte first
        u8 const* src = in + 2 + 3*group;
        int const packed = src[0] | ( src[1] << 8 ) | ( src[2] << 16 );

        // peel off eight 3-bit values, lowest bits first
        for( int k = 0; k < 8; ++k )
            indices[8*group + k] = ( u8 )( ( packed >> 3*k ) & 0x7 );
    }

    // look every index up in the codebook
    for( int i = 0; i < 16; ++i )
        rgba[4*i + 3] = codes[indices[i]];
}
} // namespace squish

41
src/nvtt/squish/alpha.h Normal file
View File

@ -0,0 +1,41 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_ALPHA_H
#define SQUISH_ALPHA_H
#include <squish.h>
namespace squish {
// All four functions work on one block of 16 RGBA pixels (64 bytes, alpha at byte
// offset 3 of each pixel) and on 8 bytes of compressed alpha data.

// Quantises the 16 alpha values to 4 bits each and writes them to block.
void CompressAlphaDxt3( u8 const* rgba, void* block );
// Writes two endpoint bytes plus sixteen 3-bit codebook indices to block.
void CompressAlphaDxt5( u8 const* rgba, void* block );
// Expands the 4-bit values in block; only the alpha bytes of rgba are written.
void DecompressAlphaDxt3( u8* rgba, void const* block );
// Decodes endpoints and indices from block; only the alpha bytes of rgba are written.
void DecompressAlphaDxt5( u8* rgba, void const* block );
} // namespace squish
#endif // ndef SQUISH_ALPHA_H

View File

@ -0,0 +1,499 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "clusterfit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
// Prepares a cluster fit for the given colour set: the points are ordered by their
// projection onto the principal axis of the set, and the points, weights and weighted
// points are cached in that order for Compress3/Compress4.
// NOTE: m_metric is not assigned here (the initialisation below is commented out), so
// setMetric() has to be called before SolveLeastSquares reads it.
ClusterFit::ClusterFit( ColourSet const* colours, int flags )
: ColourFit( colours, flags )
{
// initialise the best error
#if SQUISH_USE_SIMD
m_besterror = VEC4_CONST( FLT_MAX );
#else
m_besterror = FLT_MAX;
#endif
// disabled: metric selection from the kColourMetricPerceptual flag (see setMetric)
/* // initialise the metric
bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
#if SQUISH_USE_SIMD
if( perceptual )
m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f );
else
m_metric = VEC4_CONST( 1.0f );
#else
if( perceptual )
m_metric = Vec3( 0.2126f, 0.7152f, 0.0722f );
else
m_metric = Vec3( 1.0f );
#endif
*/
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
// get the covariance matrix
Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
// compute the principal component
Vec3 principle = ComputePrincipleComponent( covariance );
// build the list of values
// (dps holds 16 entries, so this assumes count <= 16 -- TODO confirm against ColourSet)
float dps[16];
for( int i = 0; i < count; ++i )
{
dps[i] = Dot( values[i], principle );
m_order[i] = i;
}
// stable sort
// (insertion sort on the projections; the strict < never moves equal keys past each other)
for( int i = 0; i < count; ++i )
{
for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
{
std::swap( dps[j], dps[j - 1] );
std::swap( m_order[j], m_order[j - 1] );
}
}
// weight all the points
#if SQUISH_USE_SIMD
Vec4 const* unweighted = m_colours->GetPointsSimd();
Vec4 const* weights = m_colours->GetWeightsSimd();
m_xxsum = VEC4_CONST( 0.0f );
#else
Vec3 const* unweighted = m_colours->GetPoints();
float const* weights = m_colours->GetWeights();
m_xxsum = Vec3( 0.0f );
#endif
// store everything in sorted order and accumulate the per-channel sum of squared weighted points
for( int i = 0; i < count; ++i )
{
int p = m_order[i];
m_unweighted[i] = unweighted[p];
m_weights[i] = weights[p];
m_weighted[i] = weights[p]*unweighted[p];
m_xxsum += m_weighted[i]*m_weighted[i];
}
}
// Sets the per-channel weights applied to the error term in SolveLeastSquares.
// The constructor does not assign m_metric, so this is the only place in this file that does.
void ClusterFit::setMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD
// the fourth component is unused and set to zero
m_metric = Vec4(r, g, b, 0);
#else
m_metric = Vec3(r, g, b);
#endif
}
// Returns the lowest error recorded so far (FLT_MAX until Compress3/Compress4 saves a block).
float ClusterFit::bestError() const
{
#if SQUISH_USE_SIMD
// in the SIMD build the error is kept in a Vec4; its x component is returned
return m_besterror.GetVec3().X();
#else
return m_besterror;
#endif
}
// Searches the 3-colour encodings (start, midpoint, end) of the sorted points.
// Every split of the ordering into three consecutive clusters is tried, the endpoints for
// each split come from SolveLeastSquares, and the block is written only if the best split
// beats the error stored in m_besterror.
void ClusterFit::Compress3( void* block )
{
// declare variables
int const count = m_colours->GetCount();
// the local search starts from FLT_MAX; m_besterror is only consulted when saving
#if SQUISH_USE_SIMD
Vec4 beststart = VEC4_CONST( 0.0f );
Vec4 bestend = VEC4_CONST( 0.0f );
Vec4 besterror = VEC4_CONST( FLT_MAX );
Vec4 const half = VEC4_CONST( 0.5f );
Vec4 const zero = VEC4_CONST( 0.0f );
#else
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
float besterror = FLT_MAX;
float const half = 0.5f;
float const zero = 0.0f;
#endif
// check all possible clusters for this total order
// (index values: 0 = start colour, 2 = midpoint, 1 = end colour)
u8 indices[16];
u8 bestindices[16];
// first cluster [0,i) is at the start
for( int m = 0; m < count; ++m )
{
indices[m] = 0;
m_alpha[m] = m_weights[m];
m_beta[m] = zero;
}
// i and j count downwards so that only the entries that change cluster are rewritten
for( int i = count; i >= 0; --i )
{
// second cluster [i,j) is half along
for( int m = i; m < count; ++m )
{
indices[m] = 2;
m_alpha[m] = m_beta[m] = half*m_weights[m];
}
for( int j = count; j > i; --j )
{
// last cluster [j,k) is at the end
if( j < count )
{
indices[j] = 1;
m_alpha[j] = zero;
m_beta[j] = m_weights[j];
}
// solve a least squares problem to place the endpoints
#if SQUISH_USE_SIMD
Vec4 start, end;
Vec4 error = SolveLeastSquares( start, end );
#else
Vec3 start, end;
float error = SolveLeastSquares( start, end );
#endif
// keep the solution if it wins
#if SQUISH_USE_SIMD
if( CompareAnyLessThan( error, besterror ) )
#else
if( error < besterror )
#endif
{
beststart = start;
bestend = end;
for( int m = 0; m < 16; ++m ) // TODO: make this faster?
bestindices[m] = indices[m];
besterror = error;
}
}
}
// save the block if necessary
#if SQUISH_USE_SIMD
if( CompareAnyLessThan( besterror, m_besterror ) )
#else
if( besterror < m_besterror )
#endif
{
// remap the indices
// (undo the sort first: m_order[i] is the original position of sorted point i)
u8 unordered[16];
for( int i = 0; i < count; ++i )
unordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( unordered, bestindices );
// save the block
#if SQUISH_USE_SIMD
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
#else
WriteColourBlock3( beststart, bestend, bestindices, block );
#endif
// save the error
m_besterror = besterror;
}
}
//static int run = 0;
//static bool debug = false;
// Searches the 4-colour encodings (start, one third, two thirds, end) of the sorted points.
// Every split of the ordering into four consecutive clusters is tried, the endpoints for
// each split come from SolveLeastSquares, and the block is written only if some split
// beats the error stored in m_besterror.
void ClusterFit::Compress4( void* block )
{
//debug = (run == 1);
//run++;
// declare variables
int const count = m_colours->GetCount();
// unlike Compress3, the search starts from m_besterror, so only improvements are kept
#if SQUISH_USE_SIMD
Vec4 beststart = VEC4_CONST( 0.0f );
Vec4 bestend = VEC4_CONST( 0.0f );
Vec4 besterror = m_besterror;
Vec4 const twothirds = VEC4_CONST( 2.0f/3.0f );
Vec4 const onethird = VEC4_CONST( 1.0f/3.0f );
Vec4 const zero = VEC4_CONST( 0.0f );
#else
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
float besterror = m_besterror;
float const twothirds = 2.0f/3.0f;
float const onethird = 1.0f/3.0f;
float const zero = 0.0f;
#endif
// check all possible clusters for this total order
// (index values: 0 = start colour, 2 = one third, 3 = two thirds, 1 = end colour)
u8 indices[16];
u8 bestindices[16];
// first cluster [0,i) is at the start
for( int m = 0; m < count; ++m )
{
indices[m] = 0;
m_alpha[m] = m_weights[m];
m_beta[m] = zero;
}
// i, j and k count downwards so that only the entries that change cluster are rewritten
for( int i = count; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < count; ++m )
{
indices[m] = 2;
m_alpha[m] = twothirds*m_weights[m];
m_beta[m] = onethird*m_weights[m];
}
for( int j = count; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < count; ++m )
{
indices[m] = 3;
m_alpha[m] = onethird*m_weights[m];
m_beta[m] = twothirds*m_weights[m];
}
for( int k = count; k >= j; --k )
{
// skip the split with j == k == 0 (hence i == 0), where every point is in the last cluster
if (j + k == 0) continue;
// last cluster [k,n) is at the end
if( k < count )
{
indices[k] = 1;
m_alpha[k] = zero;
m_beta[k] = m_weights[k];
}
/*unsigned int permutation = 0;
for(int p = 0; p < 16; p++) {
permutation |= indices[p] << (p * 2);
}
if (debug) printf("%X:\t", permutation);
if (debug && permutation == 0x55FFFFAA) __debugbreak();
*/
// solve a least squares problem to place the endpoints
#if SQUISH_USE_SIMD
Vec4 start, end;
Vec4 error = SolveLeastSquares( start, end );
#else
Vec3 start, end;
float error = SolveLeastSquares( start, end );
#endif
// keep the solution if it wins
#if SQUISH_USE_SIMD
if( CompareAnyLessThan( error, besterror ) )
#else
if( error < besterror )
#endif
{
beststart = start;
bestend = end;
for( int m = 0; m < 16; ++m ) // TODO: make this faster?
bestindices[m] = indices[m];
besterror = error;
}
}
}
}
// save the block if necessary
#if SQUISH_USE_SIMD
if( CompareAnyLessThan( besterror, m_besterror ) )
#else
if( besterror < m_besterror )
#endif
{
// remap the indices
// (undo the sort first: m_order[i] is the original position of sorted point i)
u8 unordered[16];
for( int i = 0; i < count; ++i )
unordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( unordered, bestindices );
// save the block
#if SQUISH_USE_SIMD
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
#else
WriteColourBlock4( beststart, bestend, bestindices, block );
#endif
// save the error
m_besterror = besterror;
}
}
#if SQUISH_USE_SIMD
// SIMD variant. Places the two endpoint colours for the current assignment of points to
// clusters (held in m_alpha/m_beta), snaps them to the 5:6:5 colour grid and returns the
// metric-weighted error of that choice, replicated across the returned Vec4.
// start/end receive the snapped endpoints.
Vec4 ClusterFit::SolveLeastSquares( Vec4& start, Vec4& end ) const
{
    // accumulate all the quantities we need
    // (MultiplyAdd( a, b, c ) is used as a*b + c throughout)
    int const count = m_colours->GetCount();
    Vec4 alpha2_sum = VEC4_CONST( 0.0f );
    Vec4 beta2_sum = VEC4_CONST( 0.0f );
    Vec4 alphabeta_sum = VEC4_CONST( 0.0f );
    Vec4 alphax_sum = VEC4_CONST( 0.0f );
    Vec4 betax_sum = VEC4_CONST( 0.0f );
    for( int i = 0; i < count; ++i )
    {
        Vec4 alpha = m_alpha[i];
        Vec4 beta = m_beta[i];
        Vec4 x = m_weighted[i];

        alpha2_sum = MultiplyAdd( alpha, alpha, alpha2_sum );
        beta2_sum = MultiplyAdd( beta, beta, beta2_sum );
        alphabeta_sum = MultiplyAdd( alpha, beta, alphabeta_sum );
        alphax_sum = MultiplyAdd( alpha, x, alphax_sum );
        betax_sum = MultiplyAdd( beta, x, betax_sum );
    }

    // select the results
    // a1/b1 are the one-endpoint solutions and a2/b2 the general two-endpoint solution;
    // all are computed unconditionally and the masks pick between them
    // (presumably Select( x, y, mask ) yields y where the mask is set -- confirm in simd.h)
    Vec4 const zero = VEC4_CONST( 0.0f );
    Vec4 beta2_sum_zero = CompareEqual( beta2_sum, zero );
    Vec4 alpha2_sum_zero = CompareEqual( alpha2_sum, zero );

    Vec4 a1 = alphax_sum*Reciprocal( alpha2_sum );
    Vec4 b1 = betax_sum*Reciprocal( beta2_sum );

    Vec4 factor = Reciprocal( NegativeMultiplySubtract(
        alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum
    ) );
    Vec4 a2 = NegativeMultiplySubtract(
        betax_sum, alphabeta_sum, alphax_sum*beta2_sum
    )*factor;
    Vec4 b2 = NegativeMultiplySubtract(
        alphax_sum, alphabeta_sum, betax_sum*alpha2_sum
    )*factor;

    Vec4 a = Select( Select( a2, a1, beta2_sum_zero ), zero, alpha2_sum_zero );
    Vec4 b = Select( Select( b2, b1, alpha2_sum_zero ), zero, beta2_sum_zero );

    // clamp the output to [0, 1]
    Vec4 const one = VEC4_CONST( 1.0f );
    Vec4 const half = VEC4_CONST( 0.5f );
    a = Min( one, Max( zero, a ) );
    b = Min( one, Max( zero, b ) );

    // clamp to the grid
    Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
    // Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
    Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); // IC: use approximate grid fitting.
    a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
    b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;

    // compute the error
    Vec4 const two = VEC4_CONST( 2.0 );
    Vec4 e1 = MultiplyAdd( b*b, beta2_sum, m_xxsum );
    Vec4 e2 = MultiplyAdd( a, alphax_sum, b*betax_sum );
    Vec4 e3 = MultiplyAdd( a*a, alpha2_sum, e1 );
    Vec4 e4 = MultiplyAdd( a*b*alphabeta_sum - e2, two, e3 );

    // apply the metric to the error term
    Vec4 e5 = e4*m_metric;
    Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();

    // save the start and end
    start = a;
    end = b;
    return error;
}
#else
// Scalar variant. Places the two endpoint colours for the current assignment of points to
// clusters (held in m_alpha/m_beta), snaps them to the 5:6:5 colour grid and returns the
// metric-weighted error of that choice. start/end receive the snapped endpoints.
float ClusterFit::SolveLeastSquares( Vec3& start, Vec3& end ) const
{
// accumulate all the quantities we need
int const count = m_colours->GetCount();
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
float alphabeta_sum = 0.0f;
Vec3 alphax_sum( 0.0f );
Vec3 betax_sum( 0.0f );
for( int i = 0; i < count; ++i )
{
float alpha = m_alpha[i];
float beta = m_beta[i];
Vec3 const& x = m_weighted[i];
alpha2_sum += alpha*alpha;
beta2_sum += beta*beta;
alphabeta_sum += alpha*beta;
alphax_sum += alpha*x;
betax_sum += beta*x;
}
//if (debug) printf("%f %f %f", alpha2_sum, beta2_sum, alphabeta_sum);
// zero where non-determinate
// (when one of the sums is zero only the other endpoint is solved for)
Vec3 a, b;
if( beta2_sum == 0.0f )
{
a = alphax_sum/alpha2_sum;
b = Vec3( 0.0f );
}
else if( alpha2_sum == 0.0f )
{
a = Vec3( 0.0f );
b = betax_sum/beta2_sum;
}
else
{
// general case: solve the 2x2 system for both endpoints
float factor = 1.0f/( alpha2_sum*beta2_sum - alphabeta_sum*alphabeta_sum );
a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
}
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
// (31/63/31 levels per channel; the reciprocals in use are approximate, the exact ones are commented out)
Vec3 const grid( 31.0f, 63.0f, 31.0f );
//Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
Vec3 const gridrcp(0.03227752766457f, 0.01583151765563f, 0.03227752766457f); // IC: use approximate grid fitting.
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
// compute the error
// (the m_xxsum term is commented out here, whereas the SIMD variant adds it)
Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum /*+ m_xxsum*/
+ 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
// apply the metric to the error term
float error = Dot( e1, m_metric );
//if (debug) printf(" - %f\n", error);
// save the start and end
start = a;
end = b;
return error;
}
#endif
} // namespace squish

View File

@ -0,0 +1,79 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_CLUSTERFIT_H
#define SQUISH_CLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace squish {
// Colour fitter derived from ColourFit. It overrides both Compress3 and
// Compress4 and keeps per-point working arrays of 16 entries, one per
// pixel of a block. The member types depend on SQUISH_USE_SIMD: Vec4
// throughout when SIMD is enabled, Vec3/float otherwise.
class ClusterFit : public ColourFit
{
public:
ClusterFit( ColourSet const* colours, int flags );
// NOTE(review): presumably sets the per-channel weights held in m_metric -- confirm in clusterfit.cpp
void setMetric(float r, float g, float b);
// NOTE(review): presumably returns m_besterror -- confirm in clusterfit.cpp
float bestError() const;
private:
virtual void Compress3( void* block );
virtual void Compress4( void* block );
void Reorder( Vec3::Arg principle );
Vec3 m_principle;
#if SQUISH_USE_SIMD
// SIMD variant: the error comes back as a Vec4
Vec4 SolveLeastSquares( Vec4& start, Vec4& end ) const;
Vec4 m_weighted[16];
Vec4 m_unweighted[16];
Vec4 m_weights[16];
Vec4 m_metric;
Vec4 m_alpha[16];
Vec4 m_beta[16];
Vec4 m_xxsum;
Vec4 m_besterror;
#else
// scalar variant: reads m_alpha, m_beta, m_weighted and m_metric
float SolveLeastSquares( Vec3& start, Vec3& end ) const;
Vec3 m_weighted[16];
Vec3 m_unweighted[16];
float m_weights[16];
Vec3 m_metric;
float m_alpha[16];
float m_beta[16];
Vec3 m_xxsum;
float m_besterror;
#endif
int m_order[16];
};
} // namespace squish
#endif // ndef SQUISH_CLUSTERFIT_H

View File

@ -0,0 +1,278 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourblock.h"
namespace squish {
// Rounds a to the nearest integer and clamps the result to [0, limit].
static int FloatToInt( float a, int limit )
{
	// adding a half before the truncating cast rounds to nearest
	int const rounded = ( int )( a + 0.5f );

	// below the range
	if( rounded < 0 )
		return 0;

	// above the range, or already inside it
	return ( rounded > limit ) ? limit : rounded;
}
// Quantises a colour to 5:6:5 bits and packs it into the low 16 bits of
// an int: X in bits 11-15, Y in bits 5-10, Z in bits 0-4.
static int FloatTo565( Vec3::Arg colour )
{
	// scale each component to its bit depth, round and clamp
	int const top5 = FloatToInt( 31.0f*colour.X(), 31 );
	int const mid6 = FloatToInt( 63.0f*colour.Y(), 63 );
	int const low5 = FloatToInt( 31.0f*colour.Z(), 31 );

	// assemble the packed value field by field
	int packed = top5 << 11;
	packed |= mid6 << 5;
	packed |= low5;
	return packed;
}
static void WriteColourBlock( int a, int b, u8* indices, void* block )
{
// get the block as bytes
u8* bytes = ( u8* )block;
// write the endpoints
bytes[0] = ( u8 )( a & 0xff );
bytes[1] = ( u8 )( a >> 8 );
bytes[2] = ( u8 )( b & 0xff );
bytes[3] = ( u8 )( b >> 8 );
// write the indices
for( int i = 0; i < 4; ++i )
{
u8 const* ind = indices + 4*i;
bytes[4 + i] = ind[0] | ( ind[1] << 2 ) | ( ind[2] << 4 ) | ( ind[3] << 6 );
}
}
// Writes a block whose packed endpoints satisfy first <= second. When the
// quantised endpoints arrive in the other order they are exchanged and
// indices 0 and 1 are exchanged to match; other index values pass through.
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
{
	// quantise both endpoints
	int first = FloatTo565( start );
	int second = FloatTo565( end );

	// decide once whether the endpoints have to trade places
	bool const flip = ( first > second );
	if( flip )
		std::swap( first, second );

	// build the index table that matches the final endpoint order
	u8 remapped[16];
	for( int i = 0; i < 16; ++i )
	{
		u8 const index = indices[i];
		remapped[i] = ( flip && index < 2 ) ? ( u8 )( index ^ 1 ) : index;
	}

	// write the block
	WriteColourBlock( first, second, remapped, block );
}
// Writes a block whose packed endpoints satisfy first >= second. Endpoints
// arriving in the other order are exchanged and the low bit of every index
// is flipped; identical endpoints collapse every index to 0.
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
{
	// quantise both endpoints
	int first = FloatTo565( start );
	int second = FloatTo565( end );

	u8 remapped[16];
	if( first == second )
	{
		// both endpoints are the same colour: every pixel uses index 0
		for( int i = 0; i < 16; ++i )
			remapped[i] = 0;
	}
	else if( first > second )
	{
		// already in the required order: copy the indices through
		for( int i = 0; i < 16; ++i )
			remapped[i] = indices[i];
	}
	else
	{
		// wrong order: exchange the endpoints and flip bit 0 of each index
		std::swap( first, second );
		for( int i = 0; i < 16; ++i )
			remapped[i] = ( indices[i] ^ 0x1 ) & 0x3;
	}

	// write the block
	WriteColourBlock( first, second, remapped, block );
}
/*
static void WriteColourBlock( int a, int b, uint indices, void* block )
{
// get the block as bytes
u8* bytes = ( u8* )block;
// write the endpoints
bytes[0] = ( u8 )( a & 0xff );
bytes[1] = ( u8 )( a >> 8 );
bytes[2] = ( u8 )( b & 0xff );
bytes[3] = ( u8 )( b >> 8 );
// write the indices @@ Not sure that's correct...
bytes[4] = ( u8 )((indices >> 24) & 0xff);
bytes[5] = ( u8 )((indices >> 16) & 0xff);
bytes[6] = ( u8 )((indices >> 8) & 0xff);
bytes[7] = ( u8 )((indices >> 0) & 0xff);
}
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, uint indices, void* block )
{
// get the packed values
int a = FloatTo565( start );
int b = FloatTo565( end );
// remap the indices
if( a > b )
{
// swap a and b
std::swap( a, b );
indices ^= (~indices >> 1) & 0x55555555;
}
else if ( a == b )
{
indices = 0;
}
// write the block
WriteColourBlock( a, b, indices, block );
}
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, uint indices, void* block )
{
// get the packed values
int a = FloatTo565( start );
int b = FloatTo565( end );
// remap the indices
if( a < b )
{
// swap a and b
std::swap( a, b );
indices ^= 0x55555555;
}
else if( a == b )
{
indices = 0;
}
// write the block
WriteColourBlock( a, b, indices, block );
}
*/
// Reads a low-byte-first 16-bit 5:6:5 value from packed, expands the three
// fields to 8 bits each into colour[0..2], sets colour[3] to 255 and
// returns the 16-bit value.
static int Unpack565( u8 const* packed, u8* colour )
{
	// rebuild the 16-bit value
	int const value = packed[0] | ( packed[1] << 8 );

	// split it into the stored 5-, 6- and 5-bit fields
	int const top5 = ( value >> 11 ) & 0x1f;
	int const mid6 = ( value >> 5 ) & 0x3f;
	int const low5 = value & 0x1f;

	// widen to 8 bits by repeating the top bits in the vacated low bits
	colour[0] = ( u8 )( ( top5 << 3 ) | ( top5 >> 2 ) );
	colour[1] = ( u8 )( ( mid6 << 2 ) | ( mid6 >> 4 ) );
	colour[2] = ( u8 )( ( low5 << 3 ) | ( low5 >> 2 ) );
	colour[3] = 255;

	return value;
}
// Expands an 8-byte colour block into 16 pixels of 4 bytes each (64 bytes
// written to rgba). When isDxt1 is set and the first packed endpoint is
// <= the second, the palette is two endpoints, their midpoint and an
// all-zero entry; otherwise it is two endpoints and two thirds-points.
void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
{
	u8 const* bytes = reinterpret_cast< u8 const* >( block );

	// palette entries 0 and 1 are the unpacked endpoints
	u8 palette[16];
	int const first = Unpack565( bytes, palette );
	int const second = Unpack565( bytes + 2, palette + 4 );
	bool const threeColour = isDxt1 && first <= second;

	// palette entries 2 and 3 are derived per channel
	for( int channel = 0; channel < 3; ++channel )
	{
		int const c = palette[channel];
		int const d = palette[4 + channel];
		palette[8 + channel] = threeColour ? ( u8 )( ( c + d )/2 ) : ( u8 )( ( 2*c + d )/3 );
		palette[12 + channel] = threeColour ? ( u8 )0 : ( u8 )( ( c + 2*d )/3 );
	}

	// alpha: entry 2 is always opaque, entry 3 only outside three-colour mode
	palette[8 + 3] = 255;
	palette[12 + 3] = threeColour ? 0 : 255;

	// look up each pixel: four 2-bit indices per byte, lowest bits first
	for( int pixel = 0; pixel < 16; ++pixel )
	{
		int const index = ( bytes[4 + pixel/4] >> ( 2*( pixel%4 ) ) ) & 0x3;
		u8 const* from = palette + 4*index;
		u8* to = rgba + 4*pixel;
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
	}
}
} // namespace squish

View File

@ -0,0 +1,43 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURBLOCK_H
#define SQUISH_COLOURBLOCK_H
#include "squish.h"
#include "maths.h"
namespace squish {
// Quantises start/end to 5:6:5 and writes an 8-byte colour block with the
// packed endpoints ordered first <= second. indices holds 16 entries.
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
// Quantises start/end to 5:6:5 and writes an 8-byte colour block with the
// packed endpoints ordered first >= second. indices holds 16 entries.
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
// disabled variants that take the 16 indices pre-packed into one uint
//void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, uint indices, void* block );
//void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, uint indices, void* block );
// Expands an 8-byte colour block into 16 pixels of 4 bytes each in rgba.
// isDxt1 enables the three-colour palette when the stored endpoints are
// ordered first <= second.
void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
} // namespace squish
#endif // ndef SQUISH_COLOURBLOCK_H

View File

@ -0,0 +1,54 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourfit.h"
#include "colourset.h"
namespace squish {
// Stores the colour set pointer and the flags for later use by Compress.
// The pointer is kept as-is, not copied, so the ColourSet must stay alive
// for as long as this object is used.
ColourFit::ColourFit( ColourSet const* colours, int flags )
: m_colours( colours ),
m_flags( flags )
{
}
// Compresses the colour set into block using whichever encodings the
// flags and the colour set allow.
//
// - DXT1 block with transparent pixels: only Compress3 is run. The colour
//   set maps transparent pixels to index 3, and index 3 decodes with zero
//   alpha only in the 3-colour endpoint ordering; the 4-colour writer
//   forces the other ordering, which would turn those pixels opaque.
// - DXT1 block without transparent pixels: Compress4 then Compress3, so
//   both encodings are tried (same order as before).
// - any other format: Compress4 only.
void ColourFit::Compress( void* block )
{
	bool const isDxt1 = ( ( m_flags & kDxt1 ) != 0 );

	// transparency can only be represented by the 3-colour encoding
	if( isDxt1 && m_colours->IsTransparent() )
	{
		Compress3( block );
		return;
	}

	Compress4( block );

	// opaque dxt1 blocks may also use the 3-colour encoding
	if( isDxt1 )
		Compress3( block );
}
} // namespace squish

View File

@ -0,0 +1,53 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURFIT_H
#define SQUISH_COLOURFIT_H
#include "squish.h"
#include "maths.h"
namespace squish {
class ColourSet;
// Abstract base for the colour fitters. Compress() is the public entry
// point; it dispatches to the Compress3 / Compress4 hooks that each
// derived fitter implements.
class ColourFit
{
public:
	// colours is stored as a pointer and must outlive this object
	ColourFit( ColourSet const* colours, int flags );

	// virtual so that a fitter can be destroyed safely through a ColourFit
	// pointer; defined inline so no extra definition is needed elsewhere
	virtual ~ColourFit() {}

	// writes the compressed colour block
	void Compress( void* block );

protected:
	// hooks invoked by Compress(), implemented by each derived fitter
	virtual void Compress3( void* block ) = 0;
	virtual void Compress4( void* block ) = 0;

	ColourSet const* m_colours;
	int m_flags;
};
} // namespace squish
#endif // ndef SQUISH_COLOURFIT_H

View File

@ -0,0 +1,134 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourset.h"
namespace squish {
// Builds the set of points for one block from 16 pixels of 4 bytes each.
//
// In dxt1 mode, pixels whose fourth byte is 0 are left out of the set,
// remapped to -1 and flag the set as transparent. Every other pixel
// becomes one point (the de-duplicating variant is compiled out by the
// "#if 1" below), so m_count ends up as the number of non-skipped pixels.
ColourSet::ColourSet( u8 const* rgba, int flags )
: m_count( 0 ),
m_transparent( false )
{
// check the compression mode for dxt1
bool isDxt1 = ( ( flags & kDxt1 ) != 0 );
bool weightByAlpha = ( ( flags & kWeightColourByAlpha ) != 0 );
// create the minimal set
for( int i = 0; i < 16; ++i )
{
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] == 0 )
{
m_remap[i] = -1;
m_transparent = true;
continue;
}
#if 1
// active path: one point per pixel, no merging of equal colours
// normalise coordinates to [0,1]
// NOTE(review): byte 2 becomes X and byte 0 becomes Z, i.e. the first and
// third channels are swapped relative to memory order -- confirm the
// channel order callers pass in.
float x = ( float )rgba[4*i + 2] / 255.0f;
float y = ( float )rgba[4*i + 1] / 255.0f;
float z = ( float )rgba[4*i + 0] / 255.0f;
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// add the point
m_points[m_count] = Vec3( x, y, z );
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
m_remap[i] = m_count;
// advance
++m_count;
#else
// disabled path: merges pixels with identical colour bytes into a single
// point and accumulates their weights
// loop over previous points for a match
for( int j = 0;; ++j )
{
// allocate a new point
if( j == i )
{
// normalise coordinates to [0,1]
float x = ( float )rgba[4*i + 2] / 255.0f;
float y = ( float )rgba[4*i + 1] / 255.0f;
float z = ( float )rgba[4*i + 0] / 255.0f;
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// add the point
m_points[m_count] = Vec3( x, y, z );
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
m_remap[i] = m_count;
// advance
++m_count;
break;
}
// check for a match
bool match = ( rgba[4*i] == rgba[4*j] )
&& ( rgba[4*i + 1] == rgba[4*j + 1] )
&& ( rgba[4*i + 2] == rgba[4*j + 2] )
&& ( rgba[4*j + 3] != 0 || !isDxt1 );
if( match )
{
// get the index of the match
int index = m_remap[j];
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// map to this point and increase the weight
m_weights[index] += ( weightByAlpha ? w : 1.0f );
m_remap[i] = index;
break;
}
}
#endif
}
#if SQUISH_USE_SIMD
// generate vector values
// (Vec4 copies of the points with W set to 1, and each weight splatted
// across a Vec4)
for( int i = 0; i < m_count; ++i )
{
m_points_simd[i] = Vec4(m_points[i].X(), m_points[i].Y(), m_points[i].Z(), 1);
m_weights_simd[i] = VEC4_CONST(m_weights[i]);
}
#endif
}
// Translates per-point indices (source) into per-pixel indices (target,
// 16 entries). Pixels that were left out of the set (remap value -1)
// receive index 3.
void ColourSet::RemapIndices( u8 const* source, u8* target ) const
{
	for( int pixel = 0; pixel != 16; ++pixel )
	{
		int const point = m_remap[pixel];
		target[pixel] = ( point == -1 ) ? 3 : source[point];
	}
}
} // namespace squish

View File

@ -0,0 +1,69 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURSET_H
#define SQUISH_COLOURSET_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
namespace squish {
/*! @brief Represents a set of block colours
*/
// Holds up to 16 points with weights, plus the mapping from each of the 16
// block pixels to its point. When SQUISH_USE_SIMD is set, Vec4 copies of
// the points and weights are kept alongside the scalar ones.
class ColourSet
{
public:
ColourSet( u8 const* rgba, int flags );
// number of valid entries in the point and weight arrays
int GetCount() const { return m_count; }
Vec3 const* GetPoints() const { return m_points; }
float const* GetWeights() const { return m_weights; }
// true when at least one pixel was left out of the set as transparent
bool IsTransparent() const { return m_transparent; }
// converts per-point indices to per-pixel indices (16 entries written)
void RemapIndices( u8 const* source, u8* target ) const;
private:
int m_count;
Vec3 m_points[16];
float m_weights[16];
// per pixel: index into m_points, or -1 for a pixel left out of the set
int m_remap[16];
bool m_transparent;
#if SQUISH_USE_SIMD
public:
Vec4 const* GetPointsSimd() const { return m_points_simd; }
Vec4 const* GetWeightsSimd() const { return m_weights_simd; }
private:
Vec4 m_points_simd[16];
Vec4 m_weights_simd[16];
#endif
};
} // namespace squish
#endif // ndef SQUISH_COLOURSET_H

22
src/nvtt/squish/config Normal file
View File

@ -0,0 +1,22 @@
# config file used for the Makefile only
# every "?=" below is only a default: a value already set in the
# environment or on the make command line takes precedence
# define to 1 to use altivec instructions
USE_ALTIVEC ?= 0
# define to 1 to use sse instructions
USE_SSE ?= 0
# default flags
CXXFLAGS ?= -O2
# altivec: define SQUISH_USE_ALTIVEC for the sources and pass -maltivec
ifeq ($(USE_ALTIVEC),1)
CPPFLAGS += -DSQUISH_USE_ALTIVEC=1
CXXFLAGS += -maltivec
endif
# sse: define SQUISH_USE_SSE for the sources and pass -msse
ifeq ($(USE_SSE),1)
CPPFLAGS += -DSQUISH_USE_SSE=1
CXXFLAGS += -msse
endif
# where should we install to
INSTALL_DIR ?= /usr/local

55
src/nvtt/squish/config.h Normal file
View File

@ -0,0 +1,55 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_CONFIG_H
#define SQUISH_CONFIG_H
// Set to 1 when building squish to use altivec instructions.
// When not set by the build, it is detected from __VEC__. The detection is
// written as an #if so that the macro always expands to a plain 0 or 1:
// letting "defined" appear through macro expansion inside an #if is
// undefined behaviour, and such a macro cannot be used as a value in code.
#ifndef SQUISH_USE_ALTIVEC
#  if defined(__VEC__)
#    define SQUISH_USE_ALTIVEC 1
#  else
#    define SQUISH_USE_ALTIVEC 0
#  endif
#endif
// Set to 1 when building squish to use sse instructions.
// When not set by the build, it is detected from the compiler: 2 when
// __SSE2__ is defined, 1 when only __SSE__ is defined, 0 otherwise.
#ifndef SQUISH_USE_SSE
#  if defined(__SSE2__)
#    define SQUISH_USE_SSE 2
#  elif defined(__SSE__)
#    define SQUISH_USE_SSE 1
#  else
#    define SQUISH_USE_SSE 0
#  endif
#endif
// Internally set SQUISH_USE_SIMD when either altivec or sse is available.
#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
#  error "Cannot enable both altivec and sse!"
#endif
#if SQUISH_USE_ALTIVEC || SQUISH_USE_SSE
#  define SQUISH_USE_SIMD 1
#else
#  define SQUISH_USE_SIMD 0
#endif
#endif // ndef SQUISH_CONFIG_H

View File

@ -0,0 +1,158 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include <iostream>
// One way of producing a target value: the pair of quantised endpoints and
// how far the value they produce is from the target.
struct SourceBlock
{
	int start;
	int end;
	int error;
};

// For one 8-bit target value, the best source block per palette index.
struct TargetValue
{
	SourceBlock sources[4];
};

// Takes over the neighbour's endpoints when that gives a smaller error than
// the entry currently has. Returns true when the entry was changed.
static bool AdoptIfCloser( SourceBlock& entry, SourceBlock const& neighbour )
{
	int const candidate = neighbour.error + 1;
	if( entry.error <= candidate )
		return false;

	entry.start = neighbour.start;
	entry.end = neighbour.end;
	entry.error = candidate;
	return true;
}

// Prints a C++ table called name to stdout with one row for each 8-bit
// target value. For every palette index below colours the row gives the
// endpoint pair (each endpoint having the given number of bits) that
// produces the target at that index, or the nearest value that can be
// produced, together with the distance to it.
static void GenerateData( std::string const& name, int bits, int colours )
{
	int const kTargets = 256;
	TargetValue table[256];

	// start with every used slot marked as unreachable
	for( int slot = 0; slot < colours; ++slot )
		for( int t = 0; t < kTargets; ++t )
			table[t].sources[slot].error = 255;

	// visit every endpoint pair and record the values it hits exactly
	int const levels = ( 1 << bits );
	int const up = 8 - bits;
	int const down = 2*bits - 8;
	for( int lo = 0; lo < levels; ++lo )
	{
		// expand the first endpoint to 8 bits
		int const a = ( lo << up ) | ( lo >> down );
		for( int hi = 0; hi < levels; ++hi )
		{
			// expand the second endpoint to 8 bits
			int const b = ( hi << up ) | ( hi >> down );

			// the palette this pair decodes to
			int palette[4] = { a, b, 0, 0 };
			if( colours == 3 )
			{
				palette[2] = ( a + b )/2;
			}
			else
			{
				palette[2] = ( 2*a + b )/3;
				palette[3] = ( a + 2*b )/3;
			}

			// the first pair found for a target/index slot is the one kept
			for( int slot = 0; slot < colours; ++slot )
			{
				SourceBlock& hit = table[palette[slot]].sources[slot];
				if( hit.error == 0 )
					continue;
				hit.start = lo;
				hit.end = hi;
				hit.error = 0;
			}
		}
	}

	// spread the exact hits to neighbouring targets until nothing changes
	bool changed;
	do
	{
		changed = false;
		for( int slot = 0; slot < colours; ++slot )
		{
			for( int t = 0; t < kTargets; ++t )
			{
				SourceBlock& here = table[t].sources[slot];
				if( t + 1 < kTargets && AdoptIfCloser( here, table[t + 1].sources[slot] ) )
					changed = true;
				if( t > 0 && AdoptIfCloser( here, table[t - 1].sources[slot] ) )
					changed = true;
			}
		}
	}
	while( changed );

	// emit the table; unused palette slots are printed as zeros
	std::cout << "\nstatic SingleColourLookup const " << name << "[] = \n{\n";
	for( int t = 0; t < kTargets; ++t )
	{
		if( t > 0 )
			std::cout << ",\n";
		std::cout << "\t{ { ";
		for( int slot = 0; slot < 4; ++slot )
		{
			if( slot > 0 )
				std::cout << ", ";
			if( slot < colours )
			{
				SourceBlock const& entry = table[t].sources[slot];
				std::cout << "{ " << entry.start << ", " << entry.end << ", " << entry.error << " }";
			}
			else
			{
				std::cout << "{ 0, 0, 0 }";
			}
		}
		std::cout << " } }";
	}
	std::cout << "\n};\n";
}
// Prints the four lookup tables to stdout, one for each combination of
// endpoint bit depth (5 or 6) and palette size (3 or 4).
int main()
{
	struct Job
	{
		char const* name;
		int bits;
		int colours;
	};
	static Job const jobs[] =
	{
		{ "lookup_5_3", 5, 3 },
		{ "lookup_6_3", 6, 3 },
		{ "lookup_5_4", 5, 4 },
		{ "lookup_6_4", 6, 4 }
	};

	for( unsigned i = 0; i < sizeof( jobs )/sizeof( jobs[0] ); ++i )
		GenerateData( jobs[i].name, jobs[i].bits, jobs[i].colours );

	return 0;
}

View File

@ -0,0 +1,603 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
/*! @file
@brief Example program that converts between the PNG and DXT formats.
This program requires libpng for PNG input and output, and is designed
to show how to prepare data for the squish library when it is not simply
a contiguous block of memory.
*/
#include <iostream>
#include <string>
#include <sstream>
#include <ctime>
#include <cmath>
#include <squish.h>
#include <png.h>
#ifdef _MSC_VER
#pragma warning( disable: 4511 4512 )
#endif // def _MSC_VER
using namespace squish;
//! Exception that carries a copy of a message string.
class Error : public std::exception
{
	std::string m_excuse;	// text returned by what()

public:
	Error( std::string const& excuse )
	  : m_excuse( excuse )
	{
	}

	~Error() throw()
	{
	}

	//! The returned pointer stays valid for as long as this object lives.
	virtual char const* what() const throw()
	{
		return m_excuse.c_str();
	}
};
//! Inherit from this to stop a class from being copied or assigned.
class NonCopyable
{
	// private and never defined, so any attempt to copy is rejected
	NonCopyable( NonCopyable const& );
	NonCopyable& operator=( NonCopyable const& );

public:
	NonCopyable()
	{
	}
};
//! Byte buffer allocated with new[] on construction and released with
//! delete[] on destruction.
class Mem : NonCopyable
{
	u8* m_p;	// the owned array

public:
	explicit Mem( int size )
	  : m_p( new u8[size] )
	{
	}

	~Mem()
	{
		delete[] m_p;
	}

	//! Access to the buffer; ownership stays with this object.
	u8* Get() const
	{
		return m_p;
	}
};
//! Wraps a FILE* and closes it on destruction. A null handle is accepted
//! and reported through IsValid().
class File : NonCopyable
{
	FILE* m_fp;	// may be null when the open failed

public:
	explicit File( FILE* fp )
	  : m_fp( fp )
	{
	}

	~File()
	{
		if( m_fp != 0 )
			fclose( m_fp );
	}

	//! True when a handle is held.
	bool IsValid() const
	{
		return m_fp != 0;
	}

	//! The wrapped handle; ownership stays with this object.
	FILE* Get() const
	{
		return m_fp;
	}
};
//! PNG read object.
//! Creates a libpng read struct and two info structs in the constructor
//! and destroys all three in the destructor. Throws Error when any of them
//! cannot be created.
class PngReadStruct : NonCopyable
{
public:
PngReadStruct()
: m_png( 0 ),
m_info( 0 ),
m_end( 0 )
{
m_png = png_create_read_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 );
if( !m_png )
throw Error( "failed to create png read struct" );
m_info = png_create_info_struct( m_png );
m_end = png_create_info_struct( m_png );
if( !m_info || !m_end )
{
// the destructor does not run when the constructor throws, so release
// here whatever was created; only non-null info structs are passed on
png_infopp info = m_info ? &m_info : 0;
png_infopp end = m_end ? &m_end : 0;
png_destroy_read_struct( &m_png, info, end );
throw Error( "failed to create png info structs" );
}
}
~PngReadStruct()
{
png_destroy_read_struct( &m_png, &m_info, &m_end );
}
png_structp GetPng() const { return m_png; }
// only the first info struct is exposed; m_end has no accessor
png_infop GetInfo() const { return m_info; }
private:
png_structp m_png;
png_infop m_info, m_end;
};
//! PNG write object.
class PngWriteStruct : NonCopyable
{
public:
PngWriteStruct()
: m_png( 0 ),
m_info( 0 )
{
m_png = png_create_write_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 );
if( !m_png )
throw Error( "failed to create png read struct" );
m_info = png_create_info_struct( m_png );
if( !m_info )
{
png_infopp info = m_info ? &m_info : 0;
png_destroy_write_struct( &m_png, info );
throw Error( "failed to create png info structs" );
}
}
~PngWriteStruct()
{
png_destroy_write_struct( &m_png, &m_info );
}
png_structp GetPng() const { return m_png; }
png_infop GetInfo() const { return m_info; }
private:
png_structp m_png;
png_infop m_info;
};
//! PNG rows object.
class PngRows : NonCopyable
{
public:
PngRows( int width, int height, int stride ) : m_width( width ), m_height( height )
{
m_rows = ( png_bytep* )malloc( m_height*sizeof( png_bytep ) );
for( int i = 0; i < m_height; ++i )
m_rows[i] = ( png_bytep )malloc( m_width*stride );
}
~PngRows()
{
for( int i = 0; i < m_height; ++i )
free( m_rows[i] );
free( m_rows );
}
png_bytep* Get() const { return m_rows; }
private:
png_bytep* m_rows;
int m_width, m_height;
};
//! An 8-bit PNG image loaded fully into memory by the constructor.
//! The pixel rows belong to the embedded PngReadStruct, so pointers
//! returned by GetRow() are only usable while this object is alive.
class PngImage
{
public:
//! Loads fileName; throws Error when the file cannot be opened, is not a
//! PNG, is not 8 bits per channel, or its rows cannot be obtained.
explicit PngImage( std::string const& fileName );
int GetWidth() const { return m_width; }
int GetHeight() const { return m_height; }
//! Bytes per pixel: 3 for colour or 1 for grey, plus 1 when alpha is present.
int GetStride() const { return m_stride; }
bool IsColour() const { return m_colour; }
bool IsAlpha() const { return m_alpha; }
//! No bounds check is made on row.
u8 const* GetRow( int row ) const { return ( u8* )m_rows[row]; }
private:
PngReadStruct m_png;
int m_width;
int m_height;
int m_stride;
bool m_colour;
bool m_alpha;
// row pointers obtained from png_get_rows
png_bytep* m_rows;
};
//! Loads an 8-bit PNG from disk.
/*! Opens fileName, verifies the 8-byte PNG signature, reads the whole image
	with PNG_TRANSFORM_EXPAND and records width, height, colour/alpha flags,
	bytes per pixel and the row table.

	Throws Error if the file cannot be opened, is too short or has the wrong
	signature, is not 8 bits per channel, or if libpng returns no rows.

	NOTE(review): no libpng error handler or setjmp is installed here, so
	errors raised inside libpng itself take libpng's default path -- confirm
	that is acceptable for this tool.
*/
PngImage::PngImage( std::string const& fileName )
  : m_width( 0 ),
	m_height( 0 ),
	m_stride( 0 ),
	m_colour( false ),
	m_alpha( false ),
	m_rows( 0 )
{
	// open the source file
	File file( fopen( fileName.c_str(), "rb" ) );
	if( !file.IsValid() )
	{
		std::ostringstream oss;
		oss << "failed to open \"" << fileName << "\" for reading";
		throw Error( oss.str() );
	}

	// check the signature bytes; a short read leaves header partly
	// uninitialised, so it must be rejected before png_sig_cmp sees it
	png_byte header[8];
	if( fread( header, 1, 8, file.Get() ) != 8 || png_sig_cmp( header, 0, 8 ) )
	{
		std::ostringstream oss;
		oss << "\"" << fileName << "\" does not look like a png file";
		throw Error( oss.str() );
	}

	// read the image into memory (the 8 signature bytes are already consumed)
	png_init_io( m_png.GetPng(), file.Get() );
	png_set_sig_bytes( m_png.GetPng(), 8 );
	png_read_png( m_png.GetPng(), m_png.GetInfo(), PNG_TRANSFORM_EXPAND, 0 );

	// get the image info
	png_uint_32 width;
	png_uint_32 height;
	int bitDepth;
	int colourType;
	png_get_IHDR( m_png.GetPng(), m_png.GetInfo(), &width, &height, &bitDepth, &colourType, 0, 0, 0 );

	// check the image is 8 bit
	if( bitDepth != 8 )
	{
		std::ostringstream oss;
		oss << "cannot process " << bitDepth << "-bit image (bit depth must be 8)";
		throw Error( oss.str() );
	}

	// save the info
	m_width = width;
	m_height = height;
	m_colour = ( ( colourType & PNG_COLOR_MASK_COLOR ) != 0 );
	m_alpha = ( ( colourType & PNG_COLOR_MASK_ALPHA ) != 0 );
	m_stride = ( m_colour ? 3 : 1 ) + ( m_alpha ? 1 : 0 );

	// get the image rows
	m_rows = png_get_rows( m_png.GetPng(), m_png.GetInfo() );
	if( !m_rows )
		throw Error( "failed to get image rows" );
}
//! Compresses a PNG file to a raw DXT file.
/*! Loads sourceFileName, expands each 4x4 pixel block to RGBA (grey is
	replicated into R, G and B; missing alpha becomes 255), compresses it with
	the block-level Compress using flags, and writes the result to
	targetFileName as: width (int), height (int), then the compressed blocks in
	row-major block order. The time spent compressing is printed to stdout.

	Throws Error if the image cannot be loaded, its dimensions are not
	multiples of 4, or the target file cannot be opened or fully written.
*/
static void Compress( std::string const& sourceFileName, std::string const& targetFileName, int flags )
{
	// load the source image
	PngImage sourceImage( sourceFileName );

	// get the image info
	int width = sourceImage.GetWidth();
	int height = sourceImage.GetHeight();
	int stride = sourceImage.GetStride();
	bool colour = sourceImage.IsColour();
	bool alpha = sourceImage.IsAlpha();

	// check the image dimensions
	if( ( width % 4 ) != 0 || ( height % 4 ) != 0 )
	{
		std::ostringstream oss;
		oss << "cannot compress " << width << "x" << height
			<< " image (dimensions must be multiples of 4)";
		throw Error( oss.str() );
	}

	// create the target data: 8 bytes per block for DXT1, 16 otherwise
	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
	int targetDataSize = bytesPerBlock*width*height/16;
	Mem targetData( targetDataSize );

	// loop over blocks and compress them
	clock_t start = std::clock();
	u8* targetBlock = targetData.Get();
	for( int y = 0; y < height; y += 4 )
	{
		// process a row of blocks
		for( int x = 0; x < width; x += 4 )
		{
			// get the block data
			u8 sourceRgba[16*4];
			for( int py = 0, i = 0; py < 4; ++py )
			{
				u8 const* row = sourceImage.GetRow( y + py ) + x*stride;
				for( int px = 0; px < 4; ++px, ++i )
				{
					// get the pixel colour
					if( colour )
					{
						for( int j = 0; j < 3; ++j )
							sourceRgba[4*i + j] = *row++;
					}
					else
					{
						// replicate the single grey sample into all three channels
						for( int j = 0; j < 3; ++j )
							sourceRgba[4*i + j] = *row;
						++row;
					}

					// copy alpha if the image has it, otherwise use opaque
					if( alpha )
						sourceRgba[4*i + 3] = *row++;
					else
						sourceRgba[4*i + 3] = 255;
				}
			}

			// compress this block
			Compress( sourceRgba, targetBlock, flags );

			// advance
			targetBlock += bytesPerBlock;
		}
	}
	clock_t end = std::clock();
	double duration = ( double )( end - start ) / CLOCKS_PER_SEC;
	std::cout << "time taken: " << duration << " seconds" << std::endl;

	// open the target file
	File targetFile( fopen( targetFileName.c_str(), "wb" ) );
	if( !targetFile.IsValid() )
	{
		std::ostringstream oss;
		oss << "failed to open \"" << targetFileName << "\" for writing";
		throw Error( oss.str() );
	}

	// write the header followed by the data, stopping at the first short write
	bool const written =
		fwrite( &width, sizeof( int ), 1, targetFile.Get() ) == 1
		&& fwrite( &height, sizeof( int ), 1, targetFile.Get() ) == 1
		&& fwrite( targetData.Get(), 1, targetDataSize, targetFile.Get() ) == ( size_t )targetDataSize;
	if( !written )
	{
		std::ostringstream oss;
		oss << "failed to write \"" << targetFileName << "\"";
		throw Error( oss.str() );
	}
}
//! Decompresses a raw DXT file to an RGBA PNG file.
/*! Reads the width (int) and height (int) header and the block data written
	by the file-level Compress, decompresses every 4x4 block with the
	block-level Decompress using flags, and writes the pixels to
	targetFileName as an 8-bit RGBA PNG.

	Throws Error if either file cannot be opened, the source file is
	truncated, or the stored dimensions are not positive multiples of 4. The
	dimension check matters because the block loop writes four rows and four
	columns at a time into the row buffers.
*/
static void Decompress( std::string const& sourceFileName, std::string const& targetFileName, int flags )
{
	// open the source file
	File sourceFile( fopen( sourceFileName.c_str(), "rb" ) );
	if( !sourceFile.IsValid() )
	{
		std::ostringstream oss;
		oss << "failed to open \"" << sourceFileName << "\" for reading";
		throw Error( oss.str() );
	}

	// get the width and height
	int width = 0, height = 0;
	if( fread( &width, sizeof( int ), 1, sourceFile.Get() ) != 1
		|| fread( &height, sizeof( int ), 1, sourceFile.Get() ) != 1 )
	{
		std::ostringstream oss;
		oss << "\"" << sourceFileName << "\" is too short to contain a header";
		throw Error( oss.str() );
	}

	// the header comes straight from the file, so validate it before
	// it is used to size and index buffers
	if( width <= 0 || height <= 0 || ( width % 4 ) != 0 || ( height % 4 ) != 0 )
	{
		std::ostringstream oss;
		oss << "cannot decompress " << width << "x" << height
			<< " image (dimensions must be positive multiples of 4)";
		throw Error( oss.str() );
	}

	// work out the data size: 8 bytes per block for DXT1, 16 otherwise
	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
	int sourceDataSize = bytesPerBlock*width*height/16;
	Mem sourceData( sourceDataSize );

	// read the source data
	if( fread( sourceData.Get(), 1, sourceDataSize, sourceFile.Get() ) != ( size_t )sourceDataSize )
	{
		std::ostringstream oss;
		oss << "\"" << sourceFileName << "\" is truncated";
		throw Error( oss.str() );
	}

	// create the target rows (4 bytes per pixel)
	PngRows targetRows( width, height, 4 );

	// loop over blocks and decompress them
	u8 const* sourceBlock = sourceData.Get();
	for( int y = 0; y < height; y += 4 )
	{
		// process a row of blocks
		for( int x = 0; x < width; x += 4 )
		{
			// decompress back
			u8 targetRgba[16*4];
			Decompress( targetRgba, sourceBlock, flags );

			// write the data into the target rows
			for( int py = 0, i = 0; py < 4; ++py )
			{
				u8* row = ( u8* )targetRows.Get()[y + py] + x*4;
				for( int px = 0; px < 4; ++px, ++i )
				{
					for( int j = 0; j < 4; ++j )
						*row++ = targetRgba[4*i + j];
				}
			}

			// advance
			sourceBlock += bytesPerBlock;
		}
	}

	// create the target PNG
	PngWriteStruct targetPng;

	// set up the image
	png_set_IHDR(
		targetPng.GetPng(), targetPng.GetInfo(), width, height,
		8, PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE,
		PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT
	);

	// open the target file
	File targetFile( fopen( targetFileName.c_str(), "wb" ) );
	if( !targetFile.IsValid() )
	{
		std::ostringstream oss;
		oss << "failed to open \"" << targetFileName << "\" for writing";
		throw Error( oss.str() );
	}

	// write the image
	png_set_rows( targetPng.GetPng(), targetPng.GetInfo(), targetRows.Get() );
	png_init_io( targetPng.GetPng(), targetFile.Get() );
	png_write_png( targetPng.GetPng(), targetPng.GetInfo(), PNG_TRANSFORM_IDENTITY, 0 );
}
//! Prints the RMS difference between two PNG images of equal size.
/*! Compares the first min( sourceStride, targetStride ) bytes of every pixel.
	The sum of squared differences is divided by the pixel count (not the
	sample count) before taking the square root. Throws Error if the images
	cannot be loaded or their dimensions differ.
*/
static void Diff( std::string const& sourceFileName, std::string const& targetFileName )
{
	// load both images up front
	PngImage sourceImage( sourceFileName );
	PngImage targetImage( targetFileName );

	// gather the geometry
	int const width = sourceImage.GetWidth();
	int const height = sourceImage.GetHeight();
	int const sourceStride = sourceImage.GetStride();
	int const targetStride = targetImage.GetStride();
	int const channels = std::min( sourceStride, targetStride );

	// the comparison is only defined for equally sized images
	bool const sameSize = ( width == targetImage.GetWidth() ) && ( height == targetImage.GetHeight() );
	if( !sameSize )
		throw Error( "source and target dimensions do not match" );

	// accumulate the squared differences, walking both rows in lock-step
	double sumSquares = 0.0;
	for( int y = 0; y < height; ++y )
	{
		u8 const* sourcePixel = sourceImage.GetRow( y );
		u8 const* targetPixel = targetImage.GetRow( y );
		for( int x = 0; x < width; ++x, sourcePixel += sourceStride, targetPixel += targetStride )
		{
			for( int c = 0; c < channels; ++c )
			{
				int const delta = ( int )sourcePixel[c] - ( int )targetPixel[c];
				sumSquares += ( double )( delta*delta );
			}
		}
	}
	double const rms = std::sqrt( sumSquares / ( width*height ) );

	// report
	std::cout << "rms error: " << rms << std::endl;
}
//! The operation main() performs, selected by the -c, -d and -e options.
enum Mode
{
// -c: compress a source png to a raw dxt target (the default)
kCompress,
// -d: decompress a raw dxt source to a target png
kDecompress,
// -e: print the rms error between a source and a target png
kDiff
};
int main( int argc, char* argv[] )
{
try
{
// parse the command-line
std::string sourceFileName;
std::string targetFileName;
Mode mode = kCompress;
int method = kDxt1;
int metric = kColourMetricPerceptual;
int fit = kColourClusterFit;
int extra = 0;
bool help = false;
bool arguments = true;
for( int i = 1; i < argc; ++i )
{
// check for options
char const* word = argv[i];
if( arguments && word[0] == '-' )
{
for( int j = 1; word[j] != '\0'; ++j )
{
switch( word[j] )
{
case 'h': help = true; break;
case 'c': mode = kCompress; break;
case 'd': mode = kDecompress; break;
case 'e': mode = kDiff; break;
case '1': method = kDxt1; break;
case '3': method = kDxt3; break;
case '5': method = kDxt5; break;
case 'u': metric = kColourMetricUniform; break;
case 'r': fit = kColourRangeFit; break;
case 'w': extra = kWeightColourByAlpha; break;
case '-': arguments = false; break;
default:
std::cerr << "unknown option '" << word[j] << "'" << std::endl;
return -1;
}
}
}
else
{
if( sourceFileName.empty() )
sourceFileName.assign( word );
else if( targetFileName.empty() )
targetFileName.assign( word );
else
{
std::cerr << "unexpected argument \"" << word << "\"" << std::endl;
}
}
}
// check arguments
if( help )
{
std::cout
<< "SYNTAX" << std::endl
<< "\tsquishpng [-cde135] <source> <target>" << std::endl
<< "OPTIONS" << std::endl
<< "\t-c\tCompress source png to target raw dxt (default)" << std::endl
<< "\t-135\tSpecifies whether to use DXT1 (default), DXT3 or DXT5 compression" << std::endl
<< "\t-u\tUse a uniform colour metric during colour compression" << std::endl
<< "\t-r\tUse the fast but inferior range-based colour compressor" << std::endl
<< "\t-w\tWeight colour values by alpha in the cluster colour compressor" << std::endl
<< "\t-d\tDecompress source raw dxt to target png" << std::endl
<< "\t-e\tDiff source and target png" << std::endl
;
return 0;
}
if( sourceFileName.empty() )
{
std::cerr << "no source file given" << std::endl;
return -1;
}
if( targetFileName.empty() )
{
std::cerr << "no target file given" << std::endl;
return -1;
}
// do the work
switch( mode )
{
case kCompress:
Compress( sourceFileName, targetFileName, method | metric | fit | extra );
break;
case kDecompress:
Decompress( sourceFileName, targetFileName, method );
break;
case kDiff:
Diff( sourceFileName, targetFileName );
break;
default:
std::cerr << "unknown mode" << std::endl;
throw std::exception();
}
}
catch( std::exception& excuse )
{
// complain
std::cerr << "squishpng error: " << excuse.what() << std::endl;
return -1;
}
// done
return 0;
}

View File

@ -0,0 +1,205 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
/*! @file
@brief This program tests the error for 1 and 2-colour DXT compression.
This tests the effectiveness of the DXT compression algorithm for all
possible 1 and 2-colour blocks of pixels.
*/
#include <squish.h>
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdlib>
#include <iostream>
using namespace squish;
//! Mean squared RGB error between two 16-pixel RGBA blocks.
/*! Sums the squared differences of the first three bytes of each of the 16
	four-byte pixels (the fourth byte is ignored) and divides by 16.
*/
double GetColourError( u8 const* a, u8 const* b )
{
	double sumSquares = 0.0;
	for( int pixel = 0; pixel < 16; ++pixel )
	{
		u8 const* lhs = a + 4*pixel;
		u8 const* rhs = b + 4*pixel;
		for( int channel = 0; channel < 3; ++channel )
		{
			int const delta = ( int )lhs[channel] - ( int )rhs[channel];
			sumSquares += ( double )( delta*delta );
		}
	}
	return sumSquares / 16.0;
}
//! Measures the round-trip error for solid single-channel blocks.
/*! For each of the three colour channels and every value 0..255, fills a
	block with that value in the channel (other channels 0, alpha 255),
	compresses and decompresses it with flags, and prints the minimum, maximum
	and overall RMS error to stdout.
*/
void TestOneColour( int flags )
{
	u8 input[4*16];
	u8 output[4*16];
	u8 block[16];

	double avg = 0.0, min = DBL_MAX, max = -DBL_MAX;
	int counter = 0;

	// start from an opaque black block
	for( int i = 0; i < 16*4; ++i )
		input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0;

	// test all single-channel colours
	for( int channel = 0; channel < 3; ++channel )
	{
		// the bound is inclusive so that the brightest value, 255, is covered too
		for( int value = 0; value <= 255; ++value )
		{
			// set the channel value
			for( int i = 0; i < 16; ++i )
				input[4*i + channel] = ( u8 )value;

			// compress and decompress
			Compress( input, block, flags );
			Decompress( output, block, flags );

			// test the results
			double rm = GetColourError( input, output );
			double rms = std::sqrt( rm );

			// accumulate stats (avg sums mean-squared error until the end)
			min = std::min( min, rms );
			max = std::max( max, rms );
			avg += rm;
			++counter;
		}

		// reset the channel value
		for( int i = 0; i < 16; ++i )
			input[4*i + channel] = 0;
	}

	// finish stats
	avg = std::sqrt( avg/counter );

	// show stats
	std::cout << "one colour error (min, max, avg): "
		<< min << ", " << max << ", " << avg << std::endl;
}
void TestOneColourRandom( int flags )
{
u8 input[4*16];
u8 output[4*16];
u8 block[16];
double avg = 0.0, min = DBL_MAX, max = -DBL_MAX;
int counter = 0;
// test all single-channel colours
for( int test = 0; test < 1000; ++test )
{
// set a constant random colour
for( int channel = 0; channel < 3; ++channel )
{
u8 value = ( u8 )( rand() & 0xff );
for( int i = 0; i < 16; ++i )
input[4*i + channel] = value;
}
for( int i = 0; i < 16; ++i )
input[4*i + 3] = 255;
// compress and decompress
Compress( input, block, flags );
Decompress( output, block, flags );
// test the results
double rm = GetColourError( input, output );
double rms = std::sqrt( rm );
// accumulate stats
min = std::min( min, rms );
max = std::max( max, rms );
avg += rm;
++counter;
}
// finish stats
avg = std::sqrt( avg/counter );
// show stats
std::cout << "random one colour error (min, max, avg): "
<< min << ", " << max << ", " << avg << std::endl;
}
//! Measures the round-trip error for two-value single-channel blocks.
/*! For each colour channel and every pair value1 < value2 in 0..255, fills
	the first 8 pixels of the channel with value1 and the last 8 with value2
	(other channels 0, alpha 255), compresses and decompresses with flags, and
	prints the minimum, maximum and overall RMS error to stdout.
*/
void TestTwoColour( int flags )
{
	u8 input[4*16];
	u8 output[4*16];
	u8 block[16];

	double avg = 0.0, min = DBL_MAX, max = -DBL_MAX;
	int counter = 0;

	// start from an opaque black block
	for( int i = 0; i < 16*4; ++i )
		input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0;

	// test all two-value single-channel blocks
	for( int channel = 0; channel < 3; ++channel )
	{
		for( int value1 = 0; value1 < 255; ++value1 )
		{
			// the upper bound is inclusive so that pairs ending in 255 are covered
			for( int value2 = value1 + 1; value2 <= 255; ++value2 )
			{
				// set the channel value
				for( int i = 0; i < 16; ++i )
					input[4*i + channel] = ( u8 )( ( i < 8 ) ? value1 : value2 );

				// compress and decompress
				Compress( input, block, flags );
				Decompress( output, block, flags );

				// test the results
				double rm = GetColourError( input, output );
				double rms = std::sqrt( rm );

				// accumulate stats (avg sums mean-squared error until the end)
				min = std::min( min, rms );
				max = std::max( max, rms );
				avg += rm;
				++counter;
			}
		}

		// reset the channel value
		for( int i = 0; i < 16; ++i )
			input[4*i + channel] = 0;
	}

	// finish stats
	avg = std::sqrt( avg/counter );

	// show stats
	std::cout << "two colour error (min, max, avg): "
		<< min << ", " << max << ", " << avg << std::endl;
}
//! Runs the three DXT1 error measurements in sequence.
int main()
{
	// random solid colours through the range fit
	TestOneColourRandom( kDxt1 | kColourRangeFit );

	// exhaustive one- and two-value blocks with the default flags
	TestOneColour( kDxt1 );
	TestTwoColour( kDxt1 );

	return 0;
}

View File

@ -0,0 +1,673 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "fastclusterfit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
//! Prepares a colour set for the fast cluster fit.
/*! Orders the points of the colour set along the principal axis of their
	weighted covariance, stores them in that order in m_unweighted, and
	accumulates their sum (m_xsum) and sum of squares (m_xxsum). The best
	error starts at FLT_MAX. m_metric is not set here; setMetric() does that.

	NOTE(review): only the first GetCount() entries of m_order and
	m_unweighted are written, while Compress3/Compress4 iterate over 16 --
	presumably callers always supply 16 points; confirm.
*/
FastClusterFit::FastClusterFit( ColourSet const* colours, int flags ) :
	ColourFit( colours, flags )
{
	// nothing has been fitted yet
#if SQUISH_USE_SIMD
	m_besterror = VEC4_CONST( FLT_MAX );
#else
	m_besterror = FLT_MAX;
#endif

	// grab the raw points
	int const count = m_colours->GetCount();
	Vec3 const* values = m_colours->GetPoints();

	// principal axis of the weighted covariance
	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
	Vec3 principle = ComputePrincipleComponent( covariance );

	// project every point onto the axis, starting from the identity order
	float dps[16];
	for( int i = 0; i < count; ++i )
	{
		m_order[i] = i;
		dps[i] = Dot( values[i], principle );
	}

	// insertion sort by projection; equal keys keep their relative order
	for( int i = 1; i < count; ++i )
	{
		int j = i;
		while( j > 0 && dps[j] < dps[j - 1] )
		{
			std::swap( dps[j], dps[j - 1] );
			std::swap( m_order[j], m_order[j - 1] );
			--j;
		}
	}

	// copy the points out in sorted order and build the running sums
#if SQUISH_USE_SIMD
	Vec4 const* unweighted = m_colours->GetPointsSimd();
	m_xxsum = VEC4_CONST( 0.0f );
	m_xsum = VEC4_CONST( 0.0f );
#else
	Vec3 const* unweighted = m_colours->GetPoints();
	m_xxsum = Vec3( 0.0f );
	m_xsum = Vec3( 0.0f );
#endif
	for( int slot = 0; slot < count; ++slot )
	{
		m_unweighted[slot] = unweighted[m_order[slot]];
		m_xxsum += m_unweighted[slot]*m_unweighted[slot];
		m_xsum += m_unweighted[slot];
	}
}
// Per-partition constants for the least-squares endpoint solve; one entry
// per way of splitting 16 points into consecutive clusters.
struct Precomp {
// sum over the points of alpha squared
float alpha2_sum;
// sum over the points of beta squared
float beta2_sum;
// sum over the points of alpha times beta
float alphabeta_sum;
// 1 / (alpha2_sum*beta2_sum - alphabeta_sum^2)
float factor;
};
// Tables filled by FastClusterFit::doPrecomputation() and read by
// Compress3/Compress4 in the same loop order.
// 153 = number of (c0, c1) pairs with c0 + c1 <= 16.
static Precomp s_threeElement[153];
// 969 = number of (c0, c1, c2) triples with c0 + c1 + c2 <= 16.
static Precomp s_fourElement[969];
void FastClusterFit::doPrecomputation()
{
int i = 0;
// Three element clusters:
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
int c2 = 16 - c0 - c1;
/*if (c2 == 16) {
// a = b = x2 / 16
s_threeElement[i].alpha2_sum = 0;
s_threeElement[i].beta2_sum = 16;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_threeElement[i].alpha2_sum = 16;
s_threeElement[i].beta2_sum = 0;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
}
i++;
}
}
//printf("%d three cluster elements\n", i);
// Four element clusters:
i = 0;
for( int c0 = 0; c0 <= 16; c0++)
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
int c3 = 16 - c0 - c1 - c2;
/*if (c3 == 16) {
// a = b = x3 / 16
s_fourElement[i].alpha2_sum = 16.0f;
s_fourElement[i].beta2_sum = 0.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_fourElement[i].alpha2_sum = 0.0f;
s_fourElement[i].beta2_sum = 16.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
}
i++;
}
}
}
//printf("%d four cluster elements\n", i);
}
void FastClusterFit::setMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD
m_metric = Vec4(r, g, b, 0);
#else
m_metric = Vec3(r, g, b);
#endif
}
//! Returns the best error found so far plus the metric-weighted sum of squares.
/*! The search in Compress3/Compress4 tracks the error without the term
	that depends only on the points; this adds dot( m_xxsum, m_metric ) to
	m_besterror and returns the result.
*/
float FastClusterFit::bestError() const
{
#if SQUISH_USE_SIMD
	// sum the x, y and z lanes of the weighted squares across the vector
	Vec4 weighted = m_xxsum * m_metric;
	Vec4 total = m_besterror + weighted.SplatX() + weighted.SplatY() + weighted.SplatZ();
	return total.GetVec3().X();
#else
	return m_besterror + Dot(m_xxsum, m_metric);
#endif
}
#if SQUISH_USE_SIMD
//! Three-colour cluster fit (SIMD build).
/*! Tries every split of the 16 ordered points into three consecutive
	clusters of sizes (c0, c1, 16-c0-c1). For each split it solves for the two
	endpoints using the matching s_threeElement entry, clamps them to [0, 1],
	snaps them to the (31, 63, 31) grid and evaluates the metric-weighted
	error. If the best split beats m_besterror, the block is written with
	WriteColourBlock3 and m_besterror is updated.

	Reads s_threeElement (filled by doPrecomputation()) and m_metric (set by
	setMetric()).

	NOTE(review): all 16 entries of m_unweighted and m_order are read here,
	but the constructor only fills GetCount() of them -- confirm callers
	always supply 16 points.
*/
void FastClusterFit::Compress3( void* block )
{
	Vec4 const one = VEC4_CONST(1.0f);
	Vec4 const zero = VEC4_CONST(0.0f);
	Vec4 const half = VEC4_CONST(0.5f);
	Vec4 const two = VEC4_CONST(2.0);

	// declare variables
	Vec4 beststart = VEC4_CONST( 0.0f );
	Vec4 bestend = VEC4_CONST( 0.0f );
	Vec4 besterror = VEC4_CONST( FLT_MAX );

	Vec4 x0 = zero;
	Vec4 x1;
	int b0 = 0, b1 = 0;
	int i = 0;

	// check all possible clusters for this total order
	for( int c0 = 0; c0 <= 16; c0++)
	{
		x1 = zero;

		for( int c1 = 0; c1 <= 16-c0; c1++)
		{
			Vec4 const alpha2_sum = VEC4_CONST(s_threeElement[i].alpha2_sum);
			Vec4 const beta2_sum = VEC4_CONST(s_threeElement[i].beta2_sum);
			Vec4 const alphabeta_sum = VEC4_CONST(s_threeElement[i].alphabeta_sum);
			Vec4 const factor = VEC4_CONST(s_threeElement[i].factor);
			i++;

			Vec4 const alphax_sum = MultiplyAdd(half, x1, x0);
			Vec4 const betax_sum = m_xsum - alphax_sum;

			Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
			Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;

			// clamp the output to [0, 1]
			a = Min( one, Max( zero, a ) );
			b = Min( one, Max( zero, b ) );

			// clamp to the grid
			Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
			Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
			a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
			b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;

			// compute the error
			Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
			Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
			Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );

			// apply the metric to the error term
			Vec4 e4 = e3 * m_metric;
			Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();

			// keep the solution if it wins
			if( CompareAnyLessThan( error, besterror ) )
			{
				besterror = error;
				beststart = a;
				bestend = b;
				b0 = c0;
				b1 = c1;
			}

			// m_unweighted has 16 entries; on the last iteration there is no
			// further point to add and the sum is not used again
			if( c0 + c1 < 16 )
				x1 += m_unweighted[c0+c1];
		}

		// same bound for the outer running sum
		if( c0 < 16 )
			x0 += m_unweighted[c0];
	}

	// save the block if necessary
	if( CompareAnyLessThan( besterror, m_besterror ) )
	{
		// compute indices from cluster sizes: first cluster uses index 0,
		// the middle cluster index 2 and the last cluster index 1
		u8 bestindices[16];
		{
			int k = 0;
			for(; k < b0; k++) {
				bestindices[k] = 0;
			}
			for(; k < b0+b1; k++) {
				bestindices[k] = 2;
			}
			for(; k < 16; k++) {
				bestindices[k] = 1;
			}
		}

		// remap the indices from sorted order back to the original point order
		u8 ordered[16];
		for( int k = 0; k < 16; ++k )
			ordered[m_order[k]] = bestindices[k];

		// save the block
		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );

		// save the error
		m_besterror = besterror;
	}
}
//! Four-colour cluster fit (SIMD build).
/*! Tries every split of the 16 ordered points into four consecutive
	clusters of sizes (c0, c1, c2, 16-c0-c1-c2). For each split it solves for
	the two endpoints using the matching s_fourElement entry, clamps them to
	[0, 1], snaps them to the (31, 63, 31) grid and evaluates the
	metric-weighted error. If the best split beats m_besterror, the block is
	written with WriteColourBlock4 and m_besterror is updated.

	Reads s_fourElement (filled by doPrecomputation()) and m_metric (set by
	setMetric()).

	NOTE(review): all 16 entries of m_unweighted and m_order are read here,
	but the constructor only fills GetCount() of them -- confirm callers
	always supply 16 points.
*/
void FastClusterFit::Compress4( void* block )
{
	Vec4 const one = VEC4_CONST(1.0f);
	Vec4 const zero = VEC4_CONST(0.0f);
	Vec4 const half = VEC4_CONST(0.5f);
	Vec4 const two = VEC4_CONST(2.0);
	Vec4 const onethird = VEC4_CONST( 1.0f/3.0f );
	Vec4 const twothirds = VEC4_CONST( 2.0f/3.0f );

	// declare variables
	Vec4 beststart = VEC4_CONST( 0.0f );
	Vec4 bestend = VEC4_CONST( 0.0f );
	Vec4 besterror = VEC4_CONST( FLT_MAX );

	Vec4 x0 = zero;
	int b0 = 0, b1 = 0, b2 = 0;
	int i = 0;

	// check all possible clusters for this total order
	for( int c0 = 0; c0 <= 16; c0++)
	{
		Vec4 x1 = zero;

		for( int c1 = 0; c1 <= 16-c0; c1++)
		{
			Vec4 x2 = zero;

			for( int c2 = 0; c2 <= 16-c0-c1; c2++)
			{
				Vec4 const alpha2_sum = VEC4_CONST(s_fourElement[i].alpha2_sum);
				Vec4 const beta2_sum = VEC4_CONST(s_fourElement[i].beta2_sum);
				Vec4 const alphabeta_sum = VEC4_CONST(s_fourElement[i].alphabeta_sum);
				Vec4 const factor = VEC4_CONST(s_fourElement[i].factor);
				i++;

				Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
				Vec4 const betax_sum = m_xsum - alphax_sum;

				Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
				Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;

				// clamp the output to [0, 1]
				a = Min( one, Max( zero, a ) );
				b = Min( one, Max( zero, b ) );

				// clamp to the grid
				Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
				Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
				a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
				b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;

				// compute the error
				Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
				Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
				Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );

				// apply the metric to the error term
				Vec4 e4 = e3 * m_metric;
				Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();

				// keep the solution if it wins
				if( CompareAnyLessThan( error, besterror ) )
				{
					besterror = error;
					beststart = a;
					bestend = b;
					b0 = c0;
					b1 = c1;
					b2 = c2;
				}

				// m_unweighted has 16 entries; on the last iteration there is
				// no further point to add and the sum is not used again
				if( c0 + c1 + c2 < 16 )
					x2 += m_unweighted[c0+c1+c2];
			}

			// same bound for the middle running sum
			if( c0 + c1 < 16 )
				x1 += m_unweighted[c0+c1];
		}

		// same bound for the outer running sum
		if( c0 < 16 )
			x0 += m_unweighted[c0];
	}

	// save the block if necessary
	if( CompareAnyLessThan( besterror, m_besterror ) )
	{
		// compute indices from cluster sizes: the four clusters use
		// indices 0, 2, 3 and 1 in that order
		u8 bestindices[16];
		{
			int k = 0;
			for(; k < b0; k++) {
				bestindices[k] = 0;
			}
			for(; k < b0+b1; k++) {
				bestindices[k] = 2;
			}
			for(; k < b0+b1+b2; k++) {
				bestindices[k] = 3;
			}
			for(; k < 16; k++) {
				bestindices[k] = 1;
			}
		}

		// remap the indices from sorted order back to the original point order
		u8 ordered[16];
		for( int k = 0; k < 16; ++k )
			ordered[m_order[k]] = bestindices[k];

		// save the block
		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );

		// save the error
		m_besterror = besterror;
	}
}
#else
//! Three-colour cluster fit (scalar build).
/*! Tries every split of the 16 ordered points into three consecutive
	clusters of sizes (c0, c1, 16-c0-c1). For each split it solves for the two
	endpoints using the matching s_threeElement entry, clamps them to [0, 1],
	snaps them to the (31, 63, 31) grid and evaluates the metric-weighted
	error. If the best split beats m_besterror, the block is written with
	WriteColourBlock3 and m_besterror is updated.

	Reads s_threeElement (filled by doPrecomputation()) and m_metric (set by
	setMetric()).

	NOTE(review): all 16 entries of m_unweighted and m_order are read here,
	but the constructor only fills GetCount() of them -- confirm callers
	always supply 16 points.
*/
void FastClusterFit::Compress3( void* block )
{
	// declare variables
	Vec3 beststart( 0.0f );
	Vec3 bestend( 0.0f );
	float besterror = FLT_MAX;

	Vec3 x0(0.0f);
	Vec3 x1;
	int b0 = 0, b1 = 0;
	int i = 0;

	// check all possible clusters for this total order
	for( int c0 = 0; c0 <= 16; c0++)
	{
		x1 = Vec3(0);

		for( int c1 = 0; c1 <= 16-c0; c1++)
		{
			float const alpha2_sum = s_threeElement[i].alpha2_sum;
			float const beta2_sum = s_threeElement[i].beta2_sum;
			float const alphabeta_sum = s_threeElement[i].alphabeta_sum;
			float const factor = s_threeElement[i].factor;
			i++;

			Vec3 const alphax_sum = x0 + x1 * 0.5f;
			Vec3 const betax_sum = m_xsum - alphax_sum;

			Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
			Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;

			// clamp the output to [0, 1]
			Vec3 const one( 1.0f );
			Vec3 const zero( 0.0f );
			a = Min( one, Max( zero, a ) );
			b = Min( one, Max( zero, b ) );

			// clamp to the grid
			Vec3 const grid( 31.0f, 63.0f, 31.0f );
			Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
			Vec3 const half( 0.5f );
			a = Floor( grid*a + half )*gridrcp;
			b = Floor( grid*b + half )*gridrcp;

			// compute the error
			Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );

			// apply the metric to the error term
			float error = Dot( e1, m_metric );

			// keep the solution if it wins
			if( error < besterror )
			{
				besterror = error;
				beststart = a;
				bestend = b;
				b0 = c0;
				b1 = c1;
			}

			// m_unweighted has 16 entries; on the last iteration there is no
			// further point to add and the sum is not used again
			if( c0 + c1 < 16 )
				x1 += m_unweighted[c0+c1];
		}

		// same bound for the outer running sum
		if( c0 < 16 )
			x0 += m_unweighted[c0];
	}

	// save the block if necessary
	if( besterror < m_besterror )
	{
		// compute indices from cluster sizes: first cluster uses index 0,
		// the middle cluster index 2 and the last cluster index 1
		u8 bestindices[16];
		{
			int k = 0;
			for(; k < b0; k++) {
				bestindices[k] = 0;
			}
			for(; k < b0+b1; k++) {
				bestindices[k] = 2;
			}
			for(; k < 16; k++) {
				bestindices[k] = 1;
			}
		}

		// remap the indices from sorted order back to the original point order
		u8 ordered[16];
		for( int k = 0; k < 16; ++k )
			ordered[m_order[k]] = bestindices[k];

		// save the block
		WriteColourBlock3( beststart, bestend, ordered, block );

		// save the error
		m_besterror = besterror;
	}
}
//! Four-colour cluster fit (scalar build).
/*! Tries every split of the 16 ordered points into four consecutive
	clusters of sizes (c0, c1, c2, 16-c0-c1-c2). For each split it solves for
	the two endpoints using the matching s_fourElement entry, clamps them to
	[0, 1], snaps them to the (31, 63, 31) grid and evaluates the
	metric-weighted error. If the best split beats m_besterror, the block is
	written with WriteColourBlock4 and m_besterror is updated.

	Reads s_fourElement (filled by doPrecomputation()) and m_metric (set by
	setMetric()).

	NOTE(review): all 16 entries of m_unweighted and m_order are read here,
	but the constructor only fills GetCount() of them -- confirm callers
	always supply 16 points.
*/
void FastClusterFit::Compress4( void* block )
{
	// declare variables
	Vec3 beststart( 0.0f );
	Vec3 bestend( 0.0f );
	float besterror = FLT_MAX;

	Vec3 x0(0.0f);
	Vec3 x1;
	Vec3 x2;
	int b0 = 0, b1 = 0, b2 = 0;
	int i = 0;

	// check all possible clusters for this total order
	for( int c0 = 0; c0 <= 16; c0++)
	{
		x1 = Vec3(0.0f);

		for( int c1 = 0; c1 <= 16-c0; c1++)
		{
			x2 = Vec3(0.0f);

			for( int c2 = 0; c2 <= 16-c0-c1; c2++)
			{
				float const alpha2_sum = s_fourElement[i].alpha2_sum;
				float const beta2_sum = s_fourElement[i].beta2_sum;
				float const alphabeta_sum = s_fourElement[i].alphabeta_sum;
				float const factor = s_fourElement[i].factor;
				i++;

				Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
				Vec3 const betax_sum = m_xsum - alphax_sum;

				Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
				Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;

				// clamp the output to [0, 1]
				Vec3 const one( 1.0f );
				Vec3 const zero( 0.0f );
				a = Min( one, Max( zero, a ) );
				b = Min( one, Max( zero, b ) );

				// clamp to the grid
				Vec3 const grid( 31.0f, 63.0f, 31.0f );
				Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
				Vec3 const half( 0.5f );
				a = Floor( grid*a + half )*gridrcp;
				b = Floor( grid*b + half )*gridrcp;

				// compute the error
				Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );

				// apply the metric to the error term
				float error = Dot( e1, m_metric );

				// keep the solution if it wins
				if( error < besterror )
				{
					besterror = error;
					beststart = a;
					bestend = b;
					b0 = c0;
					b1 = c1;
					b2 = c2;
				}

				// m_unweighted has 16 entries; on the last iteration there is
				// no further point to add and the sum is not used again
				if( c0 + c1 + c2 < 16 )
					x2 += m_unweighted[c0+c1+c2];
			}

			// same bound for the middle running sum
			if( c0 + c1 < 16 )
				x1 += m_unweighted[c0+c1];
		}

		// same bound for the outer running sum
		if( c0 < 16 )
			x0 += m_unweighted[c0];
	}

	// save the block if necessary
	if( besterror < m_besterror )
	{
		// compute indices from cluster sizes: the four clusters use
		// indices 0, 2, 3 and 1 in that order
		u8 bestindices[16];
		{
			int k = 0;
			for(; k < b0; k++) {
				bestindices[k] = 0;
			}
			for(; k < b0+b1; k++) {
				bestindices[k] = 2;
			}
			for(; k < b0+b1+b2; k++) {
				bestindices[k] = 3;
			}
			for(; k < 16; k++) {
				bestindices[k] = 1;
			}
		}

		// remap the indices from sorted order back to the original point order
		u8 ordered[16];
		for( int k = 0; k < 16; ++k )
			ordered[m_order[k]] = bestindices[k];

		// save the block
		WriteColourBlock4( beststart, bestend, ordered, block );

		// save the error
		m_besterror = besterror;
	}
}
#endif
} // namespace squish

View File

@ -0,0 +1,74 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_FASTCLUSTERFIT_H
#define SQUISH_FASTCLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace squish {
//! Colour fit that searches cluster partitions using precomputed tables.
/*! Typical use, as far as this header and its .cpp show: call
	doPrecomputation() once to fill the shared tables, construct with a colour
	set, call setMetric(), then call Compress3 and/or Compress4 and read
	bestError().
*/
class FastClusterFit : public ColourFit
{
public:
//! Orders the colour set along its principal axis and builds the running sums.
FastClusterFit( ColourSet const* colours, int flags );
//! Sets the per-channel error weights; the constructor does not initialise them.
void setMetric(float r, float g, float b);
//! Best error so far plus the metric-weighted sum of squared points.
float bestError() const;
//! Fills the static tables read by Compress3 and Compress4.
static void doPrecomputation();
// Make them public
//! Three-colour fit; writes to block only if it improves on the best error.
virtual void Compress3( void* block );
//! Four-colour fit; writes to block only if it improves on the best error.
virtual void Compress4( void* block );
private:
// NOTE(review): not assigned by the constructor in fastclusterfit.cpp -- possibly unused
Vec3 m_principle;
#if SQUISH_USE_SIMD
// the points in sorted order
Vec4 m_unweighted[16];
// per-channel error weights
Vec4 m_metric;
// sum of the squared points
Vec4 m_xxsum;
// sum of the points
Vec4 m_xsum;
// lowest error found so far
Vec4 m_besterror;
#else
// the points in sorted order
Vec3 m_unweighted[16];
// per-channel error weights
Vec3 m_metric;
// sum of the squared points
Vec3 m_xxsum;
// sum of the points
Vec3 m_xsum;
// lowest error found so far
float m_besterror;
#endif
// m_order[i] is the original index of the i-th point in sorted order
int m_order[16];
};
} // namespace squish
#endif // ndef SQUISH_FASTCLUSTERFIT_H

252
src/nvtt/squish/maths.cpp Normal file
View File

@ -0,0 +1,252 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
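/*! @file
The symmetric eigensystem solver algorithm is from
http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf
Note: that analytic solver is currently commented out further down in this
file; the active ComputePrincipleComponent uses a fixed number of power
iterations instead.
*/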
#include "maths.h"
#include <cfloat>
namespace squish {
// Builds the weighted covariance of `n` points about their weighted mean.
//
// The result holds the six unique entries of the symmetric matrix in the
// order xx, xy, xz, yy, yz, zz. The sums are not divided by the total
// weight, so the matrix is an unnormalised (scaled) covariance.
//
// points  - array of n positions
// weights - array of n per-point weights; their sum is used as a divisor
Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights )
{
    // pass 1: weighted mean of the input
    float weightSum = 0.0f;
    Vec3 mean( 0.0f );
    int k = 0;
    while( k < n )
    {
        float const w = weights[k];
        weightSum += w;
        mean += w*points[k];
        ++k;
    }
    mean /= weightSum;

    // pass 2: sum the weighted outer products of the offsets from the mean
    Sym3x3 result( 0.0f );
    for( k = 0; k < n; ++k )
    {
        Vec3 const delta = points[k] - mean;
        Vec3 const scaled = weights[k]*delta;

        float const dx = delta.X();
        float const dy = delta.Y();
        float const dz = delta.Z();

        result[0] += dx*scaled.X();
        result[1] += dx*scaled.Y();
        result[2] += dx*scaled.Z();
        result[3] += dy*scaled.Y();
        result[4] += dy*scaled.Z();
        result[5] += dz*scaled.Z();
    }

    return result;
}
/*
static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
{
// compute M
Sym3x3 m;
m[0] = matrix[0] - evalue;
m[1] = matrix[1];
m[2] = matrix[2];
m[3] = matrix[3] - evalue;
m[4] = matrix[4];
m[5] = matrix[5] - evalue;
// compute U
Sym3x3 u;
u[0] = m[3]*m[5] - m[4]*m[4];
u[1] = m[2]*m[4] - m[1]*m[5];
u[2] = m[1]*m[4] - m[2]*m[3];
u[3] = m[0]*m[5] - m[2]*m[2];
u[4] = m[1]*m[2] - m[4]*m[0];
u[5] = m[0]*m[3] - m[1]*m[1];
// find the largest component
float mc = std::fabs( u[0] );
int mi = 0;
for( int i = 1; i < 6; ++i )
{
float c = std::fabs( u[i] );
if( c > mc )
{
mc = c;
mi = i;
}
}
// pick the column with this component
switch( mi )
{
case 0:
return Vec3( u[0], u[1], u[2] );
case 1:
case 3:
return Vec3( u[1], u[3], u[4] );
default:
return Vec3( u[2], u[4], u[5] );
}
}
static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
{
// compute M
Sym3x3 m;
m[0] = matrix[0] - evalue;
m[1] = matrix[1];
m[2] = matrix[2];
m[3] = matrix[3] - evalue;
m[4] = matrix[4];
m[5] = matrix[5] - evalue;
// find the largest component
float mc = std::fabs( m[0] );
int mi = 0;
for( int i = 1; i < 6; ++i )
{
float c = std::fabs( m[i] );
if( c > mc )
{
mc = c;
mi = i;
}
}
// pick the first eigenvector based on this index
switch( mi )
{
case 0:
case 1:
return Vec3( -m[1], m[0], 0.0f );
case 2:
return Vec3( m[2], 0.0f, -m[0] );
case 3:
case 4:
return Vec3( 0.0f, -m[4], m[3] );
default:
return Vec3( 0.0f, -m[5], m[4] );
}
}
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
// compute the cubic coefficients
float c0 = matrix[0]*matrix[3]*matrix[5]
+ 2.0f*matrix[1]*matrix[2]*matrix[4]
- matrix[0]*matrix[4]*matrix[4]
- matrix[3]*matrix[2]*matrix[2]
- matrix[5]*matrix[1]*matrix[1];
float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
- matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
float c2 = matrix[0] + matrix[3] + matrix[5];
// compute the quadratic coefficients
float a = c1 - ( 1.0f/3.0f )*c2*c2;
float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
// compute the root count check
float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
// test the multiplicity
if( FLT_EPSILON < Q )
{
// only one root, which implies we have a multiple of the identity
return Vec3( 1.0f );
}
else if( Q < -FLT_EPSILON )
{
// three distinct roots
float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
float rho = std::sqrt( 0.25f*b*b - Q );
float rt = std::pow( rho, 1.0f/3.0f );
float ct = std::cos( theta/3.0f );
float st = std::sin( theta/3.0f );
float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st );
float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st );
// pick the larger
if( std::fabs( l2 ) > std::fabs( l1 ) )
l1 = l2;
if( std::fabs( l3 ) > std::fabs( l1 ) )
l1 = l3;
// get the eigenvector
return GetMultiplicity1Evector( matrix, l1 );
}
else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
{
// two roots
float rt;
if( b < 0.0f )
rt = -std::pow( -0.5f*b, 1.0f/3.0f );
else
rt = std::pow( 0.5f*b, 1.0f/3.0f );
float l1 = ( 1.0f/3.0f )*c2 + rt; // repeated
float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
// get the eigenvector
if( std::fabs( l1 ) > std::fabs( l2 ) )
return GetMultiplicity2Evector( matrix, l1 );
else
return GetMultiplicity1Evector( matrix, l2 );
}
}
*/
// Estimates the dominant eigenvector (principal axis) of a symmetric 3x3
// matrix with a fixed number of power iterations.
//
// matrix - symmetric matrix stored as xx, xy, xz, yy, yz, zz
//
// Returns a vector along the estimated principal axis. It is scaled so that
// its largest-magnitude component is +/-1 (it is NOT unit length). The zero
// vector is returned when the iteration collapses, i.e. when the matrix maps
// the current estimate to exactly zero.
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
    // fixed iteration count: a speed/accuracy trade-off, no convergence test
    const int NUM = 8;

    // start from the diagonal direction
    Vec3 v(1, 1, 1);
    for(int i = 0; i < NUM; i++) {
        // v <- matrix * v
        float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
        float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
        float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];

        // Rescale by the largest *magnitude*. Using the signed maximum would
        // report a non-zero vector such as (-2, -0.5, 0) as zero, and could
        // divide by a tiny positive value next to a large negative component.
        float norm = std::max(std::max(std::fabs(x), std::fabs(y)), std::fabs(z));
        if (norm == 0.0f) {
            // the product vanished entirely; there is no direction to return
            return Vec3(0.0f);
        }

        float iv = 1.0f / norm;
        v = Vec3(x*iv, y*iv, z*iv);
    }
    return v;
}
} // namespace squish

239
src/nvtt/squish/maths.h Normal file
View File

@ -0,0 +1,239 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_MATHS_H
#define SQUISH_MATHS_H
#include <cmath>
#include <algorithm>
#include "config.h"
namespace squish {
// Minimal three-component float vector with component-wise arithmetic.
//
// All binary operators and the Dot/Min/Max/Floor helpers are in-class
// friends, so they are found by argument-dependent lookup only.
class Vec3
{
public:
    // how a Vec3 is passed as a function argument
    typedef Vec3 const& Arg;

    // leaves all three components uninitialised
    Vec3() {}

    // broadcasts a single value to x, y and z
    explicit Vec3( float a ) : m_x( a ), m_y( a ), m_z( a ) {}

    Vec3( float a, float b, float c ) : m_x( a ), m_y( b ), m_z( c ) {}

    // component accessors
    float X() const { return m_x; }
    float Y() const { return m_y; }
    float Z() const { return m_z; }

    Vec3 operator-() const { return Vec3( -m_x, -m_y, -m_z ); }

    // --- compound assignment (component-wise) ---

    Vec3& operator+=( Arg v )
    {
        m_x += v.m_x; m_y += v.m_y; m_z += v.m_z;
        return *this;
    }

    Vec3& operator-=( Arg v )
    {
        m_x -= v.m_x; m_y -= v.m_y; m_z -= v.m_z;
        return *this;
    }

    Vec3& operator*=( Arg v )
    {
        m_x *= v.m_x; m_y *= v.m_y; m_z *= v.m_z;
        return *this;
    }

    Vec3& operator*=( float s )
    {
        m_x *= s; m_y *= s; m_z *= s;
        return *this;
    }

    Vec3& operator/=( Arg v )
    {
        m_x /= v.m_x; m_y /= v.m_y; m_z /= v.m_z;
        return *this;
    }

    // scalar division is performed as multiplication by the reciprocal
    Vec3& operator/=( float s )
    {
        return *this *= ( 1.0f/s );
    }

    // --- binary operators ---

    friend Vec3 operator+( Arg left, Arg right )
    {
        return Vec3( left.m_x + right.m_x, left.m_y + right.m_y, left.m_z + right.m_z );
    }

    // adds the scalar to every component
    friend Vec3 operator+( Arg left, float right )
    {
        return Vec3( left.m_x + right, left.m_y + right, left.m_z + right );
    }

    friend Vec3 operator-( Arg left, Arg right )
    {
        return Vec3( left.m_x - right.m_x, left.m_y - right.m_y, left.m_z - right.m_z );
    }

    friend Vec3 operator*( Arg left, Arg right )
    {
        return Vec3( left.m_x*right.m_x, left.m_y*right.m_y, left.m_z*right.m_z );
    }

    friend Vec3 operator*( Arg left, float right )
    {
        return Vec3( left.m_x*right, left.m_y*right, left.m_z*right );
    }

    friend Vec3 operator*( float left, Arg right )
    {
        return Vec3( right.m_x*left, right.m_y*left, right.m_z*left );
    }

    friend Vec3 operator/( Arg left, Arg right )
    {
        return Vec3( left.m_x/right.m_x, left.m_y/right.m_y, left.m_z/right.m_z );
    }

    // like operator/=( float ): one reciprocal, then three multiplies
    friend Vec3 operator/( Arg left, float right )
    {
        float const inv = 1.0f/right;
        return Vec3( left.m_x*inv, left.m_y*inv, left.m_z*inv );
    }

    // --- helpers ---

    friend float Dot( Arg left, Arg right )
    {
        return left.m_x*right.m_x + left.m_y*right.m_y + left.m_z*right.m_z;
    }

    // component-wise minimum
    friend Vec3 Min( Arg left, Arg right )
    {
        float const x = std::min( left.m_x, right.m_x );
        float const y = std::min( left.m_y, right.m_y );
        float const z = std::min( left.m_z, right.m_z );
        return Vec3( x, y, z );
    }

    // component-wise maximum
    friend Vec3 Max( Arg left, Arg right )
    {
        float const x = std::max( left.m_x, right.m_x );
        float const y = std::max( left.m_y, right.m_y );
        float const z = std::max( left.m_z, right.m_z );
        return Vec3( x, y, z );
    }

    // component-wise floor
    friend Vec3 Floor( Arg v )
    {
        return Vec3( std::floor( v.m_x ), std::floor( v.m_y ), std::floor( v.m_z ) );
    }

private:
    float m_x;
    float m_y;
    float m_z;
};
// Squared Euclidean length of v, i.e. the dot product of v with itself.
inline float LengthSquared( Vec3::Arg v )
{
    float const squared = Dot( v, v );
    return squared;
}
// Storage for the six unique entries of a symmetric 3x3 matrix, addressed
// by a flat index 0..5. No bounds checking is performed.
class Sym3x3
{
public:
    // leaves all six entries uninitialised
    Sym3x3() {}

    // sets every entry to the same value
    Sym3x3( float a )
    {
        std::fill( m_x, m_x + 6, a );
    }

    // read access to entry `index`
    float operator[]( int index ) const { return m_x[index]; }

    // read/write access to entry `index`
    float& operator[]( int index ) { return m_x[index]; }

private:
    float m_x[6];
};
//! Accumulates the weighted covariance of n points about their weighted
//! centroid. Entries are stored in the order xx, xy, xz, yy, yz, zz and are
//! not divided by the total weight.
Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights );
//! Estimates the dominant eigenvector of a symmetric 3x3 matrix using a fixed
//! number of power iterations (see maths.cpp). The result is not unit length
//! and is the zero vector if the iteration collapses.
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix );
} // namespace squish
#endif // ndef SQUISH_MATHS_H

View File

@ -0,0 +1,202 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "rangefit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
// Prepares a range fit: selects the error metric, then derives the two
// codebook endpoints from the colours that lie furthest apart along the
// principal axis of the set, clamped to [0, 1] and snapped to a 31/63/31
// per-channel grid.
RangeFit::RangeFit( ColourSet const* colours, int flags )
  : ColourFit( colours, flags )
{
    // per-channel error weights: perceptual luminance weights, or uniform
    if( ( m_flags & kColourMetricPerceptual ) != 0 )
        m_metric = Vec3( 0.2126f, 0.7152f, 0.0722f );
    else
        m_metric = Vec3( 1.0f );

    // nothing has been written yet, so any candidate beats the best error
    m_besterror = FLT_MAX;

    // pull the colour set data into locals
    int const numPoints = m_colours->GetCount();
    Vec3 const* pts = m_colours->GetPoints();
    float const* wts = m_colours->GetWeights();

    // principal axis of the weighted covariance of the colours
    Vec3 const axis = ComputePrincipleComponent(
        ComputeWeightedCovariance( numPoints, pts, wts ) );

    // find the colours with the smallest and largest projection on the axis
    Vec3 lo( 0.0f );
    Vec3 hi( 0.0f );
    if( numPoints > 0 )
    {
        int loIdx = 0;
        int hiIdx = 0;
        float loProj = Dot( pts[0], axis );
        float hiProj = loProj;
        for( int k = 1; k < numPoints; ++k )
        {
            float const proj = Dot( pts[k], axis );
            if( proj < loProj )
            {
                loIdx = k;
                loProj = proj;
            }
            else if( proj > hiProj )
            {
                hiIdx = k;
                hiProj = proj;
            }
        }
        lo = pts[loIdx];
        hi = pts[hiIdx];
    }

    // keep both endpoints inside the unit cube
    Vec3 const zero( 0.0f );
    Vec3 const one( 1.0f );
    lo = Min( one, Max( zero, lo ) );
    hi = Min( one, Max( zero, hi ) );

    // round each channel to the nearest grid step and store the result
    Vec3 const grid( 31.0f, 63.0f, 31.0f );
    Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
    Vec3 const half( 0.5f );
    m_start = Floor( grid*lo + half )*gridrcp;
    m_end = Floor( grid*hi + half )*gridrcp;
}
void RangeFit::Compress3( void* block )
{
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
// create a codebook
Vec3 codes[3];
codes[0] = m_start;
codes[1] = m_end;
codes[2] = 0.5f*m_start + 0.5f*m_end;
// match each point to the closest code
u8 closest[16];
float error = 0.0f;
for( int i = 0; i < count; ++i )
{
// find the closest code
float dist = FLT_MAX;
int idx = 0;
for( int j = 0; j < 3; ++j )
{
float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
if( d < dist )
{
dist = d;
idx = j;
}
}
// save the index
closest[i] = ( u8 )idx;
// accumulate the error
error += dist;
}
// save this scheme if it wins
if( error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( closest, indices );
// save the block
WriteColourBlock3( m_start, m_end, indices, block );
// save the error
m_besterror = error;
}
}
void RangeFit::Compress4( void* block )
{
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
// create a codebook
Vec3 codes[4];
codes[0] = m_start;
codes[1] = m_end;
codes[2] = ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end;
codes[3] = ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end;
// match each point to the closest code
u8 closest[16];
float error = 0.0f;
for( int i = 0; i < count; ++i )
{
// find the closest code
float dist = FLT_MAX;
int idx = 0;
for( int j = 0; j < 4; ++j )
{
float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
if( d < dist )
{
dist = d;
idx = j;
}
}
// save the index
closest[i] = ( u8 )idx;
// accumulate the error
error += dist;
}
// save this scheme if it wins
if( error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( closest, indices );
// save the block
WriteColourBlock4( m_start, m_end, indices, block );
// save the error
m_besterror = error;
}
}
} // namespace squish

View File

@ -0,0 +1,54 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_RANGEFIT_H
#define SQUISH_RANGEFIT_H
#include <squish.h>
#include "colourfit.h"
#include "maths.h"
namespace squish {
class ColourSet;
// Colour fitter whose endpoints are fixed at construction time; the
// Compress3/Compress4 overrides only assign palette indices and write the
// block when it improves on the best error seen so far.
class RangeFit : public ColourFit
{
public:
// Selects the metric from `flags` and computes the two endpoints from `colours`.
RangeFit( ColourSet const* colours, int flags );
private:
// Three-colour and four-colour block writers (overrides of ColourFit).
virtual void Compress3( void* block );
virtual void Compress4( void* block );
// per-channel weights applied to colour differences before measuring error
Vec3 m_metric;
// codebook endpoints
Vec3 m_start;
Vec3 m_end;
// lowest total error written so far; starts at FLT_MAX
float m_besterror;
};
} // squish
#endif // ndef SQUISH_RANGEFIT_H

39
src/nvtt/squish/simd.h Normal file
View File

@ -0,0 +1,39 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SIMD_H
#define SQUISH_SIMD_H
#include "maths.h"
#if SQUISH_USE_ALTIVEC
#include "simd_ve.h"
#endif
#if SQUISH_USE_SSE
#include "simd_sse.h"
#endif
#endif // ndef SQUISH_SIMD_H

192
src/nvtt/squish/simd_sse.h Normal file
View File

@ -0,0 +1,192 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SIMD_SSE_H
#define SQUISH_SIMD_SSE_H
#include <xmmintrin.h>
#if ( SQUISH_USE_SSE > 1 )
#include <emmintrin.h>
#endif
#include <cassert>
// Builds the 8-bit immediate for _mm_shuffle_ps that selects lane `a`
// (0..3) for all four result lanes: the 2-bit lane index is replicated
// into each of the four 2-bit selector fields.
#define SQUISH_SSE_SPLAT( a ) \
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
namespace squish {
//! Builds a Vec4 with all four lanes set to the scalar X.
#define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )
//! Four-lane float vector backed by an SSE __m128 value.
class Vec4
{
public:
//! In this backend Vec4 arguments are passed by const reference.
typedef Vec4 const& Arg;
//! Leaves the lanes uninitialised.
Vec4() {}
//! Wraps an existing SSE value.
explicit Vec4( __m128 v ) : m_v( v ) {}
Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
Vec4& operator=( Vec4 const& arg )
{
m_v = arg.m_v;
return *this;
}
//! Builds a vector whose lanes 0..3 are x, y, z, w in that order.
Vec4( float x, float y, float z, float w )
{
m_v = _mm_setr_ps( x, y, z, w );
}
//! Returns lanes 0..2 as a Vec3; lane 3 is discarded.
Vec3 GetVec3() const
{
// _mm_store_ps needs a 16-byte aligned destination; the alignment
// attribute is spelled differently for GCC-compatible and other compilers
#ifdef __GNUC__
__attribute__ ((__aligned__ (16))) float c[4];
#else
__declspec(align(16)) float c[4];
#endif
_mm_store_ps( c, m_v );
return Vec3( c[0], c[1], c[2] );
}
//! Each SplatN returns a vector with lane N copied to all four lanes.
Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); }
Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); }
//! Lane-wise compound assignment.
Vec4& operator+=( Arg v )
{
m_v = _mm_add_ps( m_v, v.m_v );
return *this;
}
Vec4& operator-=( Arg v )
{
m_v = _mm_sub_ps( m_v, v.m_v );
return *this;
}
Vec4& operator*=( Arg v )
{
m_v = _mm_mul_ps( m_v, v.m_v );
return *this;
}
//! Lane-wise binary arithmetic.
friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_add_ps( left.m_v, right.m_v ) );
}
friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_sub_ps( left.m_v, right.m_v ) );
}
friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_mul_ps( left.m_v, right.m_v ) );
}
//! Returns a*b + c
friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) );
}
//! Returns -( a*b - c )
friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) );
}
//! Returns an approximate lane-wise 1/v: the hardware estimate improved by
//! one refinement step, estimate + estimate*( 1 - estimate*v ).
friend Vec4 Reciprocal( Vec4::Arg v )
{
// get the reciprocal estimate
__m128 estimate = _mm_rcp_ps( v.m_v );
// one round of Newton-Raphson refinement
__m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) );
return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
}
//! Lane-wise minimum.
friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_min_ps( left.m_v, right.m_v ) );
}
//! Lane-wise maximum.
friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_max_ps( left.m_v, right.m_v ) );
}
//! Rounds each lane toward zero by converting to a 32-bit integer and back.
//! NOTE(review): lanes outside the int32 range presumably do not survive
//! the round trip -- confirm callers only pass small values.
friend Vec4 Truncate( Vec4::Arg v )
{
#if ( SQUISH_USE_SSE == 1 )
// SSE1 only: go through the 64-bit MMX registers, two lanes at a time
// convert to ints
__m128 input = v.m_v;
__m64 lo = _mm_cvttps_pi32( input );
__m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
// convert to floats
__m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
__m128 truncated = _mm_cvtpi32_ps( part, lo );
// clear out the MMX multimedia state to allow FP calls later
_mm_empty();
return Vec4( truncated );
#else
// use SSE2 instructions
return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) );
#endif
}
//! Returns a lane mask: all bits set in lanes where left == right, zero elsewhere.
friend Vec4 CompareEqual( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_cmpeq_ps( left.m_v, right.m_v ) );
}
//! Bitwise select: takes bits from `on` where `bits` is set and from `off`
//! where it is clear, i.e. ( off & ~bits ) | ( on & bits ).
friend Vec4 Select( Vec4::Arg off, Vec4::Arg on, Vec4::Arg bits )
{
__m128 a = _mm_andnot_ps( bits.m_v, off.m_v );
__m128 b = _mm_and_ps( bits.m_v, on.m_v );
return Vec4( _mm_or_ps( a, b ) );
}
//! True if left < right in at least one of the four lanes.
friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
{
__m128 bits = _mm_cmplt_ps( left.m_v, right.m_v );
int value = _mm_movemask_ps( bits );
return value != 0;
}
private:
__m128 m_v;
};
} // namespace squish
#endif // ndef SQUISH_SIMD_SSE_H

166
src/nvtt/squish/simd_ve.h Normal file
View File

@ -0,0 +1,166 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SIMD_VE_H
#define SQUISH_SIMD_VE_H
#include <altivec.h>
#undef bool
namespace squish {
//! Builds a Vec4 from the scalar X via an AltiVec vector cast.
//! NOTE(review): presumably this sets all four lanes to X -- confirm for the
//! target compiler's vector-literal syntax.
#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )
//! Four-lane float vector backed by an AltiVec `vector float`.
class Vec4
{
public:
//! In this backend Vec4 arguments are passed by value.
typedef Vec4 Arg;
//! Leaves the lanes uninitialised.
Vec4() {}
//! Wraps an existing AltiVec value.
explicit Vec4( vector float v ) : m_v( v ) {}
Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
Vec4& operator=( Vec4 const& arg )
{
m_v = arg.m_v;
return *this;
}
//! Builds a vector whose lanes 0..3 are x, y, z, w in that order.
Vec4( float x, float y, float z, float w )
{
// assemble the lanes through a union with a plain float array
union { vector float v; float c[4]; } u;
u.c[0] = x;
u.c[1] = y;
u.c[2] = z;
u.c[3] = w;
m_v = u.v;
}
//! Returns lanes 0..2 as a Vec3; lane 3 is discarded.
Vec3 GetVec3() const
{
union { vector float v; float c[4]; } u;
u.v = m_v;
return Vec3( u.c[0], u.c[1], u.c[2] );
}
//! Each SplatN returns a vector with lane N copied to all four lanes.
Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }
Vec4 SplatW() const { return Vec4( vec_splat( m_v, 3 ) ); }
//! Lane-wise compound assignment.
Vec4& operator+=( Arg v )
{
m_v = vec_add( m_v, v.m_v );
return *this;
}
Vec4& operator-=( Arg v )
{
m_v = vec_sub( m_v, v.m_v );
return *this;
}
//! Multiplication is expressed as a multiply-add with a -0.0f addend.
//! NOTE(review): presumably -0.0f is used so the addend cannot change the
//! sign of a zero product -- confirm.
Vec4& operator*=( Arg v )
{
m_v = vec_madd( m_v, v.m_v, ( vector float )( -0.0f ) );
return *this;
}
//! Lane-wise binary arithmetic.
friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( vec_add( left.m_v, right.m_v ) );
}
friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( vec_sub( left.m_v, right.m_v ) );
}
friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( vec_madd( left.m_v, right.m_v, ( vector float )( -0.0f ) ) );
}
//! Returns a*b + c
friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return Vec4( vec_madd( a.m_v, b.m_v, c.m_v ) );
}
//! Returns -( a*b - c )
friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return Vec4( vec_nmsub( a.m_v, b.m_v, c.m_v ) );
}
//! Returns an approximate lane-wise 1/v: the hardware estimate improved by
//! one refinement step, estimate + estimate*( 1 - estimate*v ).
friend Vec4 Reciprocal( Vec4::Arg v )
{
// get the reciprocal estimate
vector float estimate = vec_re( v.m_v );
// one round of Newton-Raphson refinement
vector float diff = vec_nmsub( estimate, v.m_v, ( vector float )( 1.0f ) );
return Vec4( vec_madd( diff, estimate, estimate ) );
}
//! Lane-wise minimum.
friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( vec_min( left.m_v, right.m_v ) );
}
//! Lane-wise maximum.
friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( vec_max( left.m_v, right.m_v ) );
}
//! Rounds each lane toward zero.
friend Vec4 Truncate( Vec4::Arg v )
{
return Vec4( vec_trunc( v.m_v ) );
}
//! Returns the lane-wise equality comparison result reinterpreted as floats.
friend Vec4 CompareEqual( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( ( vector float )vec_cmpeq( left.m_v, right.m_v ) );
}
//! Bitwise select between `off` and `on`, controlled by the mask `bits`.
friend Vec4 Select( Vec4::Arg off, Vec4::Arg on, Vec4::Arg bits )
{
return Vec4( vec_sel( off.m_v, on.m_v, ( vector unsigned int )bits.m_v ) );
}
//! True if left < right in at least one of the four lanes.
friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
{
return vec_any_lt( left.m_v, right.m_v ) != 0;
}
private:
vector float m_v;
};
} // namespace squish
#endif // ndef SQUISH_SIMD_VE_H

View File

@ -0,0 +1,144 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano castanyo@yahoo.es
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "singlechannelfit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
// Prepares a single-channel fit. Only the Y() component of each colour is
// used: it is converted to an 8-bit grey level and cached in m_greys, and the
// range of 6-bit endpoint values that Compress4 will search is derived from
// the darkest and brightest grey.
//
// colours - colour set to fit
// flags   - compression flags, forwarded to ColourFit
SingleChannelFit::SingleChannelFit( ColourSet const* colours, int const flags )
  : ColourFit( colours, flags )
{
    // cache some values
    int const count = m_colours->GetCount();
    Vec3 const* values = m_colours->GetPoints();

    if( count <= 0 ) {
        // Nothing to measure. The padding arithmetic below would otherwise
        // yield an inverted range (min 125, max -62); keep the smallest valid
        // range so that Compress4 still has one candidate endpoint pair.
        m_g_min = 0;
        m_g_max = 1;
        return;
    }

    // Find bounds of the search space, in 6-bit units (grey >> 2).
    m_g_min = 63;
    m_g_max = 0;
    for(int i = 0; i < count; i++) {
        int grey = int(values[i].Y() * 255.0f); // @@ rounding?
        grey = std::min(grey, 255); // clamp to [0, 255]
        grey = std::max(grey, 0);
        m_greys[i] = u8(grey);
        m_g_min = std::min(m_g_min, grey >> 2);
        m_g_max = std::max(m_g_max, grey >> 2);
    }

    // Widen the range on both sides by its own width (at least 1), clamped to
    // the 6-bit limits, so endpoints outside the observed greys are tried too.
    int const g_pad = m_g_max - m_g_min + 1;
    m_g_min = std::max(0, m_g_min - g_pad);
    m_g_max = std::min(63, m_g_max + g_pad);
}
// Intentionally empty: the single-channel fitter has no three-colour mode,
// so this override leaves `block` untouched.
void SingleChannelFit::Compress3( void* block )
{
// do not do anything.
}
// Four-colour fit of the cached grey levels by exhaustive search.
//
// Every pair of 6-bit endpoints (g0, g1) with m_g_min <= g1 < g0 <= m_g_max
// is expanded to 8 bits, the two interpolated palette entries are derived,
// and the weighted sum of absolute distances from each grey to its nearest
// palette entry is measured. The best pair is then used to assign indices
// and write the block.
void SingleChannelFit::Compress4( void* block )
{
    int const count = m_colours->GetCount();
    float const* weights = m_colours->GetWeights();

    // Start from a valid pair with g0 > g1, so the result is well defined
    // even when the search range is empty and the loops below never run.
    int best_g0 = 1;
    int best_g1 = 0;
    float best_error = FLT_MAX;

    // Brute force approach, try all the possible endpoints with g0 > g1.
    for(int g0 = m_g_min+1; g0 <= m_g_max; g0++) {
        for(int g1 = m_g_min; g1 < g0; g1++) {
            // Compute palette: expand 6 bits to 8 by replicating the top
            // bits into the low bits, then interpolate at 1/3 and 2/3.
            const int c0 = (g0 << 2) | (g0 >> 4);
            const int c1 = (g1 << 2) | (g1 >> 4);
            const int c2 = (2 * c0 + c1) / 3;
            const int c3 = (2 * c1 + c0) / 3;

            // Evaluate palette error.
            float error = 0;
            for(int i = 0; i < count; i++) {
                const int grey = m_greys[i];
                int min_dist = abs(c0 - grey); // Use absolute distance, not squared.
                min_dist = std::min(min_dist, abs(c1 - grey));
                min_dist = std::min(min_dist, abs(c2 - grey));
                min_dist = std::min(min_dist, abs(c3 - grey));
                error += min_dist * weights[i];
            }

            if(error < best_error) {
                best_error = error;
                best_g0 = g0;
                best_g1 = g1;
            }
        }
    }

    // Compute best palette.
    const int best_c0 = (best_g0 << 2) | (best_g0 >> 4);
    const int best_c1 = (best_g1 << 2) | (best_g1 >> 4);
    const int best_c2 = (2 * best_c0 + best_c1) / 3;
    const int best_c3 = (2 * best_c1 + best_c0) / 3;

    // Compute best indices: nearest palette entry, lowest index wins ties.
    u8 closest[16];
    for(int i = 0; i < count; i++) {
        const int grey = m_greys[i];
        int dist = abs(best_c0 - grey);
        int min_dist = dist;
        int min_i = 0;
        dist = abs(best_c1 - grey);
        if( dist < min_dist ) { min_dist = dist; min_i = 1; }
        dist = abs(best_c2 - grey);
        if( dist < min_dist ) { min_dist = dist; min_i = 2; }
        dist = abs(best_c3 - grey);
        if( dist < min_dist ) { min_dist = dist; min_i = 3; }
        closest[i] = min_i;
    }

    // remap the indices
    u8 indices[16];
    m_colours->RemapIndices( closest, indices );

    // Output block.
    // NOTE(review): the shift by 5 presumably positions the 6-bit value in
    // the green field of a 5:6:5 endpoint -- confirm against WriteColourBlock.
    WriteColourBlock(best_g0 << 5, best_g1 << 5, indices, block);
}
} // namespace squish

View File

@ -0,0 +1,53 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano castanyo@yahoo.es
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SINGLECHANNELFIT_H
#define SQUISH_SINGLECHANNELFIT_H
#include <squish.h>
#include "maths.h"
#include "colourfit.h"
namespace squish {
// ColourFit specialisation that keeps a table of up to 16 grey values plus a
// minimum/maximum grey.
// NOTE(review): presumably used for blocks where only one channel carries
// information -- confirm against the implementation file.
class SingleChannelFit : public ColourFit
{
public:
    // Builds the fitter from a colour set and the squish compression flags.
    SingleChannelFit( ColourSet const* colours, int flags );

private:
    // ColourFit compression hooks; each writes its result into 'block'.
    virtual void Compress3( void* block );
    virtual void Compress4( void* block );

    // grey value table (16 entries)
    u8 m_greys[16];

    // grey bounds -- presumably the range of m_greys; TODO confirm
    int m_g_min;
    int m_g_max;
};
} // namespace squish
#endif // ndef SQUISH_SINGLECHANNELFIT_H

View File

@ -0,0 +1,172 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "singlecolourfit.h"
#include "colourset.h"
#include "colourblock.h"
namespace squish {
// One candidate encoding of a single 8-bit channel value.
// NOTE(review): instances are presumably aggregate-initialised by the tables in
// singlecolourlookup.inl, so the member order must not change -- confirm.
struct SourceBlock
{
// quantised start end-point for the channel (scaled by 1/31 or 1/63 when used)
u8 start;
// quantised end end-point for the channel (same scaling as start)
u8 end;
// error of this encoding; squared and summed per channel by ComputeEndPoints
u8 error;
};
// Lookup entry for one channel value: the candidate encodings for that value,
// one per codebook index (ComputeEndPoints reads the first 3 or 4 of them).
struct SingleColourLookup
{
// indexed by codebook index
SourceBlock sources[4];
};
#include "singlecolourlookup.inl"
// Converts a float to the nearest integer and clamps the result to [0, limit].
static int FloatToInt( float a, int limit )
{
    // adding a half before the truncating conversion gives round-to-nearest
    int const rounded = static_cast< int >( a + 0.5f );

    // clamp below first, then above
    if( rounded < 0 )
        return 0;
    return ( rounded > limit ) ? limit : rounded;
}
// Captures the first point of the colour set as an 8-bit-per-channel colour
// and resets the best error so the first compression attempt always wins.
SingleColourFit::SingleColourFit( ColourSet const* colours, int flags )
  : ColourFit( colours, flags ),
    m_besterror( INT_MAX )
{
    // the single colour is the first point of the set, scaled from [0,1] to [0,255]
    Vec3 const& colour = *m_colours->GetPoints();
    m_colour[0] = static_cast< u8 >( FloatToInt( 255.0f*colour.X(), 255 ) );
    m_colour[1] = static_cast< u8 >( FloatToInt( 255.0f*colour.Y(), 255 ) );
    m_colour[2] = static_cast< u8 >( FloatToInt( 255.0f*colour.Z(), 255 ) );
}
void SingleColourFit::Compress3( void* block )
{
// build the table of lookups
SingleColourLookup const* const lookups[] =
{
lookup_5_3,
lookup_6_3,
lookup_5_3
};
// find the best end-points and index
ComputeEndPoints( 3, lookups );
// build the block if we win
if( m_error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( &m_index, indices );
// save the block
WriteColourBlock3( m_start, m_end, indices, block );
// save the error
m_besterror = m_error;
}
}
void SingleColourFit::Compress4( void* block )
{
// build the table of lookups
SingleColourLookup const* const lookups[] =
{
lookup_5_4,
lookup_6_4,
lookup_5_4
};
// find the best end-points and index
ComputeEndPoints( 4, lookups );
// build the block if we win
if( m_error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( &m_index, indices );
// save the block
WriteColourBlock4( m_start, m_end, indices, block );
// save the error
m_besterror = m_error;
}
}
void SingleColourFit::ComputeEndPoints( int count, SingleColourLookup const* const* lookups )
{
// check each index combination
m_error = INT_MAX;
for( int index = 0; index < count; ++index )
{
// check the error for this codebook index
SourceBlock const* sources[3];
int error = 0;
for( int channel = 0; channel < 3; ++channel )
{
// grab the lookup table and index for this channel
SingleColourLookup const* lookup = lookups[channel];
int target = m_colour[channel];
// store a pointer to the source for this channel
sources[channel] = lookup[target].sources + index;
// accumulate the error
int diff = sources[channel]->error;
error += diff*diff;
}
// keep it if the error is lower
if( error < m_error )
{
m_start = Vec3(
( float )sources[0]->start/31.0f,
( float )sources[1]->start/63.0f,
( float )sources[2]->start/31.0f
);
m_end = Vec3(
( float )sources[0]->end/31.0f,
( float )sources[1]->end/63.0f,
( float )sources[2]->end/31.0f
);
m_index = ( u8 )index;
m_error = error;
}
}
}
} // namespace squish

View File

@ -0,0 +1,58 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SINGLECOLOURFIT_H
#define SQUISH_SINGLECOLOURFIT_H
#include <squish.h>
#include "colourfit.h"
namespace squish {
class ColourSet;
struct SingleColourLookup;
// ColourFit specialisation for colour sets that contain a single colour: the
// end-points come from per-channel lookup tables instead of a search over the
// point set.
class SingleColourFit : public ColourFit
{
public:
    // Builds the fitter from a colour set and the squish compression flags.
    SingleColourFit( ColourSet const* colours, int flags );

private:
    // ColourFit compression hooks for the 3-entry and 4-entry codebooks.
    virtual void Compress3( void* block );
    virtual void Compress4( void* block );

    // Selects the lowest-error end-points among the first 'count' codebook indices.
    void ComputeEndPoints( int count, SingleColourLookup const* const* lookups );

private:
    u8 m_colour[3];     // the single colour, 8 bits per channel
    Vec3 m_start;       // chosen start end-point
    Vec3 m_end;         // chosen end end-point
    u8 m_index;         // chosen codebook index
    int m_error;        // error of the most recent ComputeEndPoints call
    int m_besterror;    // lowest error written out so far
};
} // namespace squish
#endif // ndef SQUISH_SINGLECOLOURFIT_H

File diff suppressed because it is too large Load Diff

225
src/nvtt/squish/squish.cpp Normal file
View File

@ -0,0 +1,225 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include <squish.h>
#include "colourset.h"
#include "maths.h"
#include "rangefit.h"
#include "clusterfit.h"
#include "colourblock.h"
#include "alpha.h"
#include "singlecolourfit.h"
namespace squish {
// Normalises a flags word so that exactly one compression method, one colour
// fit and one colour metric are selected. Categories that are unset or
// ambiguous fall back to kDxt1, kColourClusterFit and kColourMetricPerceptual
// respectively. kWeightColourByAlpha is passed through; all other bits are dropped.
static int FixFlags( int flags )
{
    // isolate each category
    int const requestedMethod = flags & ( kDxt1 | kDxt3 | kDxt5 );
    int const requestedFit = flags & ( kColourClusterFit | kColourRangeFit );
    int const requestedMetric = flags & ( kColourMetricPerceptual | kColourMetricUniform );

    // keep a non-default choice only when it is the sole bit set in its category
    int const method = ( requestedMethod == kDxt3 || requestedMethod == kDxt5 )
        ? requestedMethod
        : kDxt1;
    int const fit = ( requestedFit == kColourRangeFit )
        ? kColourRangeFit
        : kColourClusterFit;
    int const metric = ( requestedMetric == kColourMetricUniform )
        ? kColourMetricUniform
        : kColourMetricPerceptual;

    // recombine, carrying the optional alpha-weighting bit through
    return method | fit | metric | ( flags & kWeightColourByAlpha );
}
// Compresses one 4x4 block of rgba pixels into 'block'. For DXT3/DXT5 the
// alpha data occupies the first 8 bytes and the colour data the next 8; for
// DXT1 the colour data starts at the beginning of the block.
void Compress( u8 const* rgba, void* block, int flags )
{
    // normalise the flags
    flags = FixFlags( flags );

    // locate the alpha and colour parts of the output
    bool const hasAlphaBlock = ( flags & ( kDxt3 | kDxt5 ) ) != 0;
    void* const alphaBlock = block;
    void* colourBlock = block;
    if( hasAlphaBlock )
        colourBlock = static_cast< u8* >( block ) + 8;

    // reduce the pixels to the minimal point set
    ColourSet colours( rgba, flags );

    // choose the colour compressor
    if( colours.GetCount() == 1 )
    {
        // a single colour always goes through the lookup-table fit
        SingleColourFit singleFit( &colours, flags );
        singleFit.Compress( colourBlock );
    }
    else if( ( flags & kColourRangeFit ) != 0 )
    {
        // range fit was requested
        RangeFit rangeFit( &colours, flags );
        rangeFit.Compress( colourBlock );
    }
    else
    {
        // cluster fit is the default
        ClusterFit clusterFit( &colours, flags );
        clusterFit.Compress( colourBlock );
    }

    // alpha is compressed separately for the formats that carry it
    if( ( flags & kDxt3 ) != 0 )
        CompressAlphaDxt3( rgba, alphaBlock );
    else if( ( flags & kDxt5 ) != 0 )
        CompressAlphaDxt5( rgba, alphaBlock );
}
// Decompresses one DXT block into 16 rgba pixels. For DXT3/DXT5 the alpha data
// is read from the first 8 bytes and the colour data from the next 8; for DXT1
// the colour data starts at the beginning of the block.
void Decompress( u8* rgba, void const* block, int flags )
{
    // normalise the flags
    flags = FixFlags( flags );

    // locate the alpha and colour parts of the input
    bool const hasAlphaBlock = ( flags & ( kDxt3 | kDxt5 ) ) != 0;
    void const* const alphaBlock = block;
    void const* colourBlock = block;
    if( hasAlphaBlock )
        colourBlock = static_cast< u8 const* >( block ) + 8;

    // colour first; the last argument tells the decoder whether this is DXT1
    bool const isDxt1 = ( flags & kDxt1 ) != 0;
    DecompressColour( rgba, colourBlock, isDxt1 );

    // then alpha, for the formats that carry it
    if( ( flags & kDxt3 ) != 0 )
        DecompressAlphaDxt3( rgba, alphaBlock );
    else if( ( flags & kDxt5 ) != 0 )
        DecompressAlphaDxt5( rgba, alphaBlock );
}
int GetStorageRequirements( int width, int height, int flags )
{
// fix any bad flags
flags = FixFlags( flags );
// compute the storage requirements
int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
int blocksize = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
return blockcount*blocksize;
}
// Compresses a width x height rgba image into consecutive DXT blocks.
//
// rgba    source pixels, 4 bytes per pixel, stored row by row
// width   image width in pixels
// height  image height in pixels
// blocks  output storage; one 8-byte (DXT1) or 16-byte (DXT3/DXT5) block is
//         written per 4x4 tile, tiles ordered row by row
// flags   squish compression flags (normalised with FixFlags)
//
// Tiles that extend past the right or bottom edge of the image are filled by
// repeating the pixels that do lie inside the image, so no pixel outside the
// source buffer is ever read.
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags )
{
    // fix any bad flags
    flags = FixFlags( flags );

    // initialise the block output
    u8* targetBlock = reinterpret_cast< u8* >( blocks );
    int const bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;

    // loop over blocks
    for( int y = 0; y < height; y += 4 )
    {
        // number of valid source rows in this row of tiles (1..4)
        int const bh = std::min( height - y, 4 );

        for( int x = 0; x < width; x += 4 )
        {
            // number of valid source columns in this tile (1..4)
            int const bw = std::min( width - x, 4 );

            // build the 4x4 block of pixels
            u8 sourceRgba[16*4];
            u8* targetPixel = sourceRgba;
            for( int py = 0; py < 4; ++py )
            {
                for( int px = 0; px < 4; ++px )
                {
                    // get the source pixel in the image, wrapping inside the
                    // valid part of the tile so edge tiles stay in bounds
                    int const sx = x + ( px % bw );
                    int const sy = y + ( py % bh );

                    // copy the rgba value
                    u8 const* sourcePixel = rgba + 4*( width*sy + sx );
                    for( int i = 0; i < 4; ++i )
                        *targetPixel++ = *sourcePixel++;
                }
            }

            // compress it into the output
            Compress( sourceRgba, targetBlock, flags );

            // advance
            targetBlock += bytesPerBlock;
        }
    }
}
// Decompresses consecutive DXT blocks into a width x height rgba image.
// Each block is decoded into a temporary 4x4 tile; only the pixels of the tile
// that fall inside the image are copied to 'rgba'.
void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
{
    // normalise the flags
    flags = FixFlags( flags );

    // set up the block input
    int const bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
    u8 const* sourceBlock = static_cast< u8 const* >( blocks );

    // walk the tiles row by row, consuming one block per tile
    for( int y = 0; y < height; y += 4 )
    {
        for( int x = 0; x < width; x += 4, sourceBlock += bytesPerBlock )
        {
            // decode the whole tile
            u8 decoded[4*16];
            Decompress( decoded, sourceBlock, flags );

            // scatter the in-range pixels to their image locations
            for( int py = 0; py < 4; ++py )
            {
                int const ty = y + py;
                if( ty >= height )
                    continue;   // this row of the tile is outside the image

                for( int px = 0; px < 4; ++px )
                {
                    int const tx = x + px;
                    if( tx >= width )
                        continue;   // this pixel is outside the image

                    // copy the four rgba bytes
                    u8 const* const from = decoded + 4*( 4*py + px );
                    u8* const to = rgba + 4*( width*ty + tx );
                    for( int i = 0; i < 4; ++i )
                        to[i] = from[i];
                }
            }
        }
    }
}
} // namespace squish

244
src/nvtt/squish/squish.h Normal file
View File

@ -0,0 +1,244 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_H
#define SQUISH_H
//! All squish API functions live in this namespace.
namespace squish {
// -----------------------------------------------------------------------------
//! Typedef a quantity that is a single unsigned byte.
typedef unsigned char u8;
// -----------------------------------------------------------------------------
//! Compression flags. Each flag is a distinct single bit so flags can be
//! combined with bitwise OR.
enum
{
    //! Use DXT1 compression.
    kDxt1 = 0x01,

    //! Use DXT3 compression.
    kDxt3 = 0x02,

    //! Use DXT5 compression.
    kDxt5 = 0x04,

    //! Use a slow but high quality colour compressor (the default).
    kColourClusterFit = 0x08,

    //! Use a fast but low quality colour compressor.
    kColourRangeFit = 0x10,

    //! Use a perceptual metric for colour error (the default).
    kColourMetricPerceptual = 0x20,

    //! Use a uniform metric for colour error.
    kColourMetricUniform = 0x40,

    //! Weight the colour by alpha during cluster fit (disabled by default).
    kWeightColourByAlpha = 0x80
};
// -----------------------------------------------------------------------------
/*! @brief Compresses a 4x4 block of pixels.
@param rgba The rgba values of the 16 source pixels.
@param block Storage for the compressed DXT block.
@param flags Compression flags.
The source pixels should be presented as a contiguous array of 16 rgba
values, with each component as 1 byte each. In memory this should be:
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. When using DXT1
compression, 8 bytes of storage are required for the compressed DXT block.
DXT3 and DXT5 compression require 16 bytes of storage per block.
The flags parameter can also specify a preferred colour compressor and
colour error metric to use when fitting the RGB components of the data.
Possible colour compressors are: kColourClusterFit (the default) or
kColourRangeFit. Possible colour error metrics are: kColourMetricPerceptual
(the default) or kColourMetricUniform. If no flags are specified in any
particular category then the default will be used. Unknown flags are
ignored.
When using kColourClusterFit, an additional flag can be specified to
weight the colour of each pixel by its alpha value. For images that are
rendered using alpha blending, this can significantly increase the
perceived quality.
*/
void Compress( u8 const* rgba, void* block, int flags );
// -----------------------------------------------------------------------------
/*! @brief Compresses a 4x4 block of pixels.
@param rgba The rgba values of the 16 source pixels.
@param mask The valid pixel mask.
@param block Storage for the compressed DXT block.
@param flags Compression flags.
The source pixels should be presented as a contiguous array of 16 rgba
values, with each component as 1 byte each. In memory this should be:
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
The mask parameter enables only certain pixels within the block. The lowest
bit enables the first pixel and so on up to the 16th bit. Bits beyond the
16th bit are ignored. Pixels that are not enabled are allowed to take
arbitrary colours in the output block. An example of how this can be used
is in the CompressImage function to disable pixels outside the bounds of
the image when the width or height is not divisible by 4.
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. When using DXT1
compression, 8 bytes of storage are required for the compressed DXT block.
DXT3 and DXT5 compression require 16 bytes of storage per block.
The flags parameter can also specify a preferred colour compressor and
colour error metric to use when fitting the RGB components of the data.
Possible colour compressors are: kColourClusterFit (the default) or
kColourRangeFit. Possible colour error metrics are: kColourMetricPerceptual
(the default) or kColourMetricUniform. If no flags are specified in any
particular category then the default will be used. Unknown flags are
ignored.
When using kColourClusterFit, an additional flag can be specified to
weight the colour of each pixel by its alpha value. For images that are
rendered using alpha blending, this can significantly increase the
perceived quality.
*/
void CompressMasked( u8 const* rgba, int mask, void* block, int flags );
// -----------------------------------------------------------------------------
/*! @brief Decompresses a 4x4 block of pixels.
@param rgba Storage for the 16 decompressed pixels.
@param block The compressed DXT block.
@param flags Compression flags.
The decompressed pixels will be written as a contiguous array of 16 rgba
values, with each component as 1 byte each. In memory this is:
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
*/
void Decompress( u8* rgba, void const* block, int flags );
// -----------------------------------------------------------------------------
/*! @brief Computes the amount of compressed storage required.
@param width The width of the image.
@param height The height of the image.
@param flags Compression flags.
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
Most DXT images will be a multiple of 4 in each dimension, but this
function supports arbitrary size images by allowing the outer blocks to
be only partially used.
*/
int GetStorageRequirements( int width, int height, int flags );
// -----------------------------------------------------------------------------
/*! @brief Compresses an image in memory.
@param rgba The pixels of the source.
@param width The width of the source image.
@param height The height of the source image.
@param blocks Storage for the compressed output.
@param flags Compression flags.
The source pixels should be presented as a contiguous array of width*height
rgba values, with each component as 1 byte each. In memory this should be:
{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. When using DXT1
compression, 8 bytes of storage are required for each compressed DXT block.
DXT3 and DXT5 compression require 16 bytes of storage per block.
The flags parameter can also specify a preferred colour compressor and
colour error metric to use when fitting the RGB components of the data.
Possible colour compressors are: kColourClusterFit (the default) or
kColourRangeFit. Possible colour error metrics are: kColourMetricPerceptual
(the default) or kColourMetricUniform. If no flags are specified in any
particular category then the default will be used. Unknown flags are
ignored.
When using kColourClusterFit, an additional flag can be specified to
weight the colour of each pixel by its alpha value. For images that are
rendered using alpha blending, this can significantly increase the
perceived quality.
Internally this function calls squish::Compress for each block. To see how
much memory is required in the compressed image, use
squish::GetStorageRequirements.
*/
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags );
// -----------------------------------------------------------------------------
/*! @brief Decompresses an image in memory.
@param rgba Storage for the decompressed pixels.
@param width The width of the source image.
@param height The height of the source image.
@param blocks The compressed DXT blocks.
@param flags Compression flags.
The decompressed pixels will be written as a contiguous array of width*height
rgba values, with each component as 1 byte each. In memory this is:
{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
Internally this function calls squish::Decompress for each block.
*/
void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags );
// -----------------------------------------------------------------------------
} // namespace squish
#endif // ndef SQUISH_H

View File

@ -0,0 +1,531 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 42;
objects = {
/* Begin PBXBuildFile section */
133FA0DC096A7B8E0050752E /* alpha.h in Headers */ = {isa = PBXBuildFile; fileRef = 133FA0DA096A7B8E0050752E /* alpha.h */; };
133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 133FA0DB096A7B8E0050752E /* alpha.cpp */; };
1342B4160999DF1900152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
1342B41A0999DF7000152915 /* squishpng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B4190999DF7000152915 /* squishpng.cpp */; };
1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B43E0999E0CC00152915 /* squishtest.cpp */; };
1342B4420999E0EC00152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70B092AA857005EE038 /* clusterfit.cpp */; };
1350D71B092AA858005EE038 /* clusterfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D70C092AA858005EE038 /* clusterfit.h */; };
1350D71E092AA858005EE038 /* colourblock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70F092AA858005EE038 /* colourblock.cpp */; };
1350D71F092AA858005EE038 /* colourblock.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D710092AA858005EE038 /* colourblock.h */; };
1350D720092AA858005EE038 /* config.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D711092AA858005EE038 /* config.h */; };
1350D721092AA858005EE038 /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D712092AA858005EE038 /* maths.cpp */; };
1350D722092AA858005EE038 /* maths.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D713092AA858005EE038 /* maths.h */; };
1350D725092AA858005EE038 /* rangefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D716092AA858005EE038 /* rangefit.cpp */; };
1350D726092AA858005EE038 /* rangefit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D717092AA858005EE038 /* rangefit.h */; };
1350D727092AA858005EE038 /* squish.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D718092AA858005EE038 /* squish.cpp */; };
1350D728092AA858005EE038 /* squish.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D719092AA858005EE038 /* squish.h */; settings = {ATTRIBUTES = (Public, ); }; };
139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C21CE09ADAB0800A2500D /* squishgen.cpp */; };
139C234F09B0602700A2500D /* singlecolourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 139C234D09B0602700A2500D /* singlecolourfit.h */; };
139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C234E09B0602700A2500D /* singlecolourfit.cpp */; };
13A7CCA40952BE63001C963A /* colourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 13A7CCA20952BE63001C963A /* colourfit.h */; };
13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13A7CCA30952BE63001C963A /* colourfit.cpp */; };
13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13C4C7AB0941C18000AC5B89 /* colourset.cpp */; };
13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */ = {isa = PBXBuildFile; fileRef = 13C4C7AC0941C18000AC5B89 /* colourset.h */; };
13CD64C2092BCF8A00488C97 /* simd.h in Headers */ = {isa = PBXBuildFile; fileRef = 13CD64C0092BCF8A00488C97 /* simd.h */; };
13D0DC910931F93A00909807 /* simd_ve.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC900931F93A00909807 /* simd_ve.h */; };
13D0DC970931F9D600909807 /* simd_sse.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC960931F9D600909807 /* simd_sse.h */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
1342B52B099BF72F00152915 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = squish;
};
1342B58E099BF93D00152915 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = squish;
};
/* End PBXContainerItemProxy section */
/* Begin PBXFileReference section */
133FA0DA096A7B8E0050752E /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = "<group>"; };
133FA0DB096A7B8E0050752E /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = "<group>"; };
1342B4110999DE7F00152915 /* squishpng */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishpng; sourceTree = BUILT_PRODUCTS_DIR; };
1342B4190999DF7000152915 /* squishpng.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = squishpng.cpp; path = extra/squishpng.cpp; sourceTree = "<group>"; };
1342B4370999E07C00152915 /* squishtest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishtest; sourceTree = BUILT_PRODUCTS_DIR; };
1342B43E0999E0CC00152915 /* squishtest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishtest.cpp; path = extra/squishtest.cpp; sourceTree = "<group>"; };
1350D70B092AA857005EE038 /* clusterfit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfit.cpp; sourceTree = "<group>"; };
1350D70C092AA858005EE038 /* clusterfit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = clusterfit.h; sourceTree = "<group>"; };
1350D70F092AA858005EE038 /* colourblock.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourblock.cpp; sourceTree = "<group>"; };
1350D710092AA858005EE038 /* colourblock.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourblock.h; sourceTree = "<group>"; };
1350D711092AA858005EE038 /* config.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = "<group>"; };
1350D712092AA858005EE038 /* maths.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = maths.cpp; sourceTree = "<group>"; };
1350D713092AA858005EE038 /* maths.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = maths.h; sourceTree = "<group>"; };
1350D716092AA858005EE038 /* rangefit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = rangefit.cpp; sourceTree = "<group>"; };
1350D717092AA858005EE038 /* rangefit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = rangefit.h; sourceTree = "<group>"; };
1350D718092AA858005EE038 /* squish.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = squish.cpp; sourceTree = "<group>"; };
1350D719092AA858005EE038 /* squish.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = squish.h; sourceTree = "<group>"; };
13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = texture_compression_s3tc.txt; sourceTree = "<group>"; };
139C21C409ADAA7000A2500D /* squishgen */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishgen; sourceTree = BUILT_PRODUCTS_DIR; };
139C21CE09ADAB0800A2500D /* squishgen.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishgen.cpp; path = extra/squishgen.cpp; sourceTree = "<group>"; };
139C234D09B0602700A2500D /* singlecolourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = singlecolourfit.h; sourceTree = "<group>"; };
139C234E09B0602700A2500D /* singlecolourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = singlecolourfit.cpp; sourceTree = "<group>"; };
139C236D09B060A900A2500D /* singlecolourlookup.inl */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = singlecolourlookup.inl; sourceTree = "<group>"; };
13A7CCA20952BE63001C963A /* colourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colourfit.h; sourceTree = "<group>"; };
13A7CCA30952BE63001C963A /* colourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colourfit.cpp; sourceTree = "<group>"; };
13C4C7AB0941C18000AC5B89 /* colourset.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourset.cpp; sourceTree = "<group>"; };
13C4C7AC0941C18000AC5B89 /* colourset.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourset.h; sourceTree = "<group>"; };
13CD64C0092BCF8A00488C97 /* simd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd.h; sourceTree = "<group>"; };
13D0DC900931F93A00909807 /* simd_ve.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_ve.h; sourceTree = "<group>"; };
13D0DC960931F9D600909807 /* simd_sse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_sse.h; sourceTree = "<group>"; };
D2AAC046055464E500DB518D /* libsquish.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsquish.a; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
1342B40F0999DE7F00152915 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
1342B4160999DF1900152915 /* libsquish.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
1342B4350999E07C00152915 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
1342B4420999E0EC00152915 /* libsquish.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
139C21C209ADAA7000A2500D /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
D289987405E68DCB004EDB86 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
08FB7794FE84155DC02AAC07 /* squish */ = {
isa = PBXGroup;
children = (
08FB7795FE84155DC02AAC07 /* Source */,
C6A0FF2B0290797F04C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */,
);
name = squish;
sourceTree = "<group>";
};
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
133FA0DB096A7B8E0050752E /* alpha.cpp */,
133FA0DA096A7B8E0050752E /* alpha.h */,
1350D70B092AA857005EE038 /* clusterfit.cpp */,
1350D70C092AA858005EE038 /* clusterfit.h */,
13A7CCA30952BE63001C963A /* colourfit.cpp */,
13A7CCA20952BE63001C963A /* colourfit.h */,
13C4C7AB0941C18000AC5B89 /* colourset.cpp */,
13C4C7AC0941C18000AC5B89 /* colourset.h */,
1350D70F092AA858005EE038 /* colourblock.cpp */,
1350D710092AA858005EE038 /* colourblock.h */,
13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */,
1350D711092AA858005EE038 /* config.h */,
1350D712092AA858005EE038 /* maths.cpp */,
1350D713092AA858005EE038 /* maths.h */,
1350D716092AA858005EE038 /* rangefit.cpp */,
1350D717092AA858005EE038 /* rangefit.h */,
13CD64C0092BCF8A00488C97 /* simd.h */,
13D0DC960931F9D600909807 /* simd_sse.h */,
13D0DC900931F93A00909807 /* simd_ve.h */,
139C234E09B0602700A2500D /* singlecolourfit.cpp */,
139C234D09B0602700A2500D /* singlecolourfit.h */,
139C236D09B060A900A2500D /* singlecolourlookup.inl */,
1350D718092AA858005EE038 /* squish.cpp */,
1350D719092AA858005EE038 /* squish.h */,
139C21CE09ADAB0800A2500D /* squishgen.cpp */,
1342B4190999DF7000152915 /* squishpng.cpp */,
1342B43E0999E0CC00152915 /* squishtest.cpp */,
);
name = Source;
sourceTree = "<group>";
};
1AB674ADFE9D54B511CA2CBB /* Products */ = {
isa = PBXGroup;
children = (
D2AAC046055464E500DB518D /* libsquish.a */,
1342B4110999DE7F00152915 /* squishpng */,
1342B4370999E07C00152915 /* squishtest */,
139C21C409ADAA7000A2500D /* squishgen */,
);
name = Products;
sourceTree = "<group>";
};
C6A0FF2B0290797F04C91782 /* Documentation */ = {
isa = PBXGroup;
children = (
);
name = Documentation;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
D2AAC043055464E500DB518D /* Headers */ = {
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
1350D71B092AA858005EE038 /* clusterfit.h in Headers */,
1350D71F092AA858005EE038 /* colourblock.h in Headers */,
1350D720092AA858005EE038 /* config.h in Headers */,
1350D722092AA858005EE038 /* maths.h in Headers */,
1350D726092AA858005EE038 /* rangefit.h in Headers */,
1350D728092AA858005EE038 /* squish.h in Headers */,
13CD64C2092BCF8A00488C97 /* simd.h in Headers */,
13D0DC910931F93A00909807 /* simd_ve.h in Headers */,
13D0DC970931F9D600909807 /* simd_sse.h in Headers */,
13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */,
13A7CCA40952BE63001C963A /* colourfit.h in Headers */,
133FA0DC096A7B8E0050752E /* alpha.h in Headers */,
139C234F09B0602700A2500D /* singlecolourfit.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXHeadersBuildPhase section */
/* Begin PBXNativeTarget section */
1342B4100999DE7F00152915 /* squishpng */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */;
buildPhases = (
1342B40E0999DE7F00152915 /* Sources */,
1342B40F0999DE7F00152915 /* Frameworks */,
);
buildRules = (
);
dependencies = (
1342B58F099BF93D00152915 /* PBXTargetDependency */,
);
name = squishpng;
productName = squishpng;
productReference = 1342B4110999DE7F00152915 /* squishpng */;
productType = "com.apple.product-type.tool";
};
1342B4360999E07C00152915 /* squishtest */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */;
buildPhases = (
1342B4340999E07C00152915 /* Sources */,
1342B4350999E07C00152915 /* Frameworks */,
);
buildRules = (
);
dependencies = (
1342B52C099BF72F00152915 /* PBXTargetDependency */,
);
name = squishtest;
productName = squishtest;
productReference = 1342B4370999E07C00152915 /* squishtest */;
productType = "com.apple.product-type.tool";
};
139C21C309ADAA7000A2500D /* squishgen */ = {
isa = PBXNativeTarget;
buildConfigurationList = 139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */;
buildPhases = (
139C21C109ADAA7000A2500D /* Sources */,
139C21C209ADAA7000A2500D /* Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = squishgen;
productName = squishgen;
productReference = 139C21C409ADAA7000A2500D /* squishgen */;
productType = "com.apple.product-type.tool";
};
D2AAC045055464E500DB518D /* squish */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */;
buildPhases = (
D2AAC043055464E500DB518D /* Headers */,
D2AAC044055464E500DB518D /* Sources */,
D289987405E68DCB004EDB86 /* Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = squish;
productName = squish;
productReference = D2AAC046055464E500DB518D /* libsquish.a */;
productType = "com.apple.product-type.library.static";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
08FB7793FE84155DC02AAC07 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */;
hasScannedForEncodings = 1;
mainGroup = 08FB7794FE84155DC02AAC07 /* squish */;
projectDirPath = "";
targets = (
D2AAC045055464E500DB518D /* squish */,
1342B4100999DE7F00152915 /* squishpng */,
1342B4360999E07C00152915 /* squishtest */,
139C21C309ADAA7000A2500D /* squishgen */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
1342B40E0999DE7F00152915 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1342B41A0999DF7000152915 /* squishpng.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
1342B4340999E07C00152915 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
139C21C109ADAA7000A2500D /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
D2AAC044055464E500DB518D /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */,
1350D71E092AA858005EE038 /* colourblock.cpp in Sources */,
1350D721092AA858005EE038 /* maths.cpp in Sources */,
1350D725092AA858005EE038 /* rangefit.cpp in Sources */,
1350D727092AA858005EE038 /* squish.cpp in Sources */,
13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */,
13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */,
133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */,
139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin PBXTargetDependency section */
1342B52C099BF72F00152915 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = D2AAC045055464E500DB518D /* squish */;
targetProxy = 1342B52B099BF72F00152915 /* PBXContainerItemProxy */;
};
1342B58F099BF93D00152915 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = D2AAC045055464E500DB518D /* squish */;
targetProxy = 1342B58E099BF93D00152915 /* PBXContainerItemProxy */;
};
/* End PBXTargetDependency section */
/* Begin XCBuildConfiguration section */
1342B4140999DE9F00152915 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = (
..,
/sw/include,
);
INSTALL_PATH = "$(HOME)/bin";
LIBRARY_SEARCH_PATHS = /sw/lib;
OTHER_LDFLAGS = "-lpng";
PRODUCT_NAME = squishpng;
};
name = Debug;
};
1342B4150999DE9F00152915 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = (
..,
/sw/include,
);
INSTALL_PATH = "$(HOME)/bin";
LIBRARY_SEARCH_PATHS = /sw/lib;
OTHER_LDFLAGS = "-lpng";
PRODUCT_NAME = squishpng;
};
name = Release;
};
1342B43C0999E0C000152915 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishtest;
};
name = Debug;
};
1342B43D0999E0C000152915 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishtest;
};
name = Release;
};
139C21CC09ADAB0300A2500D /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishgen;
};
name = Debug;
};
139C21CD09ADAB0300A2500D /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishgen;
};
name = Release;
};
1DEB91EC08733DB70010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
COPY_PHASE_STRIP = NO;
GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
INSTALL_PATH = /usr/local/lib;
OTHER_CFLAGS = "-maltivec";
PRODUCT_NAME = squish;
STRIP_INSTALLED_PRODUCT = NO;
};
name = Debug;
};
1DEB91ED08733DB70010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
INSTALL_PATH = /usr/local/lib;
OTHER_CFLAGS = "-maltivec";
PRODUCT_NAME = squish;
STRIP_INSTALLED_PRODUCT = YES;
};
name = Release;
};
1DEB91F008733DB70010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_DYNAMIC_NO_PIC = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_TREAT_WARNINGS_AS_ERRORS = YES;
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_PEDANTIC = YES;
GCC_WARN_SHADOW = YES;
GCC_WARN_SIGN_COMPARE = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VALUE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
};
name = Debug;
};
1DEB91F108733DB70010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_DYNAMIC_NO_PIC = YES;
GCC_OPTIMIZATION_LEVEL = 3;
GCC_TREAT_WARNINGS_AS_ERRORS = YES;
GCC_UNROLL_LOOPS = YES;
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_PEDANTIC = YES;
GCC_WARN_SHADOW = YES;
GCC_WARN_SIGN_COMPARE = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VALUE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1342B4140999DE9F00152915 /* Debug */,
1342B4150999DE9F00152915 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1342B43C0999E0C000152915 /* Debug */,
1342B43D0999E0C000152915 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */ = {
isa = XCConfigurationList;
buildConfigurations = (
139C21CC09ADAB0300A2500D /* Debug */,
139C21CD09ADAB0300A2500D /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB91EC08733DB70010E9CD /* Debug */,
1DEB91ED08733DB70010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB91F008733DB70010E9CD /* Debug */,
1DEB91F108733DB70010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
}

View File

@ -0,0 +1,508 @@
Name
EXT_texture_compression_s3tc
Name Strings
GL_EXT_texture_compression_s3tc
Contact
Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com)
Status
FINAL
Version
1.1, 16 November 2001 (containing only clarifications relative to
version 1.0, dated 7 July 2000)
Number
198
Dependencies
OpenGL 1.1 is required.
GL_ARB_texture_compression is required.
This extension is written against the OpenGL 1.2.1 Specification.
Overview
This extension provides additional texture compression functionality
specific to S3's S3TC format (called DXTC in Microsoft's DirectX API),
subject to all the requirements and limitations described by the extension
GL_ARB_texture_compression.
This extension supports DXT1, DXT3, and DXT5 texture compression formats.
For the DXT1 image format, this specification supports an RGB-only mode
and a special RGBA mode with single-bit "transparent" alpha.
IP Status
Contact S3 Incorporated (http://www.s3.com) regarding any intellectual
property issues associated with implementing this extension.
WARNING: Vendors able to support S3TC texture compression in Direct3D
drivers do not necessarily have the right to use the same functionality in
OpenGL.
Issues
(1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported?
RESOLVED: No -- insufficient interest. Supporting DXT2 and DXT4
would require some rework to the TexEnv definition (maybe add a new
base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats.
Note that the EXT_texture_env_combine extension (which extends normal
TexEnv modes) can be used to support textures with premultiplied alpha.
(2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported
or should we use only the DXT<n> enums?
RESOLVED: No. A generic RGBA_S3TC_EXT is problematic because DXT3
and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is
also) yet one format must be chosen up front.
(3) Should TexSubImage support all block-aligned edits or just the minimal
functionality required by the ARB_texture_compression extension?
RESOLVED: Allow all valid block-aligned edits.
(4) A pre-compressed image with a DXT1 format can be used as either an
RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image. If the image has
transparent texels, how are they treated in each format?
RESOLVED: The renderer has to make sure that an RGB_S3TC_DXT1 format
is decoded as RGB (where alpha is effectively one for all texels),
while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all
texels with "transparent" encodings). Otherwise, the formats are
identical.
(5) Is the encoding of the RGB components for DXT1 formats correct in this
spec? MSDN documentation does not specify an RGB color for the
"transparent" encoding. Is it really black?
RESOLVED: Yes. The specification for the DXT1 format initially
required black, but later changed that requirement to a
recommendation. All vendors involved in the definition of this
specification support black. In addition, specifying black has a
useful behavior.
When blending multiple texels (GL_LINEAR filtering), mixing opaque and
transparent samples is problematic. Defining a black color on
transparent texels achieves a sensible result that works like a
texture with premultiplied alpha. For example, if three opaque white
and one transparent sample are being averaged, the result would be a
75% intensity gray (with an alpha of 75%). This is the same result on
the color channels as would be obtained using a white color, 75%
alpha, and a SRC_ALPHA blend factor.
(6) Is the encoding of the RGB components for DXT3 and DXT5 formats
correct in this spec? MSDN documentation suggests that the RGB blocks
for DXT3 and DXT5 are decoded as described by the DXT1 format.
RESOLVED: Yes -- this appears to be a bug in the MSDN documentation.
The specification for the DXT2-DXT5 formats requires decoding using the
opaque block encoding, regardless of the relative values of "color0"
and "color1".
New Procedures and Functions
None.
New Tokens
Accepted by the <internalformat> parameter of TexImage2D, CopyTexImage2D,
and CompressedTexImage2DARB and the <format> parameter of
CompressedTexSubImage2DARB:
COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0
COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1
COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2
COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3
Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation)
None.
Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization)
Add to Table 3.16.1: Specific Compressed Internal Formats
Compressed Internal Format Base Internal Format
========================== ====================
COMPRESSED_RGB_S3TC_DXT1_EXT RGB
COMPRESSED_RGBA_S3TC_DXT1_EXT RGBA
COMPRESSED_RGBA_S3TC_DXT3_EXT RGBA
COMPRESSED_RGBA_S3TC_DXT5_EXT RGBA
Modify Section 3.8.2, Alternate Image Specification
(add to end of TexSubImage discussion, p.123 -- after edit from the
ARB_texture_compression spec)
If the internal format of the texture image being modified is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
texture is stored using one of the several S3TC compressed texture image
formats. Such images are easily edited along 4x4 texel boundaries, so the
limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed.
TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION
error only if one of the following conditions occurs:
* <width> is not a multiple of four or equal to TEXTURE_WIDTH,
unless <xoffset> and <yoffset> are both zero.
* <height> is not a multiple of four or equal to TEXTURE_HEIGHT,
unless <xoffset> and <yoffset> are both zero.
* <xoffset> or <yoffset> is not a multiple of four.
The contents of any 4x4 block of texels of an S3TC compressed texture
image that does not intersect the area being modified are preserved during
valid TexSubImage2D and CopyTexSubImage2D calls.
Add to Section 3.8.2, Alternate Image Specification (adding to the end of
the CompressedTexImage section introduced by the ARB_texture_compression
spec)
If <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one
of several S3TC compressed texture image formats. The S3TC texture
compression algorithm supports only 2D images without borders.
CompressedTexImage1DARB and CompressedTexImage3DARB produce an
INVALID_ENUM error if <internalformat> is an S3TC format.
CompressedTexImage2DARB will produce an INVALID_OPERATION error if
<border> is non-zero.
Add to Section 3.8.2, Alternate Image Specification (adding to the end of
the CompressedTexSubImage section introduced by the
ARB_texture_compression spec)
If the internal format of the texture image being modified is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
texture is stored using one of the several S3TC compressed texture image
formats. Since the S3TC texture compression algorithm supports only 2D
images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce
an INVALID_ENUM error if <format> is an S3TC format. Since S3TC images
are easily edited along 4x4 texel boundaries, the limitations on
CompressedTexSubImage2D are relaxed. CompressedTexSubImage2D will result
in an INVALID_OPERATION error only if one of the following conditions
occurs:
* <width> is not a multiple of four or equal to TEXTURE_WIDTH.
* <height> is not a multiple of four or equal to TEXTURE_HEIGHT.
* <xoffset> or <yoffset> is not a multiple of four.
The contents of any 4x4 block of texels of an S3TC compressed texture
image that does not intersect the area being modified are preserved during
valid TexSubImage2D and CopyTexSubImage2D calls.
Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment
Operations and the Frame Buffer)
None.
Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions)
None.
Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and
State Requests)
None.
Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance)
None.
Additions to the AGL/GLX/WGL Specifications
None.
GLX Protocol
None.
Errors
INVALID_ENUM is generated by CompressedTexImage1DARB or
CompressedTexImage3DARB if <internalformat> is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT.
INVALID_OPERATION is generated by CompressedTexImage2DARB if
<internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
COMPRESSED_RGBA_S3TC_DXT5_EXT and <border> is not equal to zero.
INVALID_ENUM is generated by CompressedTexSubImage1DARB or
CompressedTexSubImage3DARB if <format> is COMPRESSED_RGB_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
COMPRESSED_RGBA_S3TC_DXT5_EXT.
INVALID_OPERATION is generated by TexSubImage2D, CopyTexSubImage2D, or
CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of
the following apply: <width> is not a multiple of four or equal to
TEXTURE_WIDTH; <height> is not a multiple of four or equal to
TEXTURE_HEIGHT; <xoffset> or <yoffset> is not a multiple of four.
The following restrictions from the ARB_texture_compression specification
do not apply to S3TC texture formats, since subimage modification is
straightforward as long as the subimage is properly aligned.
DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D,
DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or
DELETE: CopyTexSubImage3D if the internal format of the texture image is
DELETE: compressed and <xoffset>, <yoffset>, or <zoffset> does not equal
DELETE: -b, where b is the value of TEXTURE_BORDER.
DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB,
DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the
DELETE: entire texture image is not being edited: if <xoffset>,
DELETE: <yoffset>, or <zoffset> is greater than -b, <xoffset> + <width> is
DELETE: less than w+b, <yoffset> + <height> is less than h+b, or <zoffset>
DELETE: + <depth> is less than d+b, where b is the value of
DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of
DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH.
See also errors in the GL_ARB_texture_compression specification.
New State
In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT
subscript for Z by 4 in the "Type" row.
New Implementation Dependent State
None
Appendix
S3TC Compressed Texture Image Formats
Compressed texture images stored using the S3TC compressed image formats
are represented as a collection of 4x4 texel blocks, where each block
contains 64 or 128 bits of texel data. The image is encoded as a normal
2D raster image in which each 4x4 block is treated as a single pixel. If
an S3TC image has a width or height less than four, the data corresponding
to texels outside the image are irrelevant and undefined.
When an S3TC image with a width of <w>, height of <h>, and block size of
<blocksize> (8 or 16 bytes) is decoded, the corresponding image size (in
bytes) is:
ceil(<w>/4) * ceil(<h>/4) * blocksize.
When decoding an S3TC image, the block containing the texel at offset
(<x>, <y>) begins at an offset (in bytes) relative to the base of the
image of:
blocksize * (ceil(<w>/4) * floor(<y>/4) + floor(<x>/4)).
The data corresponding to a specific texel (<x>, <y>) are extracted from a
4x4 texel block using a relative (x,y) value of
(<x> modulo 4, <y> modulo 4).
There are four distinct S3TC image formats:
COMPRESSED_RGB_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64
bits of RGB image data.
Each RGB image data block is encoded as a sequence of 8 bytes, called (in
order of increasing address):
c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3
The 8 bytes of the block are decoded into three quantities:
color0 = c0_lo + c0_hi * 256
color1 = c1_lo + c1_hi * 256
bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3))
color0 and color1 are 16-bit unsigned integers that are unpacked to
RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with
a <format> of RGB and a type of UNSIGNED_SHORT_5_6_5.
bits is a 32-bit unsigned integer, from which a two-bit control code
is extracted for a texel at location (x,y) in the block using:
code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0]
where bit 31 is the most significant and bit 0 is the least
significant bit.
The RGB color for a texel at location (x,y) in the block is given by:
RGB0, if color0 > color1 and code(x,y) == 0
RGB1, if color0 > color1 and code(x,y) == 1
(2*RGB0+RGB1)/3, if color0 > color1 and code(x,y) == 2
(RGB0+2*RGB1)/3, if color0 > color1 and code(x,y) == 3
RGB0, if color0 <= color1 and code(x,y) == 0
RGB1, if color0 <= color1 and code(x,y) == 1
(RGB0+RGB1)/2, if color0 <= color1 and code(x,y) == 2
BLACK, if color0 <= color1 and code(x,y) == 3
Arithmetic operations are done per component, and BLACK refers to an
RGB color where red, green, and blue are all zero.
Since this image has an RGB format, there is no alpha component and the
image is considered fully opaque.
COMPRESSED_RGBA_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64
bits of RGB image data and minimal alpha information. The RGB components
of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT.
The alpha component for a texel at location (x,y) in the block is
given by:
0.0, if color0 <= color1 and code(x,y) == 3
1.0, otherwise
IMPORTANT: When encoding an RGBA image into a format using 1-bit
alpha, any texels with an alpha component less than 0.5 end up with an
alpha of 0.0 and any texels with an alpha component greater than or
equal to 0.5 end up with an alpha of 1.0. When encoding an RGBA image
into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red,
green, and blue components of any texels with a final alpha of 0.0
will automatically be zero (black). If this behavior is not desired
by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT.
This format will never be used when a generic compressed internal
format (Table 3.16.2) is specified, although the nearly identical
format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be.
COMPRESSED_RGBA_S3TC_DXT3_EXT: Each 4x4 block of texels consists of 64
bits of uncompressed alpha image data followed by 64 bits of RGB image
data.
Each RGB image data block is encoded according to the
COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
bits always use the non-transparent encodings. In other words, they are
treated as though color0 > color1, regardless of the actual values of
color0 and color1.
Each alpha image data block is encoded as a sequence of 8 bytes, called
(in order of increasing address):
a0, a1, a2, a3, a4, a5, a6, a7
The 8 bytes of the block are decoded into one 64-bit integer:
alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 +
256 * (a5 + 256 * (a6 + 256 * a7))))))
alpha is a 64-bit unsigned integer, from which a four-bit alpha value
is extracted for a texel at location (x,y) in the block using:
alpha(x,y) = bits[4*(4*y+x)+3..4*(4*y+x)+0]
where bit 63 is the most significant and bit 0 is the least
significant bit.
The alpha component for a texel at location (x,y) in the block is
given by alpha(x,y) / 15.
COMPRESSED_RGBA_S3TC_DXT5_EXT: Each 4x4 block of texels consists of 64
bits of compressed alpha image data followed by 64 bits of RGB image data.
Each RGB image data block is encoded according to the
COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
bits always use the non-transparent encodings. In other words, they are
treated as though color0 > color1, regardless of the actual values of
color0 and color1.
Each alpha image data block is encoded as a sequence of 8 bytes, called
(in order of increasing address):
alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5
The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha
components by multiplying by 1/255.
The 6 "bits" bytes of the block are decoded into one 48-bit integer:
bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 +
256 * (bits_4 + 256 * bits_5))))
bits is a 48-bit unsigned integer, from which a three-bit control code
is extracted for a texel at location (x,y) in the block using:
code(x,y) = bits[3*(4*y+x)+2..3*(4*y+x)+0]
where bit 47 is the most significant and bit 0 is the least
significant bit.
The alpha component for a texel at location (x,y) in the block is
given by:
alpha0, code(x,y) == 0
alpha1, code(x,y) == 1
(6*alpha0 + 1*alpha1)/7, alpha0 > alpha1 and code(x,y) == 2
(5*alpha0 + 2*alpha1)/7, alpha0 > alpha1 and code(x,y) == 3
(4*alpha0 + 3*alpha1)/7, alpha0 > alpha1 and code(x,y) == 4
(3*alpha0 + 4*alpha1)/7, alpha0 > alpha1 and code(x,y) == 5
(2*alpha0 + 5*alpha1)/7, alpha0 > alpha1 and code(x,y) == 6
(1*alpha0 + 6*alpha1)/7, alpha0 > alpha1 and code(x,y) == 7
(4*alpha0 + 1*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 2
(3*alpha0 + 2*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 3
(2*alpha0 + 3*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 4
(1*alpha0 + 4*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 5
0.0, alpha0 <= alpha1 and code(x,y) == 6
1.0, alpha0 <= alpha1 and code(x,y) == 7
Revision History
1.1, 11/16/01 pbrown: Updated contact info, clarified where texels
fall within a single block.
1.0, 07/07/00 prbrown1: Published final version agreed to by working
group members.
0.9, 06/24/00 prbrown1: Documented that block-aligned TexSubImage calls
do not modify existing texels outside the
modified blocks. Added caveat to allow for a
(0,0)-anchored TexSubImage operation of
arbitrary size.
0.7, 04/11/00 prbrown1: Added issues on DXT1, DXT3, and DXT5 encodings
where the MSDN documentation doesn't match what
is really done. Added enum values from the
extension registry.
0.4, 03/28/00 prbrown1: Updated to reflect final version of the
ARB_texture_compression extension. Allowed
block-aligned TexSubImage calls.
0.3, 03/07/00 prbrown1: Resolved issues pertaining to the format of RGB
blocks in the DXT3 and DXT5 formats (they don't
ever use the "transparent" encoding). Fixed
decoding of DXT1 blocks. Pointed out issue of
"transparent" texels in DXT1 encodings having
different behaviors for RGB and RGBA internal
formats.
0.2, 02/23/00 prbrown1: Minor revisions; added several issues.
0.11, 02/17/00 prbrown1: Slight modification to error semantics
(INVALID_ENUM instead of INVALID_OPERATION).
0.1, 02/15/00 prbrown1: Initial revision.

View File

@ -0,0 +1,39 @@
Microsoft Visual Studio Solution File, Format Version 8.00
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcproj", "{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
ProjectSection(ProjectDependencies) = postProject
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishpng", "squishpng\squishpng.vcproj", "{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
ProjectSection(ProjectDependencies) = postProject
{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishtest", "squishtest\squishtest.vcproj", "{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
ProjectSection(ProjectDependencies) = postProject
{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfiguration) = preSolution
Debug = Debug
Release = Release
EndGlobalSection
GlobalSection(ProjectConfiguration) = postSolution
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.ActiveCfg = Debug|Win32
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.Build.0 = Debug|Win32
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.ActiveCfg = Release|Win32
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.Build.0 = Release|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.ActiveCfg = Debug|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.Build.0 = Debug|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.ActiveCfg = Release|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.Build.0 = Release|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.ActiveCfg = Debug|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.Build.0 = Debug|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.ActiveCfg = Release|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
EndGlobalSection
GlobalSection(ExtensibilityAddIns) = postSolution
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,198 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="squish"
ProjectGUID="{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
Keyword="Win32Proj">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="4"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;SQUISH_USE_SSE=1"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
EnableEnhancedInstructionSet="1"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLibrarianTool"
OutputFile="$(OutDir)/squish.lib"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="4"
CharacterSet="2"
WholeProgramOptimization="TRUE">
<Tool
Name="VCCLCompilerTool"
GlobalOptimizations="TRUE"
InlineFunctionExpansion="2"
FavorSizeOrSpeed="1"
OmitFramePointers="TRUE"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;SQUISH_USE_SSE=1"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLibrarianTool"
OutputFile="$(OutDir)/squish.lib"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
RelativePath="..\..\alpha.cpp">
</File>
<File
RelativePath="..\..\clusterfit.cpp">
</File>
<File
RelativePath="..\..\colourblock.cpp">
</File>
<File
RelativePath="..\..\colourfit.cpp">
</File>
<File
RelativePath="..\..\colourset.cpp">
</File>
<File
RelativePath="..\..\maths.cpp">
</File>
<File
RelativePath="..\..\rangefit.cpp">
</File>
<File
RelativePath="..\..\singlecolourfit.cpp">
</File>
<File
RelativePath="..\..\squish.cpp">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
<File
RelativePath="..\..\alpha.h">
</File>
<File
RelativePath="..\..\clusterfit.h">
</File>
<File
RelativePath="..\..\colourblock.h">
</File>
<File
RelativePath="..\..\colourfit.h">
</File>
<File
RelativePath="..\..\colourset.h">
</File>
<File
RelativePath="..\..\config.h">
</File>
<File
RelativePath="..\..\maths.h">
</File>
<File
RelativePath="..\..\rangefit.h">
</File>
<File
RelativePath="..\..\simd.h">
</File>
<File
RelativePath="..\..\simd_sse.h">
</File>
<File
RelativePath="..\..\simd_ve.h">
</File>
<File
RelativePath="..\..\singlecolourfit.h">
</File>
<File
RelativePath="..\..\singlecolourlookup.inl">
</File>
<File
RelativePath="..\..\squish.h">
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
<File
RelativePath="..\..\texture_compression_s3tc.txt">
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,140 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="squishpng"
ProjectGUID="{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
Keyword="Win32Proj">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="libpng13d.lib"
OutputFile="$(OutDir)/squishpng.exe"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
ProgramDatabaseFile="$(OutDir)/squishpng.pdb"
SubSystem="1"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="libpng13.lib"
OutputFile="$(OutDir)/squishpng.exe"
LinkIncremental="1"
GenerateDebugInformation="TRUE"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
RelativePath="..\..\extra\squishpng.cpp">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,138 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="squishtest"
ProjectGUID="{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
Keyword="Win32Proj">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/squishtest.exe"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
ProgramDatabaseFile="$(OutDir)/squishtest.pdb"
SubSystem="1"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/squishtest.exe"
LinkIncremental="1"
GenerateDebugInformation="TRUE"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
RelativePath="..\..\extra\squishtest.cpp">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,576 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "weightedclusterfit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
// Prepares a weighted cluster fit for the given colour set.
//
// The colours are projected onto the principal axis of their weighted
// covariance matrix and ordered along it. The weighted points (in that order)
// and their running sums are cached for Compress3()/Compress4().
//
// m_metric is not assigned here: setMetric() has to be called before
// bestError(), Compress3() or Compress4() read it.
WeightedClusterFit::WeightedClusterFit( ColourSet const* colours, int flags ) :
	ColourFit( colours, flags )
{
	// initialise the best error
#if SQUISH_USE_SIMD
	m_besterror = VEC4_CONST( FLT_MAX );
#else
	m_besterror = FLT_MAX;
#endif

	// cache some values
	int const count = m_colours->GetCount();
	Vec3 const* values = m_colours->GetPoints();

	// get the covariance matrix
	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );

	// compute the principal component
	Vec3 principle = ComputePrincipleComponent( covariance );

	// build the list of projections onto the principal axis
	// NOTE(review): dps and m_order hold 16 entries, so count is assumed to be <= 16.
	float dps[16];
	for( int i = 0; i < count; ++i )
	{
		dps[i] = Dot( values[i], principle );
		m_order[i] = i;
	}

	// stable insertion sort of the projections; m_order follows the same swaps
	// so that m_order[i] is the original index of the i-th sorted point
	for( int i = 0; i < count; ++i )
	{
		for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
		{
			std::swap( dps[j], dps[j - 1] );
			std::swap( m_order[j], m_order[j - 1] );
		}
	}

	// weight all the points
#if SQUISH_USE_SIMD
	Vec4 const* unweighted = m_colours->GetPointsSimd();
	Vec4 const* weights = m_colours->GetWeightsSimd();
	m_xxsum = VEC4_CONST( 0.0f );
	m_xsum = VEC4_CONST( 0.0f );
#else
	Vec3 const* unweighted = m_colours->GetPoints();
	float const* weights = m_colours->GetWeights();
	m_xxsum = Vec3( 0.0f );
	m_xsum = Vec3( 0.0f );
	m_wsum = 0.0f;
#endif

	for( int i = 0; i < count; ++i )
	{
		int p = m_order[i];
		m_weighted[i] = weights[p] * unweighted[p];
		m_xxsum += m_weighted[i] * m_weighted[i];
		m_xsum += m_weighted[i];
#if !SQUISH_USE_SIMD
		m_weights[i] = weights[p];
		m_wsum += m_weights[i];
#endif
	}

	// Compress3()/Compress4() always walk all 16 slots of m_weighted, m_weights
	// and m_order. Pad the unused tail with zero-weight points and an identity
	// ordering so that a set with fewer than 16 colours never makes them read
	// uninitialised data or write outside the index table.
	for( int i = count; i < 16; ++i )
	{
		m_order[i] = i;
#if SQUISH_USE_SIMD
		m_weighted[i] = VEC4_CONST( 0.0f );
#else
		m_weighted[i] = Vec3( 0.0f );
		m_weights[i] = 0.0f;
#endif
	}
}
void WeightedClusterFit::setMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD
m_metric = Vec4(r, g, b, 0);
#else
m_metric = Vec3(r, g, b);
#endif
}
// Returns the best error found so far plus the metric-weighted sum of the
// squared weighted points (m_xxsum), i.e. the term that Compress3()/Compress4()
// leave out of their per-candidate error.
float WeightedClusterFit::bestError() const
{
#if SQUISH_USE_SIMD
	// scale the squared sums by the metric, then add the three colour lanes
	// onto the stored best error one at a time (same order as a chained sum)
	Vec4 scaled = m_xxsum * m_metric;
	Vec4 total = m_besterror + scaled.SplatX();
	total = total + scaled.SplatY();
	total = total + scaled.SplatZ();
	return total.GetVec3().X();
#else
	return m_besterror + Dot( m_xxsum, m_metric );
#endif
}
#if SQUISH_USE_SIMD
// Three-colour (start, end, midpoint) fit, SIMD version.
//
// Walks every split of the 16 ordered points into three consecutive clusters
// (sizes c0, c1 and the remainder), solves the least-squares end points for
// each split, snaps them to the 5:6:5 grid and keeps the split with the
// smallest metric-weighted error. The block is only written when that error
// beats m_besterror.
//
// The xyz lanes carry the weighted colour sums; the w lane carries the
// matching weight sums (see the scalar formulas kept in the comments below).
// NOTE(review): this relies on the w lane of m_weighted holding the point
// weight -- confirm against ColourSet::GetPointsSimd().
void WeightedClusterFit::Compress3( void* block )
{
	Vec4 const one = VEC4_CONST(1.0f);
	Vec4 const zero = VEC4_CONST(0.0f);
	// w is 0.25 so that the same multiply yields x1*0.5 (xyz) and w1*0.25 (w)
	Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f);
	Vec4 const two = VEC4_CONST(2.0);

	// declare variables
	Vec4 beststart = VEC4_CONST( 0.0f );
	Vec4 bestend = VEC4_CONST( 0.0f );
	Vec4 besterror = VEC4_CONST( FLT_MAX );

	Vec4 x0 = zero;

	int b0 = 0, b1 = 0;

	// check all possible clusters for this total order
	for( int c0 = 0; c0 <= 16; c0++)
	{
		Vec4 x1 = zero;

		for( int c1 = 0; c1 <= 16-c0; c1++)
		{
			Vec4 const x2 = m_xsum - x1 - x0;

			//Vec3 const alphax_sum = x0 + x1 * 0.5f;
			//float const alpha2_sum = w0 + w1 * 0.25f;
			Vec4 const alphax_sum = MultiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
			Vec4 const alpha2_sum = alphax_sum.SplatW();

			//Vec3 const betax_sum = x2 + x1 * 0.5f;
			//float const beta2_sum = w2 + w1 * 0.25f;
			Vec4 const betax_sum = MultiplyAdd(x1, half, x2); // betax_sum, beta2_sum
			Vec4 const beta2_sum = betax_sum.SplatW();

			//float const alphabeta_sum = w1 * 0.25f;
			Vec4 const alphabeta_sum = (x1 * half).SplatW(); // alphabeta_sum

			// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
			Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );

			Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
			Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;

			// clamp the output to [0, 1]
			a = Min( one, Max( zero, a ) );
			b = Min( one, Max( zero, b ) );

			// clamp to the grid
			Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
			Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
			a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
			b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;

			// compute the error
			Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
			Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
			Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );

			// apply the metric to the error term
			Vec4 e4 = e3 * m_metric;
			Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();

			// keep the solution if it wins
			if( CompareAnyLessThan( error, besterror ) )
			{
				besterror = error;
				beststart = a;
				bestend = b;
				b0 = c0;
				b1 = c1;
			}

			// move the next point into the middle cluster; on the last
			// iteration the index would be 16 (one past the end of
			// m_weighted) and the sum is never used again, so skip it
			if( c0 + c1 < 16 )
				x1 += m_weighted[c0+c1];
		}

		// same guard for the first cluster: c0 == 16 is the final iteration
		if( c0 < 16 )
			x0 += m_weighted[c0];
	}

	// save the block if necessary
	if( CompareAnyLessThan( besterror, m_besterror ) )
	{
		// compute indices from cluster sizes.
		u8 bestindices[16];
		{
			int i = 0;
			for(; i < b0; i++) {
				bestindices[i] = 0;
			}
			for(; i < b0+b1; i++) {
				bestindices[i] = 2;
			}
			for(; i < 16; i++) {
				bestindices[i] = 1;
			}
		}

		// remap the indices from sorted order back to the original point order
		u8 ordered[16];
		for( int i = 0; i < 16; ++i )
			ordered[m_order[i]] = bestindices[i];

		// save the block
		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block );

		// save the error
		m_besterror = besterror;
	}
}
// Four-colour (start, end, two interpolants) fit, SIMD version.
//
// Walks every split of the 16 ordered points into four consecutive clusters
// (sizes c0, c1, c2 and the remainder), solves the least-squares end points
// for each split, snaps them to the 5:6:5 grid and keeps the split with the
// smallest metric-weighted error. The block is only written when that error
// beats m_besterror.
//
// The xyz lanes carry the weighted colour sums; the w lane carries the
// matching weight sums (see the scalar formulas kept in the comments below).
// NOTE(review): this relies on the w lane of m_weighted holding the point
// weight -- confirm against ColourSet::GetPointsSimd().
void WeightedClusterFit::Compress4( void* block )
{
	Vec4 const one = VEC4_CONST(1.0f);
	Vec4 const zero = VEC4_CONST(0.0f);
	Vec4 const half = VEC4_CONST(0.5f);
	Vec4 const two = VEC4_CONST(2.0);
	// w lanes hold the squared factors so one multiply scales colours and weights
	Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
	Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );

	// declare variables
	Vec4 beststart = VEC4_CONST( 0.0f );
	Vec4 bestend = VEC4_CONST( 0.0f );
	Vec4 besterror = VEC4_CONST( FLT_MAX );

	Vec4 x0 = zero;
	int b0 = 0, b1 = 0, b2 = 0;

	// check all possible clusters for this total order
	for( int c0 = 0; c0 <= 16; c0++)
	{
		Vec4 x1 = zero;

		for( int c1 = 0; c1 <= 16-c0; c1++)
		{
			Vec4 x2 = zero;

			for( int c2 = 0; c2 <= 16-c0-c1; c2++)
			{
				Vec4 const x3 = m_xsum - x2 - x1 - x0;

				//Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
				//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
				Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); // alphax_sum, alpha2_sum
				Vec4 const alpha2_sum = alphax_sum.SplatW();

				//Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
				//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
				Vec4 const betax_sum = x3 + MultiplyAdd(x2, twothirds, x1 * onethird); // betax_sum, beta2_sum
				Vec4 const beta2_sum = betax_sum.SplatW();

				//float const alphabeta_sum = w1 * (2.0f/9.0f) + w2 * (2.0f/9.0f);
				Vec4 const alphabeta_sum = two * (x1 * onethird + x2 * onethird).SplatW(); // alphabeta_sum

				// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
				Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );

				Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
				Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;

				// clamp the output to [0, 1]
				a = Min( one, Max( zero, a ) );
				b = Min( one, Max( zero, b ) );

				// clamp to the grid
				Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
				Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
				a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
				b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;

				// compute the error
				Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
				Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
				Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );

				// apply the metric to the error term
				Vec4 e4 = e3 * m_metric;
				Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();

				// keep the solution if it wins
				if( CompareAnyLessThan( error, besterror ) )
				{
					besterror = error;
					beststart = a;
					bestend = b;
					b0 = c0;
					b1 = c1;
					b2 = c2;
				}

				// move the next point into the third cluster; on the last
				// iteration the index would be 16 (one past the end of
				// m_weighted) and the sum is never used again, so skip it
				if( c0 + c1 + c2 < 16 )
					x2 += m_weighted[c0+c1+c2];
			}

			// same guard for the second cluster
			if( c0 + c1 < 16 )
				x1 += m_weighted[c0+c1];
		}

		// same guard for the first cluster
		if( c0 < 16 )
			x0 += m_weighted[c0];
	}

	// save the block if necessary
	if( CompareAnyLessThan( besterror, m_besterror ) )
	{
		// compute indices from cluster sizes.
		u8 bestindices[16];
		{
			int i = 0;
			for(; i < b0; i++) {
				bestindices[i] = 0;
			}
			for(; i < b0+b1; i++) {
				bestindices[i] = 2;
			}
			for(; i < b0+b1+b2; i++) {
				bestindices[i] = 3;
			}
			for(; i < 16; i++) {
				bestindices[i] = 1;
			}
		}

		// remap the indices from sorted order back to the original point order
		u8 ordered[16];
		for( int i = 0; i < 16; ++i )
			ordered[m_order[i]] = bestindices[i];

		// save the block
		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );

		// save the error
		m_besterror = besterror;
	}
}
#else
// Three-colour (start, end, midpoint) fit, scalar version.
//
// Walks every split of the 16 ordered points into three consecutive clusters
// (sizes c0, c1 and the remainder), solves the least-squares end points for
// each split, snaps them to the 5:6:5 grid and keeps the split with the
// smallest metric-weighted error. The block is only written when that error
// beats m_besterror.
void WeightedClusterFit::Compress3( void* block )
{
	// declare variables
	Vec3 beststart( 0.0f );
	Vec3 bestend( 0.0f );
	float besterror = FLT_MAX;

	// running colour / weight sums of the first cluster
	Vec3 x0(0.0f);
	float w0 = 0.0f;

	int b0 = 0, b1 = 0;

	// check all possible clusters for this total order
	for( int c0 = 0; c0 <= 16; c0++)
	{
		// running colour / weight sums of the middle cluster
		Vec3 x1(0.0f);
		float w1 = 0.0f;

		for( int c1 = 0; c1 <= 16-c0; c1++)
		{
			float w2 = m_wsum - w0 - w1;

			// These factors could be entirely precomputed.
			float const alpha2_sum = w0 + w1 * 0.25f;
			float const beta2_sum = w2 + w1 * 0.25f;
			float const alphabeta_sum = w1 * 0.25f;
			float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);

			Vec3 const alphax_sum = x0 + x1 * 0.5f;
			Vec3 const betax_sum = m_xsum - alphax_sum;

			Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
			Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;

			// clamp the output to [0, 1]
			Vec3 const one( 1.0f );
			Vec3 const zero( 0.0f );
			a = Min( one, Max( zero, a ) );
			b = Min( one, Max( zero, b ) );

			// clamp to the grid
			Vec3 const grid( 31.0f, 63.0f, 31.0f );
			Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
			Vec3 const half( 0.5f );
			a = Floor( grid*a + half )*gridrcp;
			b = Floor( grid*b + half )*gridrcp;

			// compute the error
			Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );

			// apply the metric to the error term
			float error = Dot( e1, m_metric );

			// keep the solution if it wins
			if( error < besterror )
			{
				besterror = error;
				beststart = a;
				bestend = b;
				b0 = c0;
				b1 = c1;
			}

			// move the next point into the middle cluster; on the last
			// iteration the index would be 16 (one past the end of
			// m_weighted/m_weights) and the sums are never used again
			if( c0 + c1 < 16 )
			{
				x1 += m_weighted[c0+c1];
				w1 += m_weights[c0+c1];
			}
		}

		// same guard for the first cluster: c0 == 16 is the final iteration
		if( c0 < 16 )
		{
			x0 += m_weighted[c0];
			w0 += m_weights[c0];
		}
	}

	// save the block if necessary
	if( besterror < m_besterror )
	{
		// compute indices from cluster sizes.
		u8 bestindices[16];
		{
			int i = 0;
			for(; i < b0; i++) {
				bestindices[i] = 0;
			}
			for(; i < b0+b1; i++) {
				bestindices[i] = 2;
			}
			for(; i < 16; i++) {
				bestindices[i] = 1;
			}
		}

		// remap the indices from sorted order back to the original point order
		u8 ordered[16];
		for( int i = 0; i < 16; ++i )
			ordered[m_order[i]] = bestindices[i];

		// save the block
		WriteColourBlock3( beststart, bestend, ordered, block );

		// save the error
		m_besterror = besterror;
	}
}
void WeightedClusterFit::Compress4( void* block )
{
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
float besterror = FLT_MAX;
Vec3 x0(0.0f);
float w0 = 0.0f;
int b0 = 0, b1 = 0, b2 = 0;
int i = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= 16; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec3 x2(0.0f);
float w2 = 0.0f;
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
float w3 = m_wsum - w0 - w1 - w2;
float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
Vec3 const betax_sum = m_xsum - alphax_sum;
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
// compute the error
Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
// apply the metric to the error term
float error = Dot( e1, m_metric );
// keep the solution if it wins
if( error < besterror )
{
besterror = error;
beststart = a;
bestend = b;
b0 = c0;
b1 = c1;
b2 = c2;
}
x2 += m_weighted[c0+c1+c2];
w2 += m_weights[c0+c1+c2];
}
x1 += m_weighted[c0+c1];
w1 += m_weights[c0+c1];
}
x0 += m_weighted[c0];
w0 += m_weights[c0];
}
// save the block if necessary
if( besterror < m_besterror )
{
// compute indices from cluster sizes.
u8 bestindices[16];
{
int i = 0;
for(; i < b0; i++) {
bestindices[i] = 0;
}
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
// save the block
WriteColourBlock4( beststart, bestend, ordered, block );
// save the error
m_besterror = besterror;
}
}
#endif
} // namespace squish

View File

@ -0,0 +1,76 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_WEIGHTEDCLUSTERFIT_H
#define SQUISH_WEIGHTEDCLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace squish {
// Colour fit that orders the colours of a set along the principal axis of
// their weighted covariance and then searches the consecutive cluster splits
// of that ordering for the end points with the lowest weighted error.
class WeightedClusterFit : public ColourFit
{
public:
// Sorts the colours of the set and caches their weighted sums.
WeightedClusterFit( ColourSet const* colours, int flags );
// Sets the per-channel weights applied to the error terms. The constructor
// does not initialise the metric, so this must be called before fitting.
void setMetric(float r, float g, float b);
// Best error found so far, including the metric-weighted m_xxsum term.
float bestError() const;
// NOTE(review): no definition of this function is visible in
// weightedclusterfit.cpp -- confirm it is defined elsewhere.
static void doPrecomputation();
// Exposed publicly so callers can run the 3- and 4-colour fits directly.
virtual void Compress3( void* block );
virtual void Compress4( void* block );
private:
// NOTE(review): not assigned by the constructor in weightedclusterfit.cpp.
Vec3 m_principle;
#if SQUISH_USE_SIMD
// Weighted points in sorted order (weight * colour).
Vec4 m_weighted[16];
// Per-channel error weights set by setMetric().
Vec4 m_metric;
// Sum of the component-wise squares of m_weighted.
Vec4 m_xxsum;
// Sum of m_weighted.
Vec4 m_xsum;
// Lowest error of any block written so far (starts at FLT_MAX).
Vec4 m_besterror;
#else
// Weighted points in sorted order (weight * colour).
Vec3 m_weighted[16];
// Weights of the points, in the same sorted order as m_weighted.
float m_weights[16];
// Per-channel error weights set by setMetric().
Vec3 m_metric;
// Sum of the component-wise squares of m_weighted.
Vec3 m_xxsum;
// Sum of m_weighted.
Vec3 m_xsum;
// Sum of m_weights.
float m_wsum;
// Lowest error of any block written so far (starts at FLT_MAX).
float m_besterror;
#endif
// m_order[i] is the original index of the i-th point in sorted order.
int m_order[16];
};
} // namespace squish
#endif // ndef SQUISH_WEIGHTEDCLUSTERFIT_H

65
src/nvtt/tools/cmdline.h Normal file
View File

@ -0,0 +1,65 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef CMDLINE_H
#define CMDLINE_H
#include <nvcore/Debug.h>
#include <stdarg.h>
// Message handler that forwards nvcore log messages to stderr.
// It registers itself on construction and restores the default handler on
// destruction, so one stack instance in main() covers the whole run.
struct MyMessageHandler : public nv::MessageHandler {
	MyMessageHandler() {
		nv::debug::setMessageHandler( this );
	}
	~MyMessageHandler() {
		nv::debug::resetMessageHandler();
	}

	// Prints the printf-style message `str` with the arguments in `arg`.
	virtual void log( const char * str, va_list arg ) {
		// Format from a private copy so that the caller's va_list is left
		// untouched and can still be used after this call returns.
		va_list val;
		va_copy(val, arg);
		vfprintf(stderr, str, val);
		va_end(val);
	}
};
// Assert handler for the command line tools: reports the failed expression
// on stderr, dumps the nvcore debug info and terminates the process.
// Installs itself when constructed and uninstalls itself when destroyed.
struct MyAssertHandler : public nv::AssertHandler {
	MyAssertHandler() { nv::debug::setAssertHandler( this ); }
	~MyAssertHandler() { nv::debug::resetAssertHandler(); }

	// Handler method, note that func might be NULL!
	// Never returns: the process exits with status 1.
	virtual int assert( const char *exp, const char *file, int line, const char *func ) {
		(void) func; // not reported
		fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line);
		nv::debug::dumpInfo();
		exit(1);
	}
};
#endif // CMDLINE_H

438
src/nvtt/tools/compress.cpp Normal file
View File

@ -0,0 +1,438 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvimage/Image.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvtt/nvtt.h>
#include "cmdline.h"
#include <time.h> // clock
//#define WINDOWS_LEAN_AND_MEAN
//#include <windows.h> // TIMER
// Output handler that writes the compressed data to a file and optionally
// prints a progress percentage on stdout.
struct MyOutputHandler : public nvtt::OutputHandler
{
	// Opens `name` for output. Progress display is off until
	// setDisplayProgress(true) is called.
	MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), verbose(false), stream(new nv::StdOutputStream(name)) {}
	virtual ~MyOutputHandler() { delete stream; }

	// Sets the expected total number of bytes, used to compute the percentage.
	virtual void setTotal(int t)
	{
		total = t;
	}

	// Enables or disables the progress display.
	virtual void setDisplayProgress(bool b)
	{
		verbose = b;
	}

	virtual void mipmap(int size, int width, int height, int depth, int face, int miplevel)
	{
		// ignore.
	}

	// Output data.
	virtual void writeData(const void * data, int size)
	{
		nvDebugCheck(stream != NULL);
		stream->serialize(const_cast<void *>(data), size);

		progress += size;

		// Without a known total there is nothing to report against; this
		// also keeps the division below away from zero.
		if (total <= 0)
		{
			return;
		}

		// Widen before multiplying: 100 * progress overflows int once more
		// than ~21 MB have been written.
		const int p = int((static_cast<long long>(progress) * 100) / total);
		if (verbose && p != percentage)
		{
			percentage = p;
			printf("\r%d%%", percentage);
			fflush(stdout);
		}
	}

	int total;       // expected output size in bytes
	int progress;    // bytes written so far
	int percentage;  // last percentage printed
	bool verbose;    // whether progress is printed
	nv::StdOutputStream * stream;  // owned; deleted in the destructor
};
struct MyErrorHandler : public nvtt::ErrorHandler
{
virtual void error(nvtt::Error e)
{
nvDebugBreak();
}
};
// Configure the input options for an image that has to be converted
// into a normal map.
void setColorToNormalMap(nvtt::InputOptions & inputOptions)
{
	// The same factor for the first three height evaluation terms, none for the last.
	const float third = 1.0f/3.0f;

	inputOptions.setNormalMap(false);
	inputOptions.setConvertToNormalMap(true);
	inputOptions.setHeightEvaluation(third, third, third, 0.0f);
	//inputOptions.setNormalFilter(1.0f, 0, 0, 0);
	//inputOptions.setNormalFilter(0.0f, 0, 0, 1);
	inputOptions.setGamma(1.0f, 1.0f);
	inputOptions.setNormalizeMipmaps(true);
}
// Configure the input options for an image that already is a normal map:
// no conversion, gamma of 1.0 and mipmap normalization enabled.
void setNormalMap(nvtt::InputOptions & inputOptions)
{
	const float gamma = 1.0f;

	inputOptions.setNormalMap(true);
	inputOptions.setConvertToNormalMap(false);
	inputOptions.setGamma(gamma, gamma);
	inputOptions.setNormalizeMipmaps(true);
}
// Configure the input options for a regular color image:
// no normal map handling, gamma of 2.2 and no mipmap normalization.
void setColorMap(nvtt::InputOptions & inputOptions)
{
	const float gamma = 2.2f;

	inputOptions.setNormalMap(false);
	inputOptions.setConvertToNormalMap(false);
	inputOptions.setGamma(gamma, gamma);
	inputOptions.setNormalizeMipmaps(false);
}
// Entry point of the nvcompress command line tool.
//
// Parses the options, loads the input image (DDS or any format supported by
// nv::Image), sets up the nvtt input/compression/output options and runs the
// compressor, printing the elapsed time at the end.
//
// Returns 0 on success and 1 when the arguments are missing, the input cannot
// be loaded or the output file cannot be opened.
int main(int argc, char *argv[])
{
	MyAssertHandler assertHandler;
	MyMessageHandler messageHandler;

	bool normal = false;
	bool color2normal = false;
	bool wrapRepeat = false;
	bool noMipmaps = false;
	bool fast = false;
	bool nocuda = false;
	bool silent = false;
	nvtt::Format format = nvtt::Format_BC1;

	const char * externalCompressor = NULL;

	nv::Path input;
	nv::Path output;

	// Parse arguments. Options must precede the input file name; parsing
	// stops at the first argument that does not start with '-'.
	for (int i = 1; i < argc; i++)
	{
		// Input options.
		if (strcmp("-color", argv[i]) == 0)
		{
			// Default, nothing to do.
		}
		else if (strcmp("-normal", argv[i]) == 0)
		{
			normal = true;
		}
		else if (strcmp("-tonormal", argv[i]) == 0)
		{
			color2normal = true;
		}
		else if (strcmp("-clamp", argv[i]) == 0)
		{
			// Default, nothing to do.
		}
		else if (strcmp("-repeat", argv[i]) == 0)
		{
			wrapRepeat = true;
		}
		else if (strcmp("-nomips", argv[i]) == 0)
		{
			noMipmaps = true;
		}

		// Compression options.
		else if (strcmp("-fast", argv[i]) == 0)
		{
			fast = true;
		}
		else if (strcmp("-nocuda", argv[i]) == 0)
		{
			nocuda = true;
		}
		else if (strcmp("-rgb", argv[i]) == 0)
		{
			format = nvtt::Format_RGB;
		}
		else if (strcmp("-bc1", argv[i]) == 0)
		{
			format = nvtt::Format_BC1;
		}
		else if (strcmp("-bc1a", argv[i]) == 0)
		{
			format = nvtt::Format_BC1a;
		}
		else if (strcmp("-bc2", argv[i]) == 0)
		{
			format = nvtt::Format_BC2;
		}
		else if (strcmp("-bc3", argv[i]) == 0)
		{
			format = nvtt::Format_BC3;
		}
		else if (strcmp("-bc3n", argv[i]) == 0)
		{
			format = nvtt::Format_BC3n;
		}
		else if (strcmp("-bc4", argv[i]) == 0)
		{
			format = nvtt::Format_BC4;
		}
		else if (strcmp("-bc5", argv[i]) == 0)
		{
			format = nvtt::Format_BC5;
		}

		// Undocumented option. Mainly used for testing.
		else if (strcmp("-ext", argv[i]) == 0)
		{
			if (i+1 < argc && argv[i+1][0] != '-') {
				externalCompressor = argv[i+1];
				i++;
			}
		}

		// Misc options
		else if (strcmp("-silent", argv[i]) == 0)
		{
			silent = true;
		}

		else if (argv[i][0] != '-')
		{
			input = argv[i];

			if (i+1 < argc && argv[i+1][0] != '-') {
				output = argv[i+1];
			}
			else
			{
				// No output name given: use the input name with a .dds extension.
				output.copy(input.str());
				output.stripExtension();
				output.append(".dds");
			}

			break;
		}
		else
		{
			// Do not silently drop a mistyped option; the tool would otherwise
			// quietly fall back to the defaults.
			fprintf(stderr, "Warning: unrecognized option '%s' ignored.\n", argv[i]);
		}
	}

	printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");

	if (input.isNull())
	{
		printf("usage: nvcompress [options] infile [outfile]\n\n");

		printf("Input options:\n");
		printf(" -color \tThe input image is a color map (default).\n");
		printf(" -normal \tThe input image is a normal map.\n");
		printf(" -tonormal\tConvert input to normal map.\n");
		printf(" -clamp \tClamp wrapping mode (default).\n");
		printf(" -repeat \tRepeat wrapping mode.\n");
		printf(" -nomips \tDisable mipmap generation.\n\n");

		printf("Compression options:\n");
		printf(" -fast \tFast compression.\n");
		printf(" -nocuda \tDo not use cuda compressor.\n");
		printf(" -rgb \tRGBA format\n");
		printf(" -bc1 \tBC1 format (DXT1)\n");
		printf(" -bc1a \tBC1 format with binary alpha (DXT1a)\n");
		printf(" -bc2 \tBC2 format (DXT3)\n");
		printf(" -bc3 \tBC3 format (DXT5)\n");
		printf(" -bc3n \tBC3 normal map format (DXT5nm)\n");
		printf(" -bc4 \tBC4 format (ATI1)\n");
		printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");

		return 1;
	}

	// @@ Make sure input file exists.

	// Set input options.
	nvtt::InputOptions inputOptions;

	if (nv::strCaseCmp(input.extension(), ".dds") == 0)
	{
		// Load surface.
		nv::DirectDrawSurface dds(input);
		if (!dds.isValid())
		{
			fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
			return 1;
		}

		if (!dds.isSupported() || dds.isTexture3D())
		{
			fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
			return 1;
		}

		uint faceCount;
		if (dds.isTexture2D())
		{
			inputOptions.setTextureLayout(nvtt::TextureType_2D, dds.width(), dds.height());
			faceCount = 1;
		}
		else
		{
			nvDebugCheck(dds.isTextureCube());
			inputOptions.setTextureLayout(nvtt::TextureType_Cube, dds.width(), dds.height());
			faceCount = 6;
		}

		uint mipmapCount = dds.mipmapCount();

		nv::Image mipmap;

		for (uint f = 0; f < faceCount; f++)
		{
			// NOTE(review): the inclusive bound assumes mipmapCount() does not
			// count the top level image -- confirm against DirectDrawSurface.
			for (uint m = 0; m <= mipmapCount; m++)
			{
				dds.mipmap(&mipmap, f, m);

				inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), 1, f, m);
			}
		}
	}
	else
	{
		// Regular image.
		nv::Image image;
		if (!image.load(input))
		{
			fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
			return 1;
		}

		inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
		inputOptions.setMipmapData(image.pixels(), image.width(), image.height());
	}

	// Both modes currently use the box filter; the Kaiser filter is disabled.
	if (fast)
	{
		inputOptions.setMipmapping(true, nvtt::MipmapFilter_Box);
	}
	else
	{
		inputOptions.setMipmapping(true, nvtt::MipmapFilter_Box);
		//inputOptions.setMipmapping(true, nvtt::MipmapFilter_Kaiser);
	}

	if (wrapRepeat)
	{
		inputOptions.setWrapMode(nvtt::WrapMode_Repeat);
	}
	else
	{
		inputOptions.setWrapMode(nvtt::WrapMode_Clamp);
	}

	// -normal takes precedence over -tonormal when both are given.
	if (normal)
	{
		setNormalMap(inputOptions);
	}
	else if (color2normal)
	{
		setColorToNormalMap(inputOptions);
	}
	else
	{
		setColorMap(inputOptions);
	}

	if (noMipmaps)
	{
		inputOptions.setMipmapping(false);
	}

	nvtt::CompressionOptions compressionOptions;
	compressionOptions.setFormat(format);
	if (fast)
	{
		compressionOptions.setQuality(nvtt::Quality_Fastest);
	}
	else
	{
		compressionOptions.setQuality(nvtt::Quality_Normal);
		//compressionOptions.setQuality(nvtt::Quality_Production, 0.5f);
		//compressionOptions.setQuality(nvtt::Quality_Highest);
	}
	compressionOptions.enableHardwareCompression(!nocuda);
	compressionOptions.setColorWeights(1, 1, 1);

	if (externalCompressor != NULL)
	{
		compressionOptions.setExternalCompressor(externalCompressor);
	}

	MyErrorHandler errorHandler;
	MyOutputHandler outputHandler(output);
	if (outputHandler.stream->isError())
	{
		fprintf(stderr, "Error opening '%s' for writing\n", output.str());
		return 1;
	}

	outputHandler.setTotal(nvtt::estimateSize(inputOptions, compressionOptions));
	outputHandler.setDisplayProgress(!silent);

	nvtt::OutputOptions outputOptions(&outputHandler, &errorHandler);
	//nvtt::OutputOptions outputOptions(NULL, &errorHandler);

	// printf("Press ENTER.\n");
	// fflush(stdout);
	// getchar();

	/* LARGE_INTEGER temp;
	QueryPerformanceFrequency((LARGE_INTEGER*) &temp);
	double freq = ((double) temp.QuadPart) / 1000.0;

	LARGE_INTEGER start_time;
	QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
	*/

	clock_t start = clock();

	nvtt::compress(inputOptions, outputOptions, compressionOptions);

	/*
	LARGE_INTEGER end_time;
	QueryPerformanceCounter((LARGE_INTEGER*) &end_time);

	float diff_time = (float) (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
	printf("\rtime taken: %.3f seconds\n", diff_time/1000);
	*/

	clock_t end = clock();
	printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);

	return 0;
}

View File

@ -0,0 +1,31 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "configdialog.h"
// Creates the configuration dialog as a child of `parent` and populates it
// with the widgets of the generated form class.
ConfigDialog::ConfigDialog(QWidget *parent/*=0*/)
	: QDialog(parent)
{
	ui.setupUi(this);
}

View File

@ -0,0 +1,43 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef CONFIGDIALOG_H
#define CONFIGDIALOG_H
#include <QtGui/QDialog>
#include "ui_nvdxtdialog.h"
// Configuration dialog built from the form in ui_nvdxtdialog.h.
class ConfigDialog : public QDialog
{
	Q_OBJECT
public:
	// Creates the dialog; `parent` may be null for a top-level window.
	// Explicit so that a QWidget pointer is never silently converted into a
	// temporary dialog.
	explicit ConfigDialog(QWidget *parent = 0);

private:
	// Generated form object holding the dialog's widgets.
	Ui::ConfigDialog ui;
};
#endif // CONFIGDIALOG_H

View File

@ -0,0 +1,983 @@
<ui version="4.0" >
<class>ConfigDialog</class>
<widget class="QDialog" name="ConfigDialog" >
<property name="geometry" >
<rect>
<x>0</x>
<y>0</y>
<width>626</width>
<height>532</height>
</rect>
</property>
<property name="windowTitle" >
<string>Dialog</string>
</property>
<property name="sizeGripEnabled" >
<bool>true</bool>
</property>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>9</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QListWidget" name="listWidget" >
<property name="sizePolicy" >
<sizepolicy>
<hsizetype>7</hsizetype>
<vsizetype>7</vsizetype>
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize" >
<size>
<width>64</width>
<height>0</height>
</size>
</property>
<property name="maximumSize" >
<size>
<width>128</width>
<height>16777215</height>
</size>
</property>
<property name="horizontalScrollBarPolicy" >
<enum>Qt::ScrollBarAlwaysOff</enum>
</property>
<property name="movement" >
<enum>QListView::Static</enum>
</property>
<property name="flow" >
<enum>QListView::TopToBottom</enum>
</property>
<property name="isWrapping" stdset="0" >
<bool>false</bool>
</property>
<property name="resizeMode" >
<enum>QListView::Adjust</enum>
</property>
<property name="viewMode" >
<enum>QListView::IconMode</enum>
</property>
<item>
<property name="text" >
<string>Input</string>
</property>
<property name="icon" >
<iconset>../../../../../../castano-stuff/qshaderedit/src/images/win/fileopen.png</iconset>
</property>
</item>
<item>
<property name="text" >
<string>Output</string>
</property>
<property name="icon" >
<iconset>../../../../../../castano-stuff/qshaderedit/src/images/win/filesave.png</iconset>
</property>
</item>
<item>
<property name="text" >
<string>Settings</string>
</property>
<property name="icon" >
<iconset>../../../../../../castano-stuff/qshaderedit/src/images/toolbutton.png</iconset>
</property>
</item>
<item>
<property name="text" >
<string>Preview</string>
</property>
<property name="icon" >
<iconset>../../../../../../castano-stuff/qshaderedit/src/images/colorpicker.png</iconset>
</property>
</item>
<item>
<property name="text" >
<string>3D Preview</string>
</property>
<property name="icon" >
<iconset>../../../../../../castano-stuff/qshaderedit/src/images/colorpicker.png</iconset>
</property>
</item>
</widget>
</item>
<item>
<widget class="QTabWidget" name="tabWidget" >
<property name="sizePolicy" >
<sizepolicy>
<hsizetype>7</hsizetype>
<vsizetype>7</vsizetype>
<horstretch>5</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize" >
<size>
<width>400</width>
<height>0</height>
</size>
</property>
<property name="tabPosition" >
<enum>QTabWidget::North</enum>
</property>
<property name="tabShape" >
<enum>QTabWidget::Rounded</enum>
</property>
<property name="currentIndex" >
<number>0</number>
</property>
<widget class="QWidget" name="tab_5" >
<attribute name="title" >
<string>Input</string>
</attribute>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>9</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QLineEdit" name="lineEdit" />
</item>
<item>
<widget class="QPushButton" name="openButton" >
<property name="text" >
<string>Open</string>
</property>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QLabel" name="label_4" >
<property name="sizePolicy" >
<sizepolicy>
<hsizetype>0</hsizetype>
<vsizetype>0</vsizetype>
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize" >
<size>
<width>172</width>
<height>172</height>
</size>
</property>
<property name="baseSize" >
<size>
<width>172</width>
<height>172</height>
</size>
</property>
<property name="acceptDrops" >
<bool>true</bool>
</property>
<property name="toolTip" >
<string>Drop images here</string>
</property>
<property name="frameShape" >
<enum>QFrame::StyledPanel</enum>
</property>
<property name="text" >
<string/>
</property>
<property name="pixmap" >
<pixmap>../../../../../../castano-stuff/qshaderedit/src/images/default.png</pixmap>
</property>
<property name="scaledContents" >
<bool>true</bool>
</property>
<property name="margin" >
<number>1</number>
</property>
</widget>
</item>
<item>
<widget class="QTextEdit" name="textEdit" >
<property name="enabled" >
<bool>false</bool>
</property>
<property name="sizePolicy" >
<sizepolicy>
<hsizetype>0</hsizetype>
<vsizetype>7</vsizetype>
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize" >
<size>
<width>172</width>
<height>0</height>
</size>
</property>
<property name="frameShape" >
<enum>QFrame::StyledPanel</enum>
</property>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>4</number>
</property>
<item>
<widget class="QGroupBox" name="groupBox_4" >
<property name="sizePolicy" >
<sizepolicy>
<hsizetype>5</hsizetype>
<vsizetype>5</vsizetype>
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="title" >
<string>Type</string>
</property>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>4</number>
</property>
<property name="spacing" >
<number>0</number>
</property>
<item>
<widget class="QRadioButton" name="radioButton" >
<property name="text" >
<string>RGB</string>
</property>
<property name="checked" >
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QRadioButton" name="radioButton_2" >
<property name="text" >
<string>RGBA</string>
</property>
</widget>
</item>
<item>
<widget class="QRadioButton" name="radioButton_3" >
<property name="text" >
<string>Monochrome</string>
</property>
</widget>
</item>
<item>
<widget class="QRadioButton" name="radioButton_4" >
<property name="text" >
<string>Normal Map</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QCheckBox" name="checkBox_5" >
<property name="enabled" >
<bool>false</bool>
</property>
<property name="text" >
<string>Alpha is opacity</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="checkBox_2" >
<property name="enabled" >
<bool>false</bool>
</property>
<property name="text" >
<string>Convert to normal map</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="checkBox" >
<property name="text" >
<string>Generate mipmaps</string>
</property>
<property name="checked" >
<bool>true</bool>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QLabel" name="label_8" >
<property name="text" >
<string>Mipmap filter</string>
</property>
<property name="buddy" >
<cstring>comboBox</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="comboBox" >
<item>
<property name="text" >
<string>Box</string>
</property>
</item>
<item>
<property name="text" >
<string>Triangle</string>
</property>
</item>
<item>
<property name="text" >
<string>Mitchell</string>
</property>
</item>
<item>
<property name="text" >
<string>Kaiser</string>
</property>
</item>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QLabel" name="label_5" >
<property name="sizePolicy" >
<sizepolicy>
<hsizetype>1</hsizetype>
<vsizetype>5</vsizetype>
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text" >
<string>Gamma</string>
</property>
<property name="buddy" >
<cstring>gammaSpinBox</cstring>
</property>
</widget>
</item>
<item>
<widget class="QDoubleSpinBox" name="gammaSpinBox" >
<property name="buttonSymbols" >
<enum>QAbstractSpinBox::UpDownArrows</enum>
</property>
<property name="maximum" >
<double>4.000000000000000</double>
</property>
<property name="minimum" >
<double>0.050000000000000</double>
</property>
<property name="singleStep" >
<double>0.050000000000000</double>
</property>
<property name="value" >
<double>2.200000000000000</double>
</property>
</widget>
</item>
</layout>
</item>
<item>
<spacer>
<property name="orientation" >
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" >
<size>
<width>204</width>
<height>71</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
</layout>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab_3" >
<attribute name="title" >
<string>Output</string>
</attribute>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>9</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QLabel" name="label_3" >
<property name="text" >
<string>Format:</string>
</property>
<property name="textFormat" >
<enum>Qt::PlainText</enum>
</property>
<property name="textInteractionFlags" >
<enum>Qt::NoTextInteraction</enum>
</property>
<property name="buddy" >
<cstring>formatComboBox</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="formatComboBox" >
<item>
<property name="text" >
<string>BC1 (DXT1)</string>
</property>
</item>
<item>
<property name="text" >
<string>BC2 (DXT3)</string>
</property>
</item>
<item>
<property name="text" >
<string>BC3 (DXT5)</string>
</property>
</item>
</widget>
</item>
<item>
<widget class="QLabel" name="label_9" >
<property name="text" >
<string>Color Space:</string>
</property>
<property name="buddy" >
<cstring>comboBox_2</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="comboBox_2" >
<item>
<property name="text" >
<string>RGB</string>
</property>
</item>
<item>
<property name="text" >
<string>YCoCg</string>
</property>
</item>
<item>
<property name="text" >
<string>JPEG LS (R-G, G, B-G)</string>
</property>
</item>
</widget>
</item>
</layout>
</item>
<item>
<spacer>
<property name="orientation" >
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" >
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="pushButton_5" >
<property name="text" >
<string>Export</string>
</property>
</widget>
</item>
</layout>
</item>
<item>
<spacer>
<property name="orientation" >
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" >
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab_4" >
<attribute name="title" >
<string>Settings</string>
</attribute>
<widget class="QWidget" name="verticalLayout_3" >
<property name="geometry" >
<rect>
<x>10</x>
<y>10</y>
<width>202</width>
<height>242</height>
</rect>
</property>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QGroupBox" name="groupBox_3" >
<property name="title" >
<string>Quality</string>
</property>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>9</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QSlider" name="horizontalSlider" >
<property name="maximum" >
<number>9</number>
</property>
<property name="value" >
<number>9</number>
</property>
<property name="orientation" >
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item>
<widget class="QLabel" name="label_2" >
<property name="frameShape" >
<enum>QFrame::StyledPanel</enum>
</property>
<property name="frameShadow" >
<enum>QFrame::Plain</enum>
</property>
<property name="text" >
<string>9 - Best</string>
</property>
<property name="textFormat" >
<enum>Qt::PlainText</enum>
</property>
<property name="textInteractionFlags" >
<enum>Qt::NoTextInteraction</enum>
</property>
<property name="buddy" >
<cstring>horizontalSlider</cstring>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QLabel" name="label" >
<property name="text" >
<string>Threshold</string>
</property>
</widget>
</item>
<item>
<widget class="QDoubleSpinBox" name="doubleSpinBox" />
</item>
</layout>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_6" >
<property name="title" >
<string>Color Weights</string>
</property>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>9</number>
</property>
<property name="spacing" >
<number>0</number>
</property>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QLabel" name="label_10" >
<property name="text" >
<string>Red</string>
</property>
<property name="buddy" >
<cstring>doubleSpinBox_2</cstring>
</property>
</widget>
</item>
<item>
<widget class="QDoubleSpinBox" name="doubleSpinBox_2" />
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QLabel" name="label_11" >
<property name="text" >
<string>Green</string>
</property>
<property name="buddy" >
<cstring>doubleSpinBox_3</cstring>
</property>
</widget>
</item>
<item>
<widget class="QDoubleSpinBox" name="doubleSpinBox_3" />
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QLabel" name="label_12" >
<property name="text" >
<string>Blue</string>
</property>
<property name="buddy" >
<cstring>doubleSpinBox_4</cstring>
</property>
</widget>
</item>
<item>
<widget class="QDoubleSpinBox" name="doubleSpinBox_4" />
</item>
</layout>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
</widget>
<widget class="QWidget" name="tab" >
<attribute name="title" >
<string>Preview</string>
</attribute>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>9</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QGroupBox" name="groupBox" >
<property name="title" >
<string>Input</string>
</property>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>3</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_2" >
<property name="title" >
<string>Output</string>
</property>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>3</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
</layout>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QCheckBox" name="filterCheckBox" >
<property name="text" >
<string>Bilinear Filter</string>
</property>
<property name="checked" >
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="diffCheckBox" >
<property name="text" >
<string>View difference</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab_2" >
<attribute name="title" >
<string>3D Preview</string>
</attribute>
<layout class="QVBoxLayout" >
<property name="margin" >
<number>9</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<spacer>
<property name="orientation" >
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" >
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QComboBox" name="comboBox_3" >
<item>
<property name="text" >
<string>Quad</string>
</property>
</item>
<item>
<property name="text" >
<string>Sphere</string>
</property>
</item>
<item>
<property name="text" >
<string>Cylinder</string>
</property>
</item>
<item>
<property name="text" >
<string>Teapot</string>
</property>
</item>
</widget>
</item>
</layout>
</item>
<item>
<widget class="QFrame" name="frame" >
<property name="frameShape" >
<enum>QFrame::StyledPanel</enum>
</property>
<property name="frameShadow" >
<enum>QFrame::Raised</enum>
</property>
</widget>
</item>
</layout>
</widget>
</widget>
</item>
</layout>
</item>
<item>
<widget class="Line" name="line" >
<property name="orientation" >
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" >
<property name="margin" >
<number>0</number>
</property>
<property name="spacing" >
<number>6</number>
</property>
<item>
<widget class="QPushButton" name="pushButton_6" >
<property name="text" >
<string>Default</string>
</property>
</widget>
</item>
<item>
<widget class="QProgressBar" name="progressBar" >
<property name="value" >
<number>24</number>
</property>
<property name="orientation" >
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="pushButton" >
<property name="text" >
<string>Quit</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
<resources/>
<connections>
<!-- Selecting a row in the page list switches the tab widget to the same page.
     setCurrentIndex(int) is the regular Qt 4 slot; setCurrentPage(int) is only
     available when Qt is built with Qt3 support. -->
<connection>
<sender>listWidget</sender>
<signal>currentRowChanged(int)</signal>
<receiver>tabWidget</receiver>
<slot>setCurrentIndex(int)</slot>
<hints>
<hint type="sourcelabel" >
<x>114</x>
<y>67</y>
</hint>
<hint type="destinationlabel" >
<x>173</x>
<y>95</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -0,0 +1,57 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvimage/Image.h>
#include <nvimage/DirectDrawSurface.h>
#include "cmdline.h"
int main(int argc, char *argv[])
{
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
if (argc != 2)
{
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
printf("usage: nvddsinfo ddsfile\n\n");
return 1;
}
// Load surface.
nv::DirectDrawSurface dds(argv[1]);
if (!dds.isValid())
{
printf("The file '%s' is not a valid DDS file.\n", argv[1]);
return 1;
}
dds.printInfo();
return 0;
}

View File

@ -0,0 +1,71 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvimage/Image.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvimage/ImageIO.h>
#include "cmdline.h"
int main(int argc, char *argv[])
{
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
if (argc != 2)
{
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
printf("usage: nvdecompress 'ddsfile'\n\n");
return 1;
}
// Load surface.
nv::DirectDrawSurface dds(argv[1]);
if (!dds.isValid())
{
printf("The file '%s' is not a valid DDS file.\n", argv[1]);
return 1;
}
nv::Path name(argv[1]);
name.stripExtension();
name.append(".tga");
nv::StdOutputStream stream(name.str());
if (stream.isError()) {
printf("Error opening '%s' for writting\n", name.str());
return 1;
}
// @@ TODO: Add command line options to output mipmaps, cubemap faces, etc.
nv::Image img;
dds.mipmap(&img, 0, 0); // get first image
nv::ImageIO::saveTGA(stream, &img);
return 0;
}

232
src/nvtt/tools/imgdiff.cpp Normal file
View File

@ -0,0 +1,232 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvcore/Containers.h>
#include <nvimage/Image.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvmath/Color.h>
#include <math.h>
#include "cmdline.h"
// Loads the file `fileName` into `image`.
// A file whose extension compares equal to ".dds" (via nv::strCaseCmp) is
// opened as a DirectDrawSurface and its first image is extracted; any other
// file goes through nv::Image::load. Prints a message and returns false when
// the file cannot be loaded, returns true otherwise.
static bool loadImage(nv::Image & image, const char * fileName)
{
    const bool isDDS = (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0);

    if (!isDDS)
    {
        // Regular image.
        if (image.load(fileName))
        {
            return true;
        }
        printf("The file '%s' is not a supported image type.\n", fileName);
        return false;
    }

    nv::DirectDrawSurface surface(fileName);
    if (!surface.isValid())
    {
        printf("The file '%s' is not a valid DDS file.\n", fileName);
        return false;
    }

    surface.mipmap(&image, 0, 0); // get first image
    return true;
}
// @@ Compute per-tile errors.
// Accumulates error statistics over a set of samples.
//
// Usage: call addSample() once per sample, then done() exactly once to turn
// the running sums into averages and derive rmse/psnr, then print() or read
// the fields directly.
struct Error
{
    // All statistics start at zero, including the derived ones (rmse, psnr),
    // so the fields hold defined values even before done() is called.
    Error() :
        samples(0),
        mabse(0.0f),
        maxabse(0.0f),
        mse(0.0f),
        rmse(0.0f),
        psnr(0.0f)
    {
    }

    // Adds one signed error value to the running sums.
    void addSample(float e)
    {
        const float magnitude = fabsf(e);

        samples++;
        mabse += magnitude;
        if (magnitude > maxabse)
        {
            maxabse = magnitude;
        }
        mse += e * e;
    }

    // Finalizes the statistics. Must be called once, after the last sample.
    void done()
    {
        // With no samples the sums are already zero; skip the division so the
        // results stay at zero instead of turning into NaN (0 / 0).
        if (samples > 0)
        {
            mabse /= samples;
            mse /= samples;
        }

        rmse = sqrtf(mse);

        // PSNR relative to a peak value of 255. A zero error would make the
        // ratio infinite, so 999 is reported in that case.
        psnr = (rmse == 0) ? 999.0f : static_cast<float>(20.0 * log10(255.0 / rmse));
    }

    // Prints the finalized statistics to stdout.
    void print()
    {
        printf("Mean absolute error: %f\n", mabse);
        printf("Max absolute error: %f\n", maxabse);
        printf("Root mean squared error: %f\n", rmse);
        printf("Peak signal to noise ratio in dB: %f\n", psnr);
    }

    int samples;    // Number of samples added so far.
    float mabse;    // Sum of absolute errors; mean absolute error after done().
    float maxabse;  // Largest absolute error seen.
    float mse;      // Sum of squared errors; mean squared error after done().
    float rmse;     // Root mean squared error, set by done().
    float psnr;     // Peak signal to noise ratio in dB, set by done().
};
// nvimgdiff entry point: compares two images pixel by pixel and prints error
// statistics for the overlapping region.
//
// Command line: nvimgdiff [options] original_file updated_file
//   -normal  accepted, but the flag is not used by the comparison yet.
//   -alpha   weight the colour error by the alpha of the original image and
//            also print the alpha channel statistics.
//
// Returns 0 when the comparison was performed, 1 on bad usage or when either
// image cannot be loaded.
int main(int argc, char *argv[])
{
    MyAssertHandler assertHandler;
    MyMessageHandler messageHandler;

    bool compareNormal = false; // Parsed but not consumed by the code below.
    bool compareAlpha = false;

    nv::Path input0;
    nv::Path input1;
    nv::Path output; // Never assigned; the [output] argument is not parsed yet.

    // Parse arguments. Options come first; the first non-option argument is
    // the original file and must be followed directly by the updated file.
    for (int i = 1; i < argc; i++)
    {
        // Input options.
        if (strcmp("-normal", argv[i]) == 0)
        {
            compareNormal = true;
        }
        else if (strcmp("-alpha", argv[i]) == 0)
        {
            compareAlpha = true;
        }
        else if (argv[i][0] != '-')
        {
            input0 = argv[i];

            if (i+1 < argc && argv[i+1][0] != '-') {
                input1 = argv[i+1];
            }

            break;
        }
    }

    if (input0.isNull() || input1.isNull())
    {
        printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");

        printf("usage: nvimgdiff [options] original_file updated_file [output]\n\n");

        printf("Diff options:\n");
        printf(" -normal \tCompare images as if they were normal maps.\n");
        printf(" -alpha \tCompare alpha weighted images.\n");

        return 1;
    }

    // loadImage has already printed the reason; report failure to the caller
    // through a non-zero exit code.
    nv::Image image0, image1;
    if (!loadImage(image0, input0)) return 1;
    if (!loadImage(image1, input1)) return 1;

    const uint w0 = image0.width();
    const uint h0 = image0.height();
    const uint w1 = image1.width();
    const uint h1 = image1.height();

    // Only the region present in both images is compared.
    const uint w = nv::min(w0, w1);
    const uint h = nv::min(h0, h1);

    // Compute errors.
    Error error_r;
    Error error_g;
    Error error_b;
    Error error_a;
    Error error_total;

    for (uint y = 0; y < h; y++)
    {
        for (uint x = 0; x < w; x++)
        {
            const nv::Color32 c0(image0.pixel(x, y));
            const nv::Color32 c1(image1.pixel(x, y));

            // Signed per-channel difference, original minus updated.
            float r = c0.r - c1.r;
            float g = c0.g - c1.g;
            float b = c0.b - c1.b;
            float a = c0.a - c1.a;

            error_r.addSample(r);
            error_g.addSample(g);
            error_b.addSample(b);
            error_a.addSample(a);

            if (compareAlpha)
            {
                // Scale the colour error by the original pixel's alpha.
                error_total.addSample(r * c0.a / 255.0f);
                error_total.addSample(g * c0.a / 255.0f);
                error_total.addSample(b * c0.a / 255.0f);
            }
            else
            {
                error_total.addSample(r);
                error_total.addSample(g);
                error_total.addSample(b);
            }
        }
    }

    error_r.done();
    error_g.done();
    error_b.done();
    error_a.done();
    error_total.done();

    // The sizes are unsigned, so they are printed with %u.
    printf("Image size compared: %ux%u\n", w, h);
    if (w != w0 || w != w1 || h != h0 || h != h1) {
        printf("--- NOTE: only the overlap between the 2 images (%u,%u) and (%u,%u) was compared\n", w0, h0, w1, h1);
    }
    printf("Total pixels: %u\n", w*h);

    printf("Color:\n");
    error_total.print();

    if (compareAlpha)
    {
        printf("Alpha:\n");
        error_a.print();
    }

    // @@ Write image difference.

    return 0;
}