Tag 2.0.6 for release.

This commit is contained in:
castano
2009-03-19 19:06:30 +00:00
parent 820eb374d5
commit de8f0153c0
306 changed files with 9379 additions and 20606 deletions

View File

@ -5,8 +5,8 @@ ADD_SUBDIRECTORY(squish)
SET(NVTT_SRCS
nvtt.h
nvtt.cpp
Context.h
Context.cpp
Compressor.h
Compressor.cpp
nvtt_wrapper.h
nvtt_wrapper.cpp
CompressDXT.h
@ -24,7 +24,6 @@ SET(NVTT_SRCS
InputOptions.cpp
OutputOptions.h
OutputOptions.cpp
Texture.h Texture.cpp
cuda/CudaUtils.h
cuda/CudaUtils.cpp
cuda/CudaMath.h
@ -44,7 +43,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVTT_EXPORTS)
IF(NVTT_SHARED)
IF(NVTT_SHARED)
ADD_DEFINITIONS(-DNVTT_SHARED=1)
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
@ -60,5 +60,54 @@ INSTALL(TARGETS nvtt
INSTALL(FILES nvtt.h DESTINATION include/nvtt)
ADD_SUBDIRECTORY(tools)
ADD_SUBDIRECTORY(tests)
# test executables
ADD_EXECUTABLE(nvcompress tools/compress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvdecompress tools/decompress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)
ADD_EXECUTABLE(nvddsinfo tools/ddsinfo.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)
ADD_EXECUTABLE(nvimgdiff tools/imgdiff.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)
ADD_EXECUTABLE(nvassemble tools/assemble.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin)
# UI tools
IF(QT4_FOUND AND NOT MSVC)
SET(QT_USE_QTOPENGL TRUE)
INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
SET(SRCS
tools/main.cpp
tools/configdialog.h
tools/configdialog.cpp)
SET(LIBS
nvtt
${QT_QTCORE_LIBRARY}
${QT_QTGUI_LIBRARY}
${QT_QTOPENGL_LIBRARY})
QT4_WRAP_UI(UICS tools/configdialog.ui)
QT4_WRAP_CPP(MOCS tools/configdialog.h)
#QT4_ADD_RESOURCES(RCCS tools/configdialog.rc)
ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
ENDIF(QT4_FOUND AND NOT MSVC)

View File

@ -21,6 +21,13 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Memory.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "nvtt.h"
#include "CompressDXT.h"
#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
@ -29,33 +36,22 @@
// squish
#include "squish/colourset.h"
//#include "squish/clusterfit.h"
#include "squish/fastclusterfit.h"
#include "squish/weightedclusterfit.h"
#include <nvtt/nvtt.h>
#include <nvcore/Memory.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
// s3_quant
#if defined(HAVE_S3QUANT)
#include "extern/s3tc/s3_quant.h"
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
#include "extern/atitc/ATI_Compress.h"
#endif
// squish
#if defined(HAVE_SQUISH)
#include "extern/squish/squish.h"
#include "atitc/ATI_Compress.h"
#endif
//#include <time.h>
using namespace nv;
using namespace nvtt;
@ -209,9 +205,9 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
ColorBlock rgba;
BlockDXT1 block;
nvsquish::WeightedClusterFit fit;
//nvsquish::ClusterFit fit;
//nvsquish::FastClusterFit fit;
//squish::WeightedClusterFit fit;
//squish::ClusterFit fit;
squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
@ -225,8 +221,8 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
}
else
{
nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
fit.SetColourSet(&colours, nvsquish::kDxt1);
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}
@ -246,7 +242,7 @@ void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compr
ColorBlock rgba;
BlockDXT1 block;
nvsquish::WeightedClusterFit fit;
squish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
@ -269,8 +265,8 @@ void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compr
}
else
{
nvsquish::ColourSet colours((uint8 *)rgba.colors(), nvsquish::kDxt1|nvsquish::kWeightColourByAlpha);
fit.SetColourSet(&colours, nvsquish::kDxt1);
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}
@ -290,15 +286,10 @@ void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compre
ColorBlock rgba;
BlockDXT3 block;
nvsquish::WeightedClusterFit fit;
squish::WeightedClusterFit fit;
//squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
int flags = 0;
if (m_alphaMode == AlphaMode_Transparency)
{
flags = nvsquish::kWeightColourByAlpha;
}
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
@ -314,7 +305,7 @@ void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compre
}
else
{
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
@ -334,15 +325,9 @@ void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compre
ColorBlock rgba;
BlockDXT5 block;
nvsquish::WeightedClusterFit fit;
squish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
int flags = 0;
if (m_alphaMode == AlphaMode_Transparency)
{
flags = nvsquish::kWeightColourByAlpha;
}
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
@ -365,7 +350,7 @@ void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compre
}
else
{
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
@ -386,9 +371,6 @@ void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compr
ColorBlock rgba;
BlockDXT5 block;
nvsquish::WeightedClusterFit fit;
fit.SetMetric(0, 1, 0);
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
@ -407,18 +389,7 @@ void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compr
}
// Compress Y.
//OptimalCompress::compressDXT1G(rgba, &block.color);
/*if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1G(rgba.color(0), &block.color);
}
else*/
{
nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, 0);
fit.Compress(&block.color);
}
OptimalCompress::compressDXT1G(rgba, &block.color);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -624,27 +595,3 @@ void nv::atiCompressDXT1(const Image * image, const OutputOptions::Private & out
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
// Compress the given image to DXT1 with the external squish library and hand
// the compressed blocks to the output handler, when one is set.
//
// image          Source image; it is copied, never modified.
// outputOptions  Provides the optional output handler that receives the data.
void nv::squishCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
{
	// Work on a private copy so the caller's pixels stay untouched.
	Image img(*image);
	int count = img.width() * img.height();
	for (int i = 0; i < count; i++)
	{
		// Exchange the r and b components of every pixel before passing the data to squish.
		Color32 c = img.pixel(i);
		img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
	}

	int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
	void * blocks = malloc(size);

	squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);

	if (outputOptions.outputHandler != NULL) {
		outputOptions.outputHandler->writeData(blocks, size);
	}

	// The temporary block buffer is owned by this function; release it once
	// the data has been handed over (it used to be leaked on every call).
	free(blocks);
}
#endif // defined(HAVE_SQUISH)

View File

@ -32,14 +32,14 @@ namespace nv
class Image;
class FloatImage;
class FastCompressor
{
public:
FastCompressor();
~FastCompressor();
class FastCompressor
{
public:
FastCompressor();
~FastCompressor();
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
@ -47,9 +47,9 @@ namespace nv
void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
class SlowCompressor
{
@ -68,7 +68,7 @@ namespace nv
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
@ -81,10 +81,6 @@ namespace nv
void atiCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
#endif
#if defined(HAVE_SQUISH)
void squishCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
#endif
} // nv namespace

View File

@ -21,19 +21,16 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvimage/Image.h>
#include <nvimage/PixelFormat.h>
#include <nvmath/Color.h>
#include "CompressRGB.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvimage/PixelFormat.h>
#include <nvmath/Color.h>
#include <nvmath/Half.h>
#include <nvcore/Debug.h>
using namespace nv;
using namespace nvtt;
@ -69,53 +66,29 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
const uint w = image->width();
const uint h = image->height();
uint bitCount;
uint rmask, rshift, rsize;
uint gmask, gshift, gsize;
uint bmask, bshift, bsize;
uint amask, ashift, asize;
if (compressionOptions.bitcount != 0)
{
bitCount = compressionOptions.bitcount;
nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);
rmask = compressionOptions.rmask;
gmask = compressionOptions.gmask;
bmask = compressionOptions.bmask;
amask = compressionOptions.amask;
PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
PixelFormat::maskShiftAndSize(amask, &ashift, &asize);
}
else
{
rsize = compressionOptions.rsize;
gsize = compressionOptions.gsize;
bsize = compressionOptions.bsize;
asize = compressionOptions.asize;
bitCount = rsize + gsize + bsize + asize;
nvCheck(bitCount <= 32);
ashift = 0;
bshift = ashift + asize;
gshift = bshift + bsize;
rshift = gshift + gsize;
rmask = ((1 << rsize) - 1) << rshift;
gmask = ((1 << gsize) - 1) << gshift;
bmask = ((1 << bsize) - 1) << bshift;
amask = ((1 << asize) - 1) << ashift;
}
const uint bitCount = compressionOptions.bitcount;
nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);
const uint byteCount = bitCount / 8;
const uint rmask = compressionOptions.rmask;
uint rshift, rsize;
PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
const uint gmask = compressionOptions.gmask;
uint gshift, gsize;
PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
const uint bmask = compressionOptions.bmask;
uint bshift, bsize;
PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
const uint amask = compressionOptions.amask;
uint ashift, asize;
PixelFormat::maskShiftAndSize(amask, &ashift, &asize);
// Determine pitch.
uint pitch = computePitch(w, bitCount);
uint pitch = computePitch(w, compressionOptions.bitcount);
uint8 * dst = (uint8 *)mem::malloc(pitch + 4);
@ -150,7 +123,7 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
}
// Zero padding.
for (uint x = w; x < pitch; x++)
for (uint x = w * byteCount; x < pitch; x++)
{
*(dst + x) = 0;
}
@ -165,75 +138,3 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
mem::free(dst);
}
void nv::compressRGB(const FloatImage * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
nvCheck(image != NULL);
const uint w = image->width();
const uint h = image->height();
const uint rsize = compressionOptions.rsize;
const uint gsize = compressionOptions.gsize;
const uint bsize = compressionOptions.bsize;
const uint asize = compressionOptions.asize;
nvCheck(rsize == 0 || rsize == 16 || rsize == 32);
nvCheck(gsize == 0 || gsize == 16 || gsize == 32);
nvCheck(bsize == 0 || bsize == 16 || bsize == 32);
nvCheck(asize == 0 || asize == 16 || asize == 32);
const uint bitCount = rsize + gsize + bsize + asize;
const uint byteCount = bitCount / 8;
const uint pitch = w * byteCount;
uint8 * dst = (uint8 *)mem::malloc(pitch);
for (uint y = 0; y < h; y++)
{
const float * rchannel = image->scanline(y, 0);
const float * gchannel = image->scanline(y, 1);
const float * bchannel = image->scanline(y, 2);
const float * achannel = image->scanline(y, 3);
union FLOAT
{
float f;
uint32 u;
};
uint8 * ptr = dst;
for (uint x = 0; x < w; x++)
{
FLOAT r, g, b, a;
r.f = rchannel[x];
g.f = gchannel[x];
b.f = bchannel[x];
a.f = achannel[x];
if (rsize == 32) *((uint32 *)ptr) = r.u;
else if (rsize == 16) *((uint16 *)ptr) = half_from_float(r.u);
ptr += rsize / 8;
if (gsize == 32) *((uint32 *)ptr) = g.u;
else if (gsize == 16) *((uint16 *)ptr) = half_from_float(g.u);
ptr += gsize / 8;
if (bsize == 32) *((uint32 *)ptr) = b.u;
else if (bsize == 16) *((uint16 *)ptr) = half_from_float(b.u);
ptr += bsize / 8;
if (asize == 32) *((uint32 *)ptr) = a.u;
else if (asize == 16) *((uint16 *)ptr) = half_from_float(a.u);
ptr += asize / 8;
}
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(dst, pitch);
}
}
mem::free(dst);
}

View File

@ -29,11 +29,9 @@
namespace nv
{
class Image;
class FloatImage;
// Pixel format converter.
void compressRGB(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressRGB(const FloatImage * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
} // nv namespace

View File

@ -117,36 +117,8 @@ void CompressionOptions::setPixelFormat(uint bitcount, uint rmask, uint gmask, u
m.gmask = gmask;
m.bmask = bmask;
m.amask = amask;
m.rsize = 0;
m.gsize = 0;
m.bsize = 0;
m.asize = 0;
}
/// Set the pixel format by giving the size in bits of each channel.
/// The bit count and the channel masks are reset to 0; only the sizes are stored.
/// Every channel size must be at most 32 bits.
void CompressionOptions::setPixelFormat(uint8 rsize, uint8 gsize, uint8 bsize, uint8 asize)
{
	// All four sizes have to be in range. The previous check combined the
	// conditions with '||' and therefore only failed when every size was too large.
	nvCheck(rsize <= 32 && gsize <= 32 && bsize <= 32 && asize <= 32);

	m.bitcount = 0;
	m.rmask = 0;
	m.gmask = 0;
	m.bmask = 0;
	m.amask = 0;

	m.rsize = rsize;
	m.gsize = gsize;
	m.bsize = bsize;
	m.asize = asize;
}
/// Set pixel type.
/// Stores the given value in the private options; nothing else is changed.
void CompressionOptions::setPixelType(PixelType pixelType)
{
m.pixelType = pixelType;
}
/// Use external compressor.
void CompressionOptions::setExternalCompressor(const char * name)
{

View File

@ -45,12 +45,6 @@ namespace nvtt
uint gmask;
uint bmask;
uint amask;
uint8 rsize;
uint8 gsize;
uint8 bsize;
uint8 asize;
PixelType pixelType;
nv::String externalCompressor;

854
src/nvtt/Compressor.cpp Normal file
View File

@ -0,0 +1,854 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <nvcore/Memory.h>
#include <nvcore/Ptr.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Filter.h>
#include <nvimage/Quantize.h>
#include <nvimage/NormalMap.h>
#include <nvimage/PixelFormat.h>
#include "Compressor.h"
#include "InputOptions.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include "CompressDXT.h"
#include "CompressRGB.h"
#include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h"
using namespace nv;
using namespace nvtt;
namespace
{
// Size in bytes of one compressed block of the given format.
// Returns 0 for every format that is not listed below.
static int blockSize(Format format)
{
	const bool eightBytes = format == Format_DXT1 || format == Format_DXT1a || format == Format_BC4;
	if (eightBytes)
	{
		return 8;
	}

	const bool sixteenBytes = format == Format_DXT3 || format == Format_DXT5 || format == Format_DXT5n || format == Format_BC5;
	return sixteenBytes ? 16 : 0;
}
// Number of bytes in one scanline of 'w' pixels of 'bitsize' bits each.
// The pixel size is rounded up to whole bytes and the scanline is rounded up
// to a multiple of 4 bytes.
inline uint computePitch(uint w, uint bitsize)
{
	const uint bytesPerPixel = (bitsize + 7) / 8;
	const uint rowBytes = w * bytesPerPixel;

	// Round up to the next multiple of 4 (32 bit alignment).
	return (rowBytes + 3) & ~3u;
}
// Size in bytes of one image of the given extents.
// For Format_RGBA this is depth * height * pitch; for every other format it is
// the number of 4x4 blocks of a single slice times the block size (depth is
// not taken into account).
static int computeImageSize(uint w, uint h, uint d, uint bitCount, Format format)
{
	if (format != Format_RGBA)
	{
		// @@ Handle 3D textures. DXT and VTC have different behaviors.
		const uint blocksAcross = (w + 3) / 4;
		const uint blocksDown = (h + 3) / 4;
		return blocksAcross * blocksDown * blockSize(format);
	}

	return d * h * computePitch(w, bitCount);
}
} // namespace
namespace nvtt
{
// A mipmap is backed by one of:
// - a pointer to an input image (not owned),
// - a fixed point image (owned),
// - a floating point image (owned).
struct Mipmap
{
	Mipmap() : m_inputImage(NULL) {}
	~Mipmap() {}

	// Point at the input image with the given index; both owned images are dropped.
	void setFromInput(const InputOptions::Private & inputOptions, uint idx)
	{
		m_floatImage = NULL;
		m_fixedImage = NULL;
		m_inputImage = inputOptions.image(idx);
	}

	// Take ownership of the given float image; the input reference and the fixed image are dropped.
	void setImage(FloatImage * image)
	{
		m_inputImage = NULL;
		m_fixedImage = NULL;
		m_floatImage = image;
	}

	// Make sure a fixed image is available, building it from the linear float image when needed.
	void toFixedImage(const InputOptions::Private & inputOptions)
	{
		if (this->asFixedImage() != NULL)
		{
			// Input or fixed image already present.
			return;
		}

		nvDebugCheck(m_floatImage != NULL);

		// Normal maps and an output gamma of 1 are converted without gamma correction.
		const bool plainConversion = inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f;
		if (plainConversion)
		{
			m_fixedImage = m_floatImage->createImage();
		}
		else
		{
			m_fixedImage = m_floatImage->createImageGammaCorrect(inputOptions.outputGamma);
		}
	}

	// Make sure a linear float image is available, building it from the fixed image when needed.
	void toFloatImage(const InputOptions::Private & inputOptions)
	{
		if (m_floatImage != NULL)
		{
			return;
		}

		nvDebugCheck(this->asFixedImage() != NULL);
		m_floatImage = new FloatImage(this->asFixedImage());

		// Normal maps are left as they are (expanding them to the [-1, 1]
		// range is not done here); color images are converted to linear space.
		if (!inputOptions.isNormalMap && inputOptions.inputGamma != 1.0f)
		{
			m_floatImage->toLinear(0, 3, inputOptions.inputGamma);
		}
	}

	const FloatImage * asFloatImage() const
	{
		return m_floatImage.ptr();
	}

	FloatImage * asFloatImage()
	{
		return m_floatImage.ptr();
	}

	// The input image when one is referenced, the owned fixed image otherwise.
	const Image * asFixedImage() const
	{
		return (m_inputImage != NULL) ? m_inputImage : m_fixedImage.ptr();
	}

	// Fixed image that may be modified: a referenced input image is copied first.
	Image * asMutableFixedImage()
	{
		if (m_inputImage != NULL)
		{
			// Never write to the input image; continue with a private copy.
			m_fixedImage = new Image(*m_inputImage);
			m_inputImage = NULL;
		}
		return m_fixedImage.ptr();
	}

private:
	const Image * m_inputImage;
	AutoPtr<Image> m_fixedImage;
	AutoPtr<FloatImage> m_floatImage;
};
} // nvtt namespace
/// Create the compressor and, when CUDA hardware is present, set up the CUDA compressor.
Compressor::Compressor() : m(*new Compressor::Private())
{
	// CUDA is enabled by default whenever it is supported.
	m.cudaSupported = cuda::isHardwarePresent();
	m.cudaEnabled = m.cudaSupported;

	if (!m.cudaEnabled)
	{
		return;
	}

	// Run on the fastest CUDA device.
	const int device = cuda::getFastestDevice();
	cuda::setDevice(device);

	m.cuda = new CudaCompressor();

	if (m.cuda->isValid())
	{
		return;
	}

	// The CUDA compressor is not usable: fall back to the CPU path.
	m.cudaEnabled = false;
	m.cuda = NULL;
}
/// Destroy the compressor.
Compressor::~Compressor()
{
// Delete the private implementation object that 'm' refers to.
delete &m;
// NOTE(review): presumably shuts down the CUDA state set up through cuda::setDevice - confirm in cuda/CudaUtils.
cuda::exit();
}
/// Turn CUDA acceleration on or off.
/// The request is ignored when CUDA is not supported. The CUDA compressor is
/// created the first time acceleration is enabled; if it turns out to be
/// invalid, acceleration is disabled again.
void Compressor::enableCudaAcceleration(bool enable)
{
	if (m.cudaSupported)
	{
		m.cudaEnabled = enable;
	}

	const bool needsCudaCompressor = m.cudaEnabled && m.cuda == NULL;
	if (!needsCudaCompressor)
	{
		return;
	}

	// Run on the fastest CUDA device.
	const int device = cuda::getFastestDevice();
	cuda::setDevice(device);

	m.cuda = new CudaCompressor();

	if (m.cuda->isValid())
	{
		return;
	}

	// Not usable: keep compressing on the CPU.
	m.cudaEnabled = false;
	m.cuda = NULL;
}
/// Tell whether CUDA acceleration is currently enabled.
bool Compressor::isCudaAccelerationEnabled() const
{
	const bool enabled = m.cudaEnabled;
	return enabled;
}
/// Compress the input texture with the given compression options and send the
/// result to the given output. Returns the result of the private implementation.
bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
{
	const bool succeeded = m.compress(inputOptions.m, compressionOptions.m, outputOptions.m);
	return succeeded;
}
/// Estimate the size of the output produced by compressing the input with the given options.
int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const
{
	const int estimate = m.estimateSize(inputOptions.m, compressionOptions.m);
	return estimate;
}
// Run the whole pipeline: open the output, write the DDS header and compress
// all the mipmaps of every face.
// Returns false when the output cannot be opened or when any later step
// fails. Once the output has been opened it is closed again on every exit path.
bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
	// Make sure enums match.
	nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
	nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
	nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);

	// Get output handler.
	if (!outputOptions.openFile())
	{
		if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
		return false;
	}

	inputOptions.computeTargetExtents();

	// Output DDS header.
	bool succeeded = outputHeader(inputOptions, compressionOptions, outputOptions);

	// Stop at the first face that fails.
	for (uint f = 0; succeeded && f < inputOptions.faceCount; f++)
	{
		succeeded = compressMipmaps(f, inputOptions, compressionOptions, outputOptions);
	}

	// Close the output on failure as well; it used to stay open when the
	// header or one of the faces could not be written.
	outputOptions.closeFile();

	return succeeded;
}
// Build the DDS header for the target texture and pass it to the output handler.
// Returns true when no header has to be written, otherwise the result of the
// write; a failed write is also reported to the error handler, when one is set.
bool Compressor::Private::outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
	// Without an output handler, or when the header is not wanted, there is nothing to do.
	const bool headerWanted = outputOptions.outputHandler != NULL && outputOptions.outputHeader;
	if (!headerWanted)
	{
		return true;
	}

	DDSHeader header;

	header.setWidth(inputOptions.targetWidth);
	header.setHeight(inputOptions.targetHeight);

	const int levelCount = inputOptions.realMipmapCount();
	nvDebugCheck(levelCount > 0);
	header.setMipmapCount(levelCount);

	// Texture type. 3D textures (setTexture3D / setDepth) are not handled here.
	if (inputOptions.textureType == TextureType_2D)
	{
		header.setTexture2D();
	}
	else if (inputOptions.textureType == TextureType_Cube)
	{
		header.setTextureCube();
	}

	const Format format = compressionOptions.format;

	if (format == Format_RGBA)
	{
		// Uncompressed: pitch plus explicit channel masks.
		header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
		header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
	}
	else
	{
		header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, format));

		// Pick the four character code; some formats also carry the normal map flag.
		const char * fourcc = NULL;
		bool mayFlagNormals = false;

		if (format == Format_DXT1 || format == Format_DXT1a)
		{
			fourcc = "DXT1";
			mayFlagNormals = true;
		}
		else if (format == Format_DXT3)
		{
			fourcc = "DXT3";
		}
		else if (format == Format_DXT5)
		{
			fourcc = "DXT5";
		}
		else if (format == Format_DXT5n)
		{
			fourcc = "DXT5";
			mayFlagNormals = true;
		}
		else if (format == Format_BC4)
		{
			fourcc = "ATI1";
		}
		else if (format == Format_BC5)
		{
			fourcc = "ATI2";
			mayFlagNormals = true;
		}

		if (fourcc != NULL)
		{
			header.setFourCC(fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
			if (mayFlagNormals && inputOptions.isNormalMap) header.setNormalFlag(true);
		}
	}

	// Swap bytes if necessary.
	header.swapBytes();

	// 128 bytes, plus 20 more when the DX10 extension is present.
	nvStaticCheck(sizeof(DDSHeader) == 128 + 20);
	const uint headerSize = header.hasDX10Header() ? (128 + 20) : 128;

	const bool writeSucceed = outputOptions.outputHandler->writeData(&header, headerSize);
	if (!writeSucceed && outputOptions.errorHandler != NULL)
	{
		outputOptions.errorHandler->error(Error_FileWrite);
	}

	return writeSucceed;
}
// Produce, quantize and compress every mipmap level of face 'f', starting at
// the target extents and halving them (down to 1) for each following level.
// Returns false as soon as a level cannot be initialized from the input; the
// error handler, when one is set, is told about it with Error_InvalidInput.
bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
	uint w = inputOptions.targetWidth;
	uint h = inputOptions.targetHeight;
	uint d = inputOptions.targetDepth;

	// The same mipmap object is reused so each level can be derived from the previous one.
	Mipmap mipmap;

	const uint mipmapCount = inputOptions.realMipmapCount();
	nvDebugCheck(mipmapCount > 0);

	for (uint m = 0; m < mipmapCount; m++)
	{
		if (outputOptions.outputHandler)
		{
			int size = computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.format);
			outputOptions.outputHandler->beginImage(size, w, h, d, f, m);
		}

		// @@ Where to do the color transform?
		//  - Color transform may not be linear, so we cannot do before computing mipmaps.
		//  - Should be done in linear space, that is, after gamma correction.

		if (!initMipmap(mipmap, inputOptions, w, h, d, f, m))
		{
			if (outputOptions.errorHandler != NULL)
			{
				outputOptions.errorHandler->error(Error_InvalidInput);
			}

			// Always stop here. The return used to sit inside the error handler
			// check, so without a handler the invalid mipmap was still
			// quantized and compressed.
			return false;
		}

		quantizeMipmap(mipmap, compressionOptions);

		compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);

		// Compute extents of next mipmap:
		w = max(1U, w / 2);
		h = max(1U, h / 2);
		d = max(1U, d / 2);
	}

	return true;
}
// Prepare 'mipmap' for level 'm' of face 'f' with the given extents.
// The level is taken from the input when an exact match exists, derived from
// the previous level, or resized from the closest input image.
// Returns false when no usable input image is found.
bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const
{
	// Look for an input image with exactly these extents.
	int inputIdx = findExactMipmap(inputOptions, w, h, d, f);
	const bool exactMatch = inputIdx != -1;

	// Levels after the first are generated from the previous one when there is
	// no exact input, or when normal map conversion is enabled.
	const bool deriveFromPrevious = m != 0 && (!exactMatch || inputOptions.convertToNormalMap);

	if (deriveFromPrevious)
	{
		downsampleMipmap(mipmap, inputOptions);
	}
	else
	{
		if (!exactMatch)
		{
			// Fall back to the closest input image; it is resized below.
			inputIdx = findClosestMipmap(inputOptions, w, h, d, f);
			if (inputIdx == -1)
			{
				return false;
			}
		}

		mipmap.setFromInput(inputOptions, inputIdx);

		if (!exactMatch)
		{
			scaleMipmap(mipmap, inputOptions, w, h, d);
		}

		processInputImage(mipmap, inputOptions);
	}

	// The compressors work on the fixed image.
	mipmap.toFixedImage(inputOptions);

	return true;
}
// Index of the input image of face 'f' whose extents are exactly w x h x d.
// Returns -1 when that image has no data, when an image smaller than the
// requested extents is reached first, or when no image matches.
int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
{
	const int levelCount = int(inputOptions.mipmapCount);

	int level = 0;
	while (level < levelCount)
	{
		const int idx = f * inputOptions.mipmapCount + level;
		const InputOptions::Private::InputImage & candidate = inputOptions.images[idx];

		const bool sameExtents = candidate.width == int(w) && candidate.height == int(h) && candidate.depth == int(d);
		if (sameExtents)
		{
			// Only usable when the image actually carries data.
			return (candidate.data != NULL) ? idx : -1;
		}

		const bool smaller = candidate.width < int(w) || candidate.height < int(h) || candidate.depth < int(d);
		if (smaller)
		{
			// Went past the requested size without finding it.
			break;
		}

		level++;
	}

	return -1;
}
// Index of the input image of face 'f' that is closest to the extents
// w x h x d: the last image with data found before the first one whose summed
// extent difference is negative, or that first one when nothing precedes it.
// Returns -1 when the face has no image with data.
int Compressor::Private::findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
{
	const int levelCount = int(inputOptions.mipmapCount);
	int closest = -1;

	for (int level = 0; level < levelCount; level++)
	{
		const int idx = f * inputOptions.mipmapCount + level;
		const InputOptions::Private::InputImage & candidate = inputOptions.images[idx];

		if (candidate.data == NULL)
		{
			// Levels without data cannot be used.
			continue;
		}

		const int difference = (candidate.width - w) + (candidate.height - h) + (candidate.depth - d);
		if (difference < 0)
		{
			// First image below the requested size: prefer the one found before it.
			return (closest == -1) ? idx : closest;
		}

		closest = idx;
	}

	return closest;
}
// Replace the mipmap with a downsampled version of itself, using the
// mipmap filter selected in the input options.
void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
	// Filtering works on the linear floating point representation.
	mipmap.toFloatImage(inputOptions);

	const FloatImage * source = mipmap.asFloatImage();
	const FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)inputOptions.wrapMode;

	FloatImage * reduced = NULL;

	if (inputOptions.mipmapFilter == MipmapFilter_Box)
	{
		// The box filter has a dedicated fast path.
		reduced = source->fastDownSample();
	}
	else if (inputOptions.mipmapFilter == MipmapFilter_Triangle)
	{
		TriangleFilter filter;
		reduced = source->downSample(filter, wrapMode);
	}
	else
	{
		// Kaiser is the only remaining filter.
		nvDebugCheck(inputOptions.mipmapFilter == MipmapFilter_Kaiser);
		KaiserFilter filter(inputOptions.kaiserWidth);
		filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
		reduced = source->downSample(filter, wrapMode);
	}

	// The mipmap takes ownership of the new image.
	mipmap.setImage(reduced);

	// Normalize the new level when it holds normals and normalization is requested.
	const bool holdsNormals = inputOptions.isNormalMap || inputOptions.convertToNormalMap;
	if (holdsNormals && inputOptions.normalizeMipmaps)
	{
		normalizeNormalMap(mipmap.asFloatImage());
	}
}
// Resize the mipmap to w x h with a box filter. The depth 'd' is not used.
void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const
{
	// Resizing works on the floating point representation.
	mipmap.toFloatImage(inputOptions);

	// @@ Add more filters.
	// @@ Select different filters for downscaling and reconstruction.

	BoxFilter boxFilter;
	const FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)inputOptions.wrapMode;

	FloatImage * resized = mipmap.asFloatImage()->resize(boxFilter, w, h, wrapMode);

	// The mipmap takes ownership of the resized image.
	mipmap.setImage(resized);
}
// Prepare an image that comes from the input: convert it to a normal map,
// normalize it, or make its linear float representation available.
void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
	if (inputOptions.convertToNormalMap)
	{
		// The normal map is computed from the fixed image.
		mipmap.toFixedImage(inputOptions);

		Vector4 heightScale = inputOptions.heightFactors;
		FloatImage * normalMap = createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale);
		mipmap.setImage(normalMap);
		return;
	}

	if (inputOptions.isNormalMap)
	{
		if (!inputOptions.normalizeMipmaps)
		{
			return;
		}

		// Normalize the float image in place when there is one, otherwise
		// create it from the fixed image first.
		FloatImage * normals = mipmap.asFloatImage();
		if (normals == NULL)
		{
			normals = new FloatImage(mipmap.asFixedImage());
		}

		normalizeNormalMap(normals);

		// Setting the image again drops the input reference and the fixed image.
		mipmap.setImage(normals);
		return;
	}

	// Color image: the float image is only needed when the gamma changes.
	if (inputOptions.inputGamma != inputOptions.outputGamma)
	{
		mipmap.toFloatImage(inputOptions);
	}
}
// Apply the quantization requested by the compression options to the fixed
// image of the mipmap: binary alpha first, then color and/or alpha dithering.
void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const
{
	nvDebugCheck(mipmap.asFixedImage() != NULL);

	if (compressionOptions.binaryAlpha)
	{
		Image * target = mipmap.asMutableFixedImage();
		if (compressionOptions.enableAlphaDithering)
		{
			Quantize::FloydSteinberg_BinaryAlpha(target, compressionOptions.alphaThreshold);
		}
		else
		{
			Quantize::BinaryAlpha(target, compressionOptions.alphaThreshold);
		}
	}

	const bool anyDithering = compressionOptions.enableColorDithering || compressionOptions.enableAlphaDithering;
	if (!anyDithering)
	{
		return;
	}

	// Channels that are not dithered keep their 8 bits.
	uint rsize = 8;
	uint gsize = 8;
	uint bsize = 8;
	uint asize = 8;

	const bool isRGB = compressionOptions.format == Format_RGB;

	if (compressionOptions.enableColorDithering)
	{
		const bool isDXT = compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5;
		if (isDXT)
		{
			// 5:6:5 colors.
			rsize = 5;
			gsize = 6;
			bsize = 5;
		}
		else if (isRGB)
		{
			// Take the channel sizes from the color masks.
			uint rshift, gshift, bshift;
			PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize);
			PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize);
			PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize);
		}
	}

	if (compressionOptions.enableAlphaDithering)
	{
		if (compressionOptions.format == Format_DXT3)
		{
			asize = 4;
		}
		else if (isRGB)
		{
			uint ashift;
			PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize);
		}
	}

	if (compressionOptions.binaryAlpha)
	{
		// Already quantized.
		asize = 8;
	}

	Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
}
// Compress the given mipmap.
// Dispatches the mipmap's fixed-point image to the compressor that matches
// compressionOptions.format and compressionOptions.quality. As written, this
// function always returns true, including when the format matches none of the
// branches below (in which case nothing is compressed).
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
const Image * image = mipmap.asFixedImage();
nvDebugCheck(image != NULL);
// Both CPU compressors are bound to the image up front, regardless of which one ends up being used.
FastCompressor fast;
fast.setImage(image, inputOptions.alphaMode);
SlowCompressor slow;
slow.setImage(image, inputOptions.alphaMode);
// The CUDA path is only taken when CUDA is enabled and the image has at least 512 pixels.
const bool useCuda = cudaEnabled && image->width() * image->height() >= 512;
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{
compressRGB(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_DXT1)
{
// NOTE: each optional external compressor below ends in a dangling 'else' that
// chains into the next statement after its #endif. Keep the order intact.
#if defined(HAVE_S3QUANT)
if (compressionOptions.externalCompressor == "s3")
{
s3CompressDXT1(image, outputOptions);
}
else
#endif
#if defined(HAVE_ATITC)
if (compressionOptions.externalCompressor == "ati")
{
atiCompressDXT1(image, outputOptions);
}
else
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
}
else
{
slow.compressDXT1(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT1a)
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1a(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
// The CUDA call is commented out here: DXT1a goes through the CPU compressor even when useCuda is set.
/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
}
else
{
slow.compressDXT1a(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT3(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT3(compressionOptions, outputOptions);
}
else
{
slow.compressDXT3(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT5)
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT5(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT5(compressionOptions, outputOptions);
}
else
{
slow.compressDXT5(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT5n)
{
// DXT5n has no CUDA branch here; only the fast and slow CPU compressors are used.
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT5n(outputOptions);
}
else
{
slow.compressDXT5n(compressionOptions, outputOptions);
}
}
else if (compressionOptions.format == Format_BC4)
{
// BC4 and BC5 always use the slow compressor, whatever the quality setting.
slow.compressBC4(compressionOptions, outputOptions);
}
else if (compressionOptions.format == Format_BC5)
{
slow.compressBC5(compressionOptions, outputOptions);
}
return true;
}
// Estimate the size of the output by summing computeImageSize() over every
// mipmap level of every face, starting from the target extents computed by
// inputOptions.computeTargetExtents().
int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const
{
	const Format format = compressionOptions.format;
	const uint bitCount = compressionOptions.bitcount;

	// Refresh targetWidth/targetHeight/targetDepth before reading them.
	inputOptions.computeTargetExtents();

	uint levelCount = inputOptions.realMipmapCount();

	int total = 0;

	for (uint face = 0; face < inputOptions.faceCount; face++)
	{
		uint width = inputOptions.targetWidth;
		uint height = inputOptions.targetHeight;
		uint depth = inputOptions.targetDepth;

		for (uint level = 0; level < levelCount; level++)
		{
			total += computeImageSize(width, height, depth, bitCount, format);

			// Each level halves every extent, never going below one.
			width = max(1U, width / 2);
			height = max(1U, height / 2);
			depth = max(1U, depth / 2);
		}
	}

	return total;
}

View File

@ -58,7 +58,6 @@ namespace nvtt
void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;

File diff suppressed because it is too large Load Diff

View File

@ -23,11 +23,8 @@
#include <string.h> // memcpy
#include <nvcore/Containers.h> // nextPowerOfTwo
#include <nvcore/Memory.h>
#include <nvmath/Color.h>
#include "nvtt.h"
#include "InputOptions.h"
@ -104,8 +101,6 @@ void InputOptions::reset()
m.colorTransform = ColorTransform_None;
m.linearTransform = Matrix(identity);
for (int i = 0; i < 4; i++) m.colorOffsets[i] = 0;
for (int i = 0; i < 4; i++) m.swizzleTransform[i] = i;
m.generateMipmaps = true;
m.maxLevel = -1;
@ -123,8 +118,6 @@ void InputOptions::reset()
m.maxExtent = 0;
m.roundMode = RoundMode_None;
m.premultiplyAlpha = false;
}
@ -168,8 +161,7 @@ void InputOptions::setTextureLayout(TextureType type, int width, int height, int
img.mipLevel = mipLevel;
img.face = f;
img.uint8data = NULL;
img.floatdata = NULL;
img.data = NULL;
w = max(1U, w / 2);
h = max(1U, h / 2);
@ -207,116 +199,14 @@ bool InputOptions::setMipmapData(const void * data, int width, int height, int d
return false;
}
switch(m.inputFormat)
{
case InputFormat_BGRA_8UB:
if (Image * image = new nv::Image())
{
image->allocate(width, height);
memcpy(image->pixels(), data, width * height * 4);
m.images[idx].uint8data = image;
}
else
{
// @@ Out of memory error.
return false;
}
break;
case InputFormat_RGBA_32F:
if (FloatImage * image = new nv::FloatImage())
{
const float * floatData = (const float *)data;
image->allocate(4, width, height);
for (int c = 0; c < 4; c++)
{
float * channel = image->channel(c);
for (int i = 0; i < width * height; i++)
{
channel[i] = floatData[i*4 + c];
}
}
m.images[idx].floatdata = image;
}
else
{
// @@ Out of memory error.
return false;
}
break;
default:
return false;
}
m.images[idx].data = new nv::Image();
m.images[idx].data->allocate(width, height);
memcpy(m.images[idx].data->pixels(), data, width * height * 4);
return true;
}
// Copies data
// Copies a single channel of the given mipmap into the stored input image,
// allocating (and zero-initializing) that image first if it does not exist yet.
// Returns false when the extents/face/level do not match the declared layout,
// or when the input format is neither InputFormat_BGRA_8UB nor InputFormat_RGBA_32F.
// NOTE(review): for InputFormat_BGRA_8UB the copy itself is not implemented
// (see the TODO below), yet the function still returns true.
bool InputOptions::setMipmapChannelData(const void * data, int channel, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/)
{
nvCheck(depth == 1);
nvCheck(channel >= 0 && channel < 4);
// Images are indexed face-major: all mipmaps of face 0, then all mipmaps of face 1, ...
const int idx = face * m.mipmapCount + mipLevel;
if (m.images[idx].width != width || m.images[idx].height != height || m.images[idx].depth != depth || m.images[idx].mipLevel != mipLevel || m.images[idx].face != face)
{
// Invalid dimension or index.
return false;
}
// Allocate image if not allocated already.
// Only the image matching the current input format is kept; the other one is reset to NULL.
if (m.inputFormat == InputFormat_BGRA_8UB)
{
m.images[idx].floatdata = NULL;
if (m.images[idx].uint8data == NULL)
{
m.images[idx].uint8data = new Image();
m.images[idx].uint8data->allocate(width, height);
m.images[idx].uint8data->fill(Color32(0,0,0,0));
}
}
else if (m.inputFormat == InputFormat_RGBA_32F)
{
m.images[idx].uint8data = NULL;
if (m.images[idx].floatdata == NULL)
{
m.images[idx].floatdata = new FloatImage();
m.images[idx].floatdata->allocate(4, width, height);
m.images[idx].floatdata->clear();
}
}
else
{
m.images[idx].floatdata = NULL;
m.images[idx].uint8data = NULL;
return false;
}
// Copy channel data to image.
if (m.inputFormat == InputFormat_BGRA_8UB)
{
// @@ TODO
}
else if (m.inputFormat == InputFormat_RGBA_32F)
{
// The source is read as width * height tightly packed floats for this one channel.
const float * floatData = (const float *)data;
float * channelPtr = m.images[idx].floatdata->channel(channel);
for (int i = 0; i < width * height; i++)
{
channelPtr[i] = floatData[i];
}
}
return true;
}
/// Describe the format of the input.
void InputOptions::setFormat(InputFormat format)
{
@ -411,32 +301,8 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2,
{
nvCheck(channel >= 0 && channel < 4);
m.linearTransform(channel, 0) = w0;
m.linearTransform(channel, 1) = w1;
m.linearTransform(channel, 2) = w2;
m.linearTransform(channel, 3) = w3;
}
// Set the linear transform weights of the given output channel (0..3) together
// with the offset stored for that channel in m.colorOffsets. The weights are
// forwarded to the four-weight overload.
void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset)
{
nvCheck(channel >= 0 && channel < 4);
setLinearTransform(channel, w0, w1, w2, w3);
m.colorOffsets[channel] = offset;
}
void InputOptions::setSwizzleTransform(int x, int y, int z, int w)
{
nvCheck(x >= 0 && x <= 6);
nvCheck(y >= 0 && y <= 6);
nvCheck(z >= 0 && z <= 6);
nvCheck(w >= 0 && w <= 6);
m.swizzleTransform[0] = x;
m.swizzleTransform[1] = y;
m.swizzleTransform[2] = z;
m.swizzleTransform[3] = w;
Vector4 w(w0, w1, w2, w3);
//m.linearTransform.setRow(channel, w);
}
void InputOptions::setMaxExtents(int e)
@ -450,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode)
m.roundMode = mode;
}
// Store the premultiply-alpha flag in the input options.
void InputOptions::setPremultiplyAlpha(bool b)
{
m.premultiplyAlpha = b;
}
void InputOptions::Private::computeTargetExtents() const
{
@ -533,7 +395,7 @@ const Image * InputOptions::Private::image(uint face, uint mipmap) const
nvDebugCheck(image.face == face);
nvDebugCheck(image.mipLevel == mipmap);
return image.uint8data.ptr();
return image.data.ptr();
}
const Image * InputOptions::Private::image(uint idx) const
@ -542,14 +404,5 @@ const Image * InputOptions::Private::image(uint idx) const
const InputImage & image = this->images[idx];
return image.uint8data.ptr();
}
const FloatImage * InputOptions::Private::floatImage(uint idx) const
{
nvDebugCheck(idx < faceCount * mipmapCount);
const InputImage & image = this->images[idx];
return image.floatdata.ptr();
return image.data.ptr();
}

View File

@ -28,7 +28,6 @@
#include <nvmath/Vector.h>
#include <nvmath/Matrix.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include "nvtt.h"
namespace nvtt
@ -57,8 +56,6 @@ namespace nvtt
// Color transform.
ColorTransform colorTransform;
nv::Matrix linearTransform;
float colorOffsets[4];
uint swizzleTransform[4];
// Mipmap generation options.
bool generateMipmaps;
@ -81,8 +78,6 @@ namespace nvtt
uint maxExtent;
RoundMode roundMode;
bool premultiplyAlpha;
// @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere...
mutable uint targetWidth;
mutable uint targetHeight;
@ -94,9 +89,7 @@ namespace nvtt
int realMipmapCount() const;
const nv::Image * image(uint face, uint mipmap) const;
const nv::Image * image(uint idx) const;
const nv::FloatImage * floatImage(uint idx) const;
const nv::Image * image(uint idx) const;
};
@ -105,8 +98,6 @@ namespace nvtt
{
InputImage() {}
bool hasValidData() const { return uint8data != NULL || floatdata != NULL; }
int mipLevel;
int face;
@ -114,8 +105,7 @@ namespace nvtt
int height;
int depth;
nv::AutoPtr<nv::Image> uint8data;
nv::AutoPtr<nv::FloatImage> floatdata;
nv::AutoPtr<nv::Image> data;
};
} // nvtt namespace

View File

@ -21,17 +21,16 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvmath/Color.h>
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
#include <nvcore/Containers.h> // swap
#include <limits.h>
using namespace nv;
using namespace OptimalCompress;
@ -40,37 +39,10 @@ using namespace OptimalCompress;
namespace
{
// Squared difference between two green values.
static int greenDistance(int g0, int g1)
{
	const int delta = g0 - g1;
	return delta * delta;
}
// Squared difference between two alpha values.
static int alphaDistance(int a0, int a1)
{
	const int delta = a0 - a1;
	return delta * delta;
}
// Snap 'green' onto the four-entry ramp between maxGreen (index 0) and
// minGreen (index 3) and return the selected ramp value.
static uint nearestGreen4(uint green, uint maxGreen, uint minGreen)
{
	const uint range = maxGreen - minGreen;

	// Half a step above the maximum, so that the integer division below rounds to the closest entry.
	const uint bias = maxGreen + range / 6;

	uint index = 0;
	if (range != 0)
	{
		index = clamp(3 * (bias - green) / range, 0U, 3U);
	}

	return (index * minGreen + (3 - index) * maxGreen) / 3;
}
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block, int bestError = INT_MAX)
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
// uint g0 = (block->col0.g << 2) | (block->col0.g >> 4);
// uint g1 = (block->col1.g << 2) | (block->col1.g >> 4);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
@ -78,24 +50,17 @@ namespace
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = greenDistance(green, palette[0]);
error = min(error, greenDistance(green, palette[1]));
error = min(error, greenDistance(green, palette[2]));
error = min(error, greenDistance(green, palette[3]));
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
// totalError += nearestGreen4(green, g0, g1);
if (totalError > bestError)
{
// early out
return totalError;
}
}
return totalError;
@ -113,10 +78,10 @@ namespace
{
const int color = rgba.color(i).g;
uint d0 = greenDistance(color0, color);
uint d1 = greenDistance(color1, color);
uint d2 = greenDistance(color2, color);
uint d3 = greenDistance(color3, color);
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
@ -137,78 +102,49 @@ namespace
// Choose quantized color that produces less error. Used by DXT3 compressor.
inline static uint quantize4(uint8 a)
{
int q0 = max(int(a >> 4) - 1, 0);
int q0 = (a >> 4) - 1;
int q1 = (a >> 4);
int q2 = min(int(a >> 4) + 1, 0xF);
int q2 = (a >> 4) + 1;
q0 = (q0 << 4) | q0;
q1 = (q1 << 4) | q1;
q2 = (q2 << 4) | q2;
int d0 = alphaDistance(q0, a);
int d1 = alphaDistance(q1, a);
int d2 = alphaDistance(q2, a);
int d0 = abs(q0 - a);
int d1 = abs(q1 - a);
int d2 = abs(q2 - a);
if (d0 < d1 && d0 < d2) return q0 >> 4;
if (d1 < d2) return q1 >> 4;
return q2 >> 4;
}
// Snap 'alpha' onto the eight-entry ramp between maxAlpha (index 0) and
// minAlpha (index 7) and return the selected ramp value.
static uint nearestAlpha8(uint alpha, uint maxAlpha, uint minAlpha)
{
	const uint range = maxAlpha - minAlpha;

	// A flat ramp has a single value. Returning it here avoids a division by
	// zero below, which would turn the scale into infinity (and the index into
	// NaN when alpha == maxAlpha). This matches what index 0 evaluates to.
	if (range == 0)
	{
		return maxAlpha;
	}

	// Half a step above the maximum, so that truncation rounds to the closest entry.
	const float bias = maxAlpha + float(range) / (2.0f * 7.0f);
	const float scale = 7.0f / float(range);

	const uint index = (uint)clamp((bias - float(alpha)) * scale, 0.0f, 7.0f);

	return (index * minAlpha + (7 - index) * maxAlpha) / 7;
}
// Sum, over the 16 texels of the block, of the squared distance between each
// alpha value and its nearest entry on the alpha0..alpha1 ramp. Accumulation
// stops as soon as the running total exceeds bestError, and the partial total
// is returned in that case.
static uint computeAlphaError8(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
{
	int accumulated = 0;

	for (uint texel = 0; texel < 16; texel++)
	{
		const uint8 value = rgba.color(texel).a;

		accumulated += alphaDistance(value, nearestAlpha8(value, block->alpha0, block->alpha1));

		if (accumulated > bestError)
		{
			// Already worse than the best candidate; no need to finish.
			break;
		}
	}

	return accumulated;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
int totalError = 0;
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int minDist = INT_MAX;
uint besterror = 256*256;
uint best;
for (uint p = 0; p < 8; p++)
{
int dist = alphaDistance(alpha, alphas[p]);
minDist = min(dist, minDist);
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += minDist;
if (totalError > bestError)
{
// early out
return totalError;
}
totalError += besterror;
}
return totalError;
@ -223,21 +159,22 @@ namespace
{
uint8 alpha = rgba.color(i).a;
int minDist = INT_MAX;
int bestIndex = 8;
for (uint p = 0; p < 8; p++)
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int dist = alphaDistance(alpha, alphas[p]);
int d = alphas[p] - alpha;
uint error = d * d;
if (dist < minDist)
if (error < besterror)
{
minDist = dist;
bestIndex = p;
besterror = error;
best = p;
}
}
nvDebugCheck(bestIndex < 8);
nvDebugCheck(best < 8);
block->setIndex(i, bestIndex);
block->setIndex(i, best);
}
}
@ -280,23 +217,6 @@ void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
}
}
// Encode a block whose green channel is the single value 'g', using the
// OMatch6 lookup table for the two green endpoints. Red is set to 31 and
// blue to 0 on both endpoints.
void OptimalCompress::compressDXT1G(uint8 g, BlockDXT1 * dxtBlock)
{
	// First endpoint.
	dxtBlock->col0.r = 31;
	dxtBlock->col0.g = OMatch6[g][0];
	dxtBlock->col0.b = 0;

	// Second endpoint.
	dxtBlock->col1.r = 31;
	dxtBlock->col1.g = OMatch6[g][1];
	dxtBlock->col1.b = 0;

	// Every texel selects palette entry 2.
	dxtBlock->indices = 0xaaaaaaaa;

	const bool endpointsReversed = dxtBlock->col0.u < dxtBlock->col1.u;
	if (endpointsReversed)
	{
		// Put the larger endpoint first and flip the low bit of every index
		// (2 becomes 3) so that the texels still select the same color.
		swap(dxtBlock->col0.u, dxtBlock->col1.u);
		dxtBlock->indices ^= 0x55555555;
	}
}
// Brute force green channel compressor
void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
@ -306,23 +226,12 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
uint8 ming = 63;
uint8 maxg = 0;
bool isSingleColor = true;
uint8 singleColor = rgba.color(0).g;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = (rgba.color(i).g + 1) >> 2;
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
if (rgba.color(i).g != singleColor) isSingleColor = false;
}
if (isSingleColor)
{
compressDXT1G(singleColor, block);
return;
}
block->col0.r = 31;
@ -332,38 +241,36 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
block->col0.b = 0;
block->col1.b = 0;
int bestError = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
// Expand search space a bit.
const int greenExpand = 4;
ming = (ming <= greenExpand) ? 0 : ming - greenExpand;
maxg = (maxg >= 63-greenExpand) ? 63 : maxg + greenExpand;
for (int g0 = ming+1; g0 <= maxg; g0++)
if (maxg - ming > 4)
{
for (int g1 = ming; g1 < g0; g1++)
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block, bestError);
if (error < bestError)
for (int g1 = ming; g1 < g0-4; g1++)
{
bestError = error;
bestg0 = g0;
bestg1 = g1;
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
block->col0.g = bestg0;
block->col1.g = bestg1;
nvDebugCheck(bestg0 == bestg1 || block->isFourColorMode());
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
@ -406,26 +313,42 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
int besta0 = maxa;
int besta1 = mina;
// Expand search space a bit.
const int alphaExpand = 8;
mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand;
maxa = (maxa >= 255-alphaExpand) ? 255 : maxa + alphaExpand;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
nvDebugCheck(a0 - a1 > 8);
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock, besterror);
int error = computeAlphaError(rgba, dxtBlock);
if (error < besterror)
{

View File

@ -26,8 +26,6 @@
#include <nvimage/nvimage.h>
#include <nvmath/Color.h>
namespace nv
{
struct ColorBlock;
@ -41,7 +39,6 @@ namespace nv
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);

View File

@ -43,7 +43,6 @@ void OutputOptions::reset()
m.outputHandler = NULL;
m.errorHandler = NULL;
m.outputHeader = true;
m.container = Container_DDS;
}
@ -73,12 +72,6 @@ void OutputOptions::setOutputHeader(bool outputHeader)
m.outputHeader = outputHeader;
}
/// Set container.
/// Stores the requested container type in the output options.
void OutputOptions::setContainer(Container container)
{
m.container = container;
}
bool OutputOptions::Private::openFile() const
{

View File

@ -64,7 +64,6 @@ namespace nvtt
mutable OutputHandler * outputHandler;
ErrorHandler * errorHandler;
bool outputHeader;
Container container;
bool openFile() const;
void closeFile() const;

View File

@ -21,10 +21,7 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvmath/Fitting.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
@ -133,7 +130,7 @@ inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
return dot(c0-c1, c0-c1);
}
inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
inline static uint computeIndices4(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
Vector3 palette[4];
palette[0] = maxColor;
@ -165,28 +162,6 @@ inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColo
return indices;
}
// Total error of a 16-color block against the four-entry palette defined by
// maxColor and minColor (the two endpoints plus the 1/3 and 2/3 blends).
// Each color contributes its distance to the closest palette entry.
inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
	Vector3 palette[4];
	palette[0] = maxColor;
	palette[1] = minColor;
	palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
	palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);

	float sum = 0.0f;

	for (int texel = 0; texel < 16; texel++)
	{
		float dist[4];
		for (int entry = 0; entry < 4; entry++)
		{
			dist[entry] = colorDistance(palette[entry], block[texel]);
		}

		const float nearestEndpoint = min(dist[0], dist[1]);
		const float nearestBlend = min(dist[2], dist[3]);

		sum += min(nearestEndpoint, nearestBlend);
	}

	return sum;
}
inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColor, Vector3::Arg minColor)
{
Vector3 palette[4];
@ -475,8 +450,7 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
// read block
Vector3 block[16];
extractColorBlockRGB(rgba, block);
#if 1
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
@ -484,31 +458,7 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
selectDiagonal(block, 16, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
#else
float weights[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
Vector3 cluster[4];
int count = Compute4Means(16, block, weights, Vector3(1, 1, 1), cluster);
Vector3 maxColor, minColor;
float bestError = FLT_MAX;
for (int i = 1; i < 4; i++)
{
for (int j = 0; j < i; j++)
{
uint16 color0 = roundAndExpand(&cluster[i]);
uint16 color1 = roundAndExpand(&cluster[j]);
float error = evaluatePaletteError4(block, cluster[i], cluster[j]);
if (error < bestError) {
bestError = error;
maxColor = cluster[i];
minColor = cluster[j];
}
}
}
#endif
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);

View File

@ -1,787 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "Texture.h"
#include <nvmath/Vector.h>
#include <nvmath/Matrix.h>
#include <nvmath/Color.h>
#include <nvimage/Filter.h>
#include <nvimage/ImageIO.h>
#include <nvimage/NormalMap.h>
using namespace nv;
using namespace nvtt;
namespace
{
	// Power of two at or below v, per these examples:
	// 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 4, 5 -> 4, ...
	static uint previousPowerOfTwo(const uint v)
	{
		const uint above = nextPowerOfTwo(v + 1);
		return above / 2;
	}

	// Whichever of nextPowerOfTwo(v) and previousPowerOfTwo(v) is closer to v.
	// When both are equally far, the next (larger) one is chosen.
	static uint nearestPowerOfTwo(const uint v)
	{
		const uint above = nextPowerOfTwo(v);
		const uint below = previousPowerOfTwo(v);

		const bool preferAbove = (above - v <= v - below);
		return preferAbove ? above : below;
	}
}
// Create a texture image with its own, freshly allocated private data.
TexImage::TexImage() : m(new TexImage::Private())
{
	// Every TexImage owns one reference on its private data: the copy
	// constructor adds one and the destructor releases one. detach() shows
	// that a newly created Private needs an explicit addRef() to reach a
	// count of one, so the same has to happen here. Without it the count is
	// one short, and the data is destroyed while a copy still points at it.
	m->addRef();
}
// Copy constructor: shares the private data of 'tex' and takes one more
// reference on it. Mutating methods call detach() before writing.
TexImage::TexImage(const TexImage & tex) : m(tex.m)
{
m->addRef();
}
// Destructor: drops this object's reference on the private data and clears the pointer.
TexImage::~TexImage()
{
m->release();
m = NULL;
}
// Assignment: share the private data of 'tex' and give up the data held so far.
void TexImage::operator=(const TexImage & tex)
{
	// Take the new reference first, so that self-assignment can never drop
	// the count to zero in between.
	tex.m->addRef();

	// Release the data this object was holding. (Previously the pointer was
	// overwritten before release() was called, which undid the addRef() above
	// and never released the old data.)
	m->release();

	m = tex.m;
}
void TexImage::detach()
{
if (m->refCount() > 1)
{
m = new TexImage::Private(*m);
m->addRef();
nvDebugCheck(m->refCount() == 1);
}
}
// Change the texture type and resize the face array to match: one entry for
// a 2D texture, six for a cube map. Does nothing if the type is unchanged.
void TexImage::setTextureType(TextureType type)
{
	if (m->type == type)
	{
		return;
	}

	detach();

	m->type = type;

	if (type != TextureType_2D)
	{
		nvCheck (type == TextureType_Cube);
		m->imageArray.resize(6, NULL);
		return;
	}

	// @@ Free images.
	m->imageArray.resize(1, NULL);
}
// Set the wrap mode. The private data is only detached when the value changes.
void TexImage::setWrapMode(WrapMode wrapMode)
{
	const bool unchanged = (m->wrapMode == wrapMode);
	if (unchanged) return;

	detach();
	m->wrapMode = wrapMode;
}
// Set the alpha mode. The private data is only detached when the value changes.
void TexImage::setAlphaMode(AlphaMode alphaMode)
{
	const bool unchanged = (m->alphaMode == alphaMode);
	if (unchanged) return;

	detach();
	m->alphaMode = alphaMode;
}
// Set the normal map flag. The private data is only detached when the value changes.
void TexImage::setNormalMap(bool isNormalMap)
{
	const bool unchanged = (m->isNormalMap == isNormalMap);
	if (unchanged) return;

	detach();
	m->isNormalMap = isNormalMap;
}
int TexImage::width() const
{
if (m->imageArray.count() > 0)
{
return m->imageArray[0]->width();
}
return 0;
}
int TexImage::height() const
{
if (m->imageArray.count() > 0)
{
return m->imageArray[0]->height();
}
return 0;
}
// Depth of the texture. Always returns 0 in this implementation.
// NOTE(review): no depth is stored anywhere in the visible code; confirm
// whether callers expect 0 or 1 for a 2D texture.
int TexImage::depth() const
{
return 0;
}
// Number of entries in the image array. setTextureType() sizes that array to
// 1 for a 2D texture and 6 for a cube map; entries may still be NULL.
int TexImage::faceCount() const
{
return m->imageArray.count();
}
// Returns the stored texture type.
TextureType TexImage::textureType() const
{
return m->type;
}
// Returns the stored wrap mode. resize() and buildNextMipmap() pass it on to the image filters.
WrapMode TexImage::wrapMode() const
{
return m->wrapMode;
}
// Returns the stored alpha mode. resize() and buildNextMipmap() pick a
// different filter call when it is AlphaMode_Transparency.
AlphaMode TexImage::alphaMode() const
{
return m->alphaMode;
}
// Returns the stored normal map flag.
bool TexImage::isNormalMap() const
{
return m->isNormalMap;
}
bool TexImage::load(const char * fileName)
{
// @@ Add support for DDS textures!
AutoPtr<FloatImage> img(ImageIO::loadFloat(fileName));
if (img == NULL)
{
return false;
}
detach();
m->imageArray.resize(1);
m->imageArray[0] = img.release();
return true;
}
// Fill face 'idx' from interleaved pixel data.
//  - format: InputFormat_BGRA_8UB (one Color32 per pixel) or InputFormat_RGBA_32F
//    (four floats per pixel, in R, G, B, A order).
//  - w, h: extents of the source; they must match the extents of the existing face.
//  - data: w * h pixels in the given format.
// Returns false when the face does not exist, the extents do not match, the
// data pointer is NULL or the format is not supported; true otherwise.
bool TexImage::setImage2D(InputFormat format, int w, int h, int idx, const void * restrict data)
{
	if (idx < 0 || idx >= (int)m->imageArray.count())
	{
		return false;
	}

	if (data == NULL)
	{
		return false;
	}

	if (format != InputFormat_BGRA_8UB && format != InputFormat_RGBA_32F)
	{
		// Unsupported input format: report it rather than claiming success.
		return false;
	}

	FloatImage * img = m->imageArray[idx];

	// Faces may be NULL until an image has been assigned to them.
	if (img == NULL || img->width() != w || img->height() != h)
	{
		return false;
	}

	detach();

	// detach() may have replaced the private data with a copy. Fetch the image
	// again so that the writes below go to our own copy rather than to the
	// data still shared with other TexImage objects.
	img = m->imageArray[idx];
	if (img == NULL)
	{
		return false;
	}

	const int count = w * h;

	float * restrict rdst = img->channel(0);
	float * restrict gdst = img->channel(1);
	float * restrict bdst = img->channel(2);
	float * restrict adst = img->channel(3);

	if (format == InputFormat_BGRA_8UB)
	{
		const Color32 * src = (const Color32 *)data;

		try {
			for (int i = 0; i < count; i++)
			{
				// Scale 8 bit values by 1/255, exactly like the planar overload of setImage2D does.
				rdst[i] = float(src[i].r) / 255.0f;
				gdst[i] = float(src[i].g) / 255.0f;
				bdst[i] = float(src[i].b) / 255.0f;
				adst[i] = float(src[i].a) / 255.0f;
			}
		}
		catch(...) {
			return false;
		}
	}
	else // InputFormat_RGBA_32F
	{
		const float * src = (const float *)data;

		try {
			for (int i = 0; i < count; i++)
			{
				rdst[i] = src[4 * i + 0];
				gdst[i] = src[4 * i + 1];
				bdst[i] = src[4 * i + 2];
				adst[i] = src[4 * i + 3];
			}
		}
		catch(...) {
			return false;
		}
	}

	return true;
}
// Fill face 'idx' from four separate channel planes.
//  - format: InputFormat_BGRA_8UB (planes of uint8, scaled by 1/255) or
//    InputFormat_RGBA_32F (planes of float, copied as is).
//  - w, h: extents of the source; they must match the extents of the existing face.
//  - r, g, b, a: one plane of w * h values per channel.
// Returns false when the face does not exist, the extents do not match, a
// plane pointer is NULL or the format is not supported; true otherwise.
bool TexImage::setImage2D(InputFormat format, int w, int h, int idx, const void * restrict r, const void * restrict g, const void * restrict b, const void * restrict a)
{
	if (idx < 0 || idx >= (int)m->imageArray.count())
	{
		return false;
	}

	if (r == NULL || g == NULL || b == NULL || a == NULL)
	{
		return false;
	}

	if (format != InputFormat_BGRA_8UB && format != InputFormat_RGBA_32F)
	{
		// Unsupported input format: report it rather than claiming success.
		return false;
	}

	FloatImage * img = m->imageArray[idx];

	// Faces may be NULL until an image has been assigned to them.
	if (img == NULL || img->width() != w || img->height() != h)
	{
		return false;
	}

	detach();

	// detach() may have replaced the private data with a copy. Fetch the image
	// again so that the writes below go to our own copy rather than to the
	// data still shared with other TexImage objects.
	img = m->imageArray[idx];
	if (img == NULL)
	{
		return false;
	}

	const int count = w * h;

	float * restrict rdst = img->channel(0);
	float * restrict gdst = img->channel(1);
	float * restrict bdst = img->channel(2);
	float * restrict adst = img->channel(3);

	if (format == InputFormat_BGRA_8UB)
	{
		const uint8 * restrict rsrc = (const uint8 *)r;
		const uint8 * restrict gsrc = (const uint8 *)g;
		const uint8 * restrict bsrc = (const uint8 *)b;
		const uint8 * restrict asrc = (const uint8 *)a;

		try {
			for (int i = 0; i < count; i++) rdst[i] = float(rsrc[i]) / 255.0f;
			for (int i = 0; i < count; i++) gdst[i] = float(gsrc[i]) / 255.0f;
			for (int i = 0; i < count; i++) bdst[i] = float(bsrc[i]) / 255.0f;
			for (int i = 0; i < count; i++) adst[i] = float(asrc[i]) / 255.0f;
		}
		catch(...) {
			return false;
		}
	}
	else // InputFormat_RGBA_32F
	{
		const float * rsrc = (const float *)r;
		const float * gsrc = (const float *)g;
		const float * bsrc = (const float *)b;
		const float * asrc = (const float *)a;

		try {
			memcpy(rdst, rsrc, count * sizeof(float));
			memcpy(gdst, gsrc, count * sizeof(float));
			memcpy(bdst, bsrc, count * sizeof(float));
			memcpy(adst, asrc, count * sizeof(float));
		}
		catch(...) {
			return false;
		}
	}

	return true;
}
void TexImage::resize(int w, int h, ResizeFilter filter)
{
if (m->imageArray.count() > 0)
{
if (w == m->imageArray[0]->width() && h == m->imageArray[0]->height()) return;
}
// @@ TODO: if cubemap, make sure w == h.
detach();
FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode;
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
if (m->alphaMode == AlphaMode_Transparency)
{
if (filter == ResizeFilter_Box)
{
BoxFilter filter;
m->imageArray[i]->resize(filter, w, h, wrapMode, 3);
}
else if (filter == ResizeFilter_Triangle)
{
TriangleFilter filter;
m->imageArray[i]->resize(filter, w, h, wrapMode, 3);
}
else if (filter == ResizeFilter_Kaiser)
{
//KaiserFilter filter(inputOptions.kaiserWidth);
//filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
KaiserFilter filter(3);
m->imageArray[i]->resize(filter, w, h, wrapMode, 3);
}
else //if (filter == ResizeFilter_Mitchell)
{
nvDebugCheck(filter == ResizeFilter_Mitchell);
MitchellFilter filter;
m->imageArray[i]->resize(filter, w, h, wrapMode, 3);
}
}
else
{
if (filter == ResizeFilter_Box)
{
BoxFilter filter;
m->imageArray[i]->resize(filter, w, h, wrapMode);
}
else if (filter == ResizeFilter_Triangle)
{
TriangleFilter filter;
m->imageArray[i]->resize(filter, w, h, wrapMode);
}
else if (filter == ResizeFilter_Kaiser)
{
//KaiserFilter filter(inputOptions.kaiserWidth);
//filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
KaiserFilter filter(3);
m->imageArray[i]->resize(filter, w, h, wrapMode);
}
else //if (filter == ResizeFilter_Mitchell)
{
nvDebugCheck(filter == ResizeFilter_Mitchell);
MitchellFilter filter;
m->imageArray[i]->resize(filter, w, h, wrapMode);
}
}
}
}
void TexImage::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter)
{
if (m->imageArray.count() > 0)
{
int w = m->imageArray[0]->width();
int h = m->imageArray[0]->height();
nvDebugCheck(w > 0);
nvDebugCheck(h > 0);
if (roundMode != RoundMode_None)
{
// rounded max extent should never be higher than original max extent.
maxExtent = previousPowerOfTwo(maxExtent);
}
// Scale extents without changing aspect ratio.
int maxwh = max(w, h);
if (maxExtent != 0 && maxwh > maxExtent)
{
w = max((w * maxExtent) / maxwh, 1);
h = max((h * maxExtent) / maxwh, 1);
}
// Round to power of two.
if (roundMode == RoundMode_ToNextPowerOfTwo)
{
w = nextPowerOfTwo(w);
h = nextPowerOfTwo(h);
}
else if (roundMode == RoundMode_ToNearestPowerOfTwo)
{
w = nearestPowerOfTwo(w);
h = nearestPowerOfTwo(h);
}
else if (roundMode == RoundMode_ToPreviousPowerOfTwo)
{
w = previousPowerOfTwo(w);
h = previousPowerOfTwo(h);
}
resize(w, h, filter);
}
}
// Downsample every face with the given mipmap filter.
// Returns false, without touching the texture, when the first face is already
// 1x1; returns true otherwise.
// When the alpha mode is AlphaMode_Transparency the filters are invoked with
// an extra argument of 3 (the index of the alpha channel).
// NOTE(review): the value returned by FloatImage::downSample() is not used
// here. If that function returns a new image instead of working in place, the
// result is lost; confirm against the FloatImage interface.
bool TexImage::buildNextMipmap(MipmapFilter filter)
{
	// Faces can be NULL (setTextureType() creates them that way), so the
	// first face is checked before its extents are read.
	if (m->imageArray.count() > 0 && m->imageArray[0] != NULL)
	{
		int w = m->imageArray[0]->width();
		int h = m->imageArray[0]->height();

		nvDebugCheck(w > 0);
		nvDebugCheck(h > 0);

		if (w == 1 && h == 1)
		{
			return false;
		}
	}

	detach();

	FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode;

	foreach (i, m->imageArray)
	{
		if (m->imageArray[i] == NULL) continue;

		// The filter objects are named after their kind so that they do not
		// shadow the 'filter' parameter.
		if (m->alphaMode == AlphaMode_Transparency)
		{
			if (filter == MipmapFilter_Box)
			{
				BoxFilter boxFilter;
				m->imageArray[i]->downSample(boxFilter, wrapMode, 3);
			}
			else if (filter == MipmapFilter_Triangle)
			{
				TriangleFilter triangleFilter;
				m->imageArray[i]->downSample(triangleFilter, wrapMode, 3);
			}
			else if (filter == MipmapFilter_Kaiser)
			{
				nvDebugCheck(filter == MipmapFilter_Kaiser);
				//KaiserFilter filter(inputOptions.kaiserWidth);
				//filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
				KaiserFilter kaiserFilter(3);
				m->imageArray[i]->downSample(kaiserFilter, wrapMode, 3);
			}
		}
		else
		{
			if (filter == MipmapFilter_Box)
			{
				m->imageArray[i]->fastDownSample();
			}
			else if (filter == MipmapFilter_Triangle)
			{
				TriangleFilter triangleFilter;
				m->imageArray[i]->downSample(triangleFilter, wrapMode);
			}
			else //if (filter == MipmapFilter_Kaiser)
			{
				nvDebugCheck(filter == MipmapFilter_Kaiser);
				//KaiserFilter filter(inputOptions.kaiserWidth);
				//filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
				KaiserFilter kaiserFilter(3);
				m->imageArray[i]->downSample(kaiserFilter, wrapMode);
			}
		}
	}

	return true;
}
// Color transforms.
void TexImage::toLinear(float gamma)
{
if (equal(gamma, 1.0f)) return;
detach();
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
m->imageArray[i]->toLinear(0, 3, gamma);
}
}
void TexImage::toGamma(float gamma)
{
if (equal(gamma, 1.0f)) return;
detach();
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
m->imageArray[i]->toGamma(0, 3, gamma);
}
}
void TexImage::transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4])
{
detach();
Matrix xform(
Vector4(w0[0], w0[1], w0[2], w0[3]),
Vector4(w1[0], w1[1], w1[2], w1[3]),
Vector4(w2[0], w2[1], w2[2], w2[3]),
Vector4(w3[0], w3[1], w3[2], w3[3]));
Vector4 voffset(offset[0], offset[1], offset[2], offset[3]);
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
m->imageArray[i]->transform(0, xform, voffset);
}
}
void TexImage::swizzle(int r, int g, int b, int a)
{
if (r == 0 && g == 1 && b == 2 && a == 3) return;
detach();
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
m->imageArray[i]->swizzle(0, r, g, b, a);
}
}
void TexImage::scaleBias(int channel, float scale, float bias)
{
if (equal(scale, 1.0f) && equal(bias, 0.0f)) return;
detach();
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
m->imageArray[i]->scaleBias(channel, 1, scale, bias);
}
}
void TexImage::blend(float red, float green, float blue, float alpha, float t)
{
detach();
foreach (i, m->imageArray)
{
FloatImage * img = m->imageArray[i];
if (img == NULL) continue;
float * restrict r = img->channel(0);
float * restrict g = img->channel(1);
float * restrict b = img->channel(2);
float * restrict a = img->channel(3);
const int count = img->width() * img->height();
for (int i = 0; i < count; i++)
{
r[i] = lerp(r[i], red, t);
g[i] = lerp(g[i], green, t);
b[i] = lerp(b[i], blue, t);
a[i] = lerp(a[i], alpha, t);
}
}
}
void TexImage::premultiplyAlpha()
{
detach();
foreach (i, m->imageArray)
{
FloatImage * img = m->imageArray[i];
if (img == NULL) continue;
float * restrict r = img->channel(0);
float * restrict g = img->channel(1);
float * restrict b = img->channel(2);
float * restrict a = img->channel(3);
const int count = img->width() * img->height();
for (int i = 0; i < count; i++)
{
r[i] *= a[i];
g[i] *= a[i];
b[i] *= a[i];
}
}
}
// Replace all four channels of every pixel with a weighted sum of the
// original channels.
//  - redScale, greenScale, blueScale, alphaScale: per-channel weights. They
//    are normalized so that they add up to 1, unless their sum is exactly 0,
//    in which case they are used as given.
void TexImage::toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale)
{
    detach();

    // Normalize once, outside the per-image loop: the weights are the same
    // for every image and must not be re-normalized for each of them.
    const float sum = redScale + greenScale + blueScale + alphaScale;
    if (sum != 0.0f)
    {
        redScale /= sum;
        greenScale /= sum;
        blueScale /= sum;
        alphaScale /= sum;
    }
    // else: weights cancel out; dividing would only produce inf/NaN.

    foreach (i, m->imageArray)
    {
        FloatImage * img = m->imageArray[i];
        if (img == NULL) continue;

        float * restrict r = img->channel(0);
        float * restrict g = img->channel(1);
        float * restrict b = img->channel(2);
        float * restrict a = img->channel(3);

        const int count = img->width() * img->height();
        for (int p = 0; p < count; p++)
        {
            const float grey = r[p] * redScale + g[p] * greenScale + b[p] * blueScale + a[p] * alphaScale;
            a[p] = b[p] = g[p] = r[p] = grey;
        }
    }
}
// Draw colored border.
void TexImage::setBorder(float r, float g, float b, float a)
{
detach();
foreach (i, m->imageArray)
{
FloatImage * img = m->imageArray[i];
if (img == NULL) continue;
const int w = img->width();
const int h = img->height();
for (int i = 0; i < w; i++)
{
img->setPixel(r, i, 0, 0);
img->setPixel(g, i, 0, 1);
img->setPixel(b, i, 0, 2);
img->setPixel(a, i, 0, 3);
img->setPixel(r, i, h-1, 0);
img->setPixel(g, i, h-1, 1);
img->setPixel(b, i, h-1, 2);
img->setPixel(a, i, h-1, 3);
}
for (int i = 0; i < h; i++)
{
img->setPixel(r, 0, i, 0);
img->setPixel(g, 0, i, 1);
img->setPixel(b, 0, i, 2);
img->setPixel(a, 0, i, 3);
img->setPixel(r, w-1, i, 0);
img->setPixel(g, w-1, i, 1);
img->setPixel(b, w-1, i, 2);
img->setPixel(a, w-1, i, 3);
}
}
}
// Fill image with the given color.
void TexImage::fill(float red, float green, float blue, float alpha)
{
detach();
foreach (i, m->imageArray)
{
FloatImage * img = m->imageArray[i];
if (img == NULL) continue;
float * restrict r = img->channel(0);
float * restrict g = img->channel(1);
float * restrict b = img->channel(2);
float * restrict a = img->channel(3);
const int count = img->width() * img->height();
for (int i = 0; i < count; i++)
{
r[i] = red;
g[i] = green;
b[i] = blue;
a[i] = alpha;
}
}
}
// Set normal map options.
void TexImage::toNormalMap(float sm, float medium, float big, float large)
{
detach();
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
// @@ Not implemented.
}
}
void TexImage::toHeightMap()
{
detach();
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
// @@ Not implemented.
}
}
void TexImage::normalizeNormalMap()
{
//nvCheck(m->isNormalMap);
detach();
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
nv::normalizeNormalMap(m->imageArray[i]);
}
}
// Compress.
void TexImage::outputCompressed(const CompressionOptions & compressionOptions, const OutputOptions & outputOptions)
{
foreach (i, m->imageArray)
{
if (m->imageArray[i] == NULL) continue;
// @@ Not implemented.
}
}

View File

@ -1,76 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_TEXTURE_H
#define NV_TT_TEXTURE_H
#include "nvtt.h"
#include <nvcore/Containers.h>
#include <nvcore/RefCounted.h>
#include <nvcore/Ptr.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
namespace nvtt
{
// Reference-counted state shared behind a TexImage.
struct TexImage::Private : public nv::RefCounted
{
    // Default state: 2D texture, mirror wrapping, no alpha, not a normal
    // map, and a single empty (NULL) image slot.
    Private() :
        type(TextureType_2D),
        wrapMode(WrapMode_Mirror),
        alphaMode(AlphaMode_None),
        isNormalMap(false)
    {
        imageArray.resize(1, NULL);
    }

    // Copies the settings and the array of image pointers. The images that
    // the pointers refer to are not duplicated by this constructor.
    Private(const Private & p) :
        nv::RefCounted(),
        type(p.type),
        wrapMode(p.wrapMode),
        alphaMode(p.alphaMode),
        isNormalMap(p.isNormalMap),
        imageArray(p.imageArray)
    {
    }

    // NOTE(review): the images referenced by imageArray are not released
    // here yet; ownership needs to be settled together with the pointer
    // copy performed by the copy constructor.
    ~Private()
    {
        // @@ Free images.
    }

    TextureType type;       // Kind of texture.
    WrapMode wrapMode;      // Wrap mode; cast to FloatImage::WrapMode by the filtering code.
    AlphaMode alphaMode;    // Alpha interpretation; selects the alpha-aware filtering paths.
    bool isNormalMap;       // Whether the images hold a normal map.

    // One entry per image; entries may be NULL while no image is assigned.
    nv::Array<nv::FloatImage *> imageArray;
};
} // nvtt namespace
#endif // NV_TT_TEXTURE_H

View File

@ -1117,770 +1117,3 @@ const static uint s_bitmapTable[992] =
0x55555557,
0x55555557,
};
/*
void precomp()
{
unsigned int bitmaps[1024];
int num = 0;
printf("const static uint s_bitmapTableCTX[704] =\n{\n");
for (int a = 1; a <= 15; a++)
{
for (int b = a; b <= 15; b++)
{
for (int c = b; c <= 15; c++)
{
int indices[16];
int i = 0;
for(; i < a; i++) {
indices[i] = 0;
}
for(; i < a+b; i++) {
indices[i] = 2;
}
for(; i < a+b+c; i++) {
indices[i] = 3;
}
for(; i < 16; i++) {
indices[i] = 1;
}
unsigned int bm = 0;
for(i = 0; i < 16; i++) {
bm |= indices[i] << (i * 2);
}
printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c);
bitmaps[num] = bm;
num++;
}
}
}
// Align to 32: 680 -> 704
while (num < 704)
{
printf("\t0x80000000,\n");
bitmaps[num] = 0x80000000; // 15 0 0 1;
num++;
}
printf("}; // num = %d\n", num);
}
*/
const static uint s_bitmapTableCTX[704] =
{
0x55555578, // 1 0 0 15
0x555555F8, // 1 0 1 14
0x555557F8, // 1 0 2 13
0x55555FF8, // 1 0 3 12
0x55557FF8, // 1 0 4 11
0x5555FFF8, // 1 0 5 10
0x5557FFF8, // 1 0 6 9
0x555FFFF8, // 1 0 7 8
0x557FFFF8, // 1 0 8 7
0x55FFFFF8, // 1 0 9 6
0x57FFFFF8, // 1 0 10 5
0x5FFFFFF8, // 1 0 11 4
0x7FFFFFF8, // 1 0 12 3
0xFFFFFFF8, // 1 0 13 2
0xFFFFFFF8, // 1 0 14 1
0x555557E8, // 1 1 0 14
0x55555FE8, // 1 1 1 13
0x55557FE8, // 1 1 2 12
0x5555FFE8, // 1 1 3 11
0x5557FFE8, // 1 1 4 10
0x555FFFE8, // 1 1 5 9
0x557FFFE8, // 1 1 6 8
0x55FFFFE8, // 1 1 7 7
0x57FFFFE8, // 1 1 8 6
0x5FFFFFE8, // 1 1 9 5
0x7FFFFFE8, // 1 1 10 4
0xFFFFFFE8, // 1 1 11 3
0xFFFFFFE8, // 1 1 12 2
0xFFFFFFE8, // 1 1 13 1
0x55557FA8, // 1 2 0 13
0x5555FFA8, // 1 2 1 12
0x5557FFA8, // 1 2 2 11
0x555FFFA8, // 1 2 3 10
0x557FFFA8, // 1 2 4 9
0x55FFFFA8, // 1 2 5 8
0x57FFFFA8, // 1 2 6 7
0x5FFFFFA8, // 1 2 7 6
0x7FFFFFA8, // 1 2 8 5
0xFFFFFFA8, // 1 2 9 4
0xFFFFFFA8, // 1 2 10 3
0xFFFFFFA8, // 1 2 11 2
0xFFFFFFA8, // 1 2 12 1
0x5557FEA8, // 1 3 0 12
0x555FFEA8, // 1 3 1 11
0x557FFEA8, // 1 3 2 10
0x55FFFEA8, // 1 3 3 9
0x57FFFEA8, // 1 3 4 8
0x5FFFFEA8, // 1 3 5 7
0x7FFFFEA8, // 1 3 6 6
0xFFFFFEA8, // 1 3 7 5
0xFFFFFEA8, // 1 3 8 4
0xFFFFFEA8, // 1 3 9 3
0xFFFFFEA8, // 1 3 10 2
0xFFFFFEA8, // 1 3 11 1
0x557FFAA8, // 1 4 0 11
0x55FFFAA8, // 1 4 1 10
0x57FFFAA8, // 1 4 2 9
0x5FFFFAA8, // 1 4 3 8
0x7FFFFAA8, // 1 4 4 7
0xFFFFFAA8, // 1 4 5 6
0xFFFFFAA8, // 1 4 6 5
0xFFFFFAA8, // 1 4 7 4
0xFFFFFAA8, // 1 4 8 3
0xFFFFFAA8, // 1 4 9 2
0xFFFFFAA8, // 1 4 10 1
0x57FFEAA8, // 1 5 0 10
0x5FFFEAA8, // 1 5 1 9
0x7FFFEAA8, // 1 5 2 8
0xFFFFEAA8, // 1 5 3 7
0xFFFFEAA8, // 1 5 4 6
0xFFFFEAA8, // 1 5 5 5
0xFFFFEAA8, // 1 5 6 4
0xFFFFEAA8, // 1 5 7 3
0xFFFFEAA8, // 1 5 8 2
0xFFFFEAA8, // 1 5 9 1
0x7FFFAAA8, // 1 6 0 9
0xFFFFAAA8, // 1 6 1 8
0xFFFFAAA8, // 1 6 2 7
0xFFFFAAA8, // 1 6 3 6
0xFFFFAAA8, // 1 6 4 5
0xFFFFAAA8, // 1 6 5 4
0xFFFFAAA8, // 1 6 6 3
0xFFFFAAA8, // 1 6 7 2
0xFFFFAAA8, // 1 6 8 1
0xFFFEAAA8, // 1 7 0 8
0xFFFEAAA8, // 1 7 1 7
0xFFFEAAA8, // 1 7 2 6
0xFFFEAAA8, // 1 7 3 5
0xFFFEAAA8, // 1 7 4 4
0xFFFEAAA8, // 1 7 5 3
0xFFFEAAA8, // 1 7 6 2
0xFFFEAAA8, // 1 7 7 1
0xFFFAAAA8, // 1 8 0 7
0xFFFAAAA8, // 1 8 1 6
0xFFFAAAA8, // 1 8 2 5
0xFFFAAAA8, // 1 8 3 4
0xFFFAAAA8, // 1 8 4 3
0xFFFAAAA8, // 1 8 5 2
0xFFFAAAA8, // 1 8 6 1
0xFFEAAAA8, // 1 9 0 6
0xFFEAAAA8, // 1 9 1 5
0xFFEAAAA8, // 1 9 2 4
0xFFEAAAA8, // 1 9 3 3
0xFFEAAAA8, // 1 9 4 2
0xFFEAAAA8, // 1 9 5 1
0xFFAAAAA8, // 1 10 0 5
0xFFAAAAA8, // 1 10 1 4
0xFFAAAAA8, // 1 10 2 3
0xFFAAAAA8, // 1 10 3 2
0xFFAAAAA8, // 1 10 4 1
0xFEAAAAA8, // 1 11 0 4
0xFEAAAAA8, // 1 11 1 3
0xFEAAAAA8, // 1 11 2 2
0xFEAAAAA8, // 1 11 3 1
0xFAAAAAA8, // 1 12 0 3
0xFAAAAAA8, // 1 12 1 2
0xFAAAAAA8, // 1 12 2 1
0xEAAAAAA8, // 1 13 0 2
0xEAAAAAA8, // 1 13 1 1
0xAAAAAAA8, // 1 14 0 1
0x55555FA0, // 2 0 0 14
0x55557FA0, // 2 0 1 13
0x5555FFA0, // 2 0 2 12
0x5557FFA0, // 2 0 3 11
0x555FFFA0, // 2 0 4 10
0x557FFFA0, // 2 0 5 9
0x55FFFFA0, // 2 0 6 8
0x57FFFFA0, // 2 0 7 7
0x5FFFFFA0, // 2 0 8 6
0x7FFFFFA0, // 2 0 9 5
0xFFFFFFA0, // 2 0 10 4
0xFFFFFFA0, // 2 0 11 3
0xFFFFFFA0, // 2 0 12 2
0xFFFFFFA0, // 2 0 13 1
0x5555FEA0, // 2 1 0 13
0x5557FEA0, // 2 1 1 12
0x555FFEA0, // 2 1 2 11
0x557FFEA0, // 2 1 3 10
0x55FFFEA0, // 2 1 4 9
0x57FFFEA0, // 2 1 5 8
0x5FFFFEA0, // 2 1 6 7
0x7FFFFEA0, // 2 1 7 6
0xFFFFFEA0, // 2 1 8 5
0xFFFFFEA0, // 2 1 9 4
0xFFFFFEA0, // 2 1 10 3
0xFFFFFEA0, // 2 1 11 2
0xFFFFFEA0, // 2 1 12 1
0x555FFAA0, // 2 2 0 12
0x557FFAA0, // 2 2 1 11
0x55FFFAA0, // 2 2 2 10
0x57FFFAA0, // 2 2 3 9
0x5FFFFAA0, // 2 2 4 8
0x7FFFFAA0, // 2 2 5 7
0xFFFFFAA0, // 2 2 6 6
0xFFFFFAA0, // 2 2 7 5
0xFFFFFAA0, // 2 2 8 4
0xFFFFFAA0, // 2 2 9 3
0xFFFFFAA0, // 2 2 10 2
0xFFFFFAA0, // 2 2 11 1
0x55FFEAA0, // 2 3 0 11
0x57FFEAA0, // 2 3 1 10
0x5FFFEAA0, // 2 3 2 9
0x7FFFEAA0, // 2 3 3 8
0xFFFFEAA0, // 2 3 4 7
0xFFFFEAA0, // 2 3 5 6
0xFFFFEAA0, // 2 3 6 5
0xFFFFEAA0, // 2 3 7 4
0xFFFFEAA0, // 2 3 8 3
0xFFFFEAA0, // 2 3 9 2
0xFFFFEAA0, // 2 3 10 1
0x5FFFAAA0, // 2 4 0 10
0x7FFFAAA0, // 2 4 1 9
0xFFFFAAA0, // 2 4 2 8
0xFFFFAAA0, // 2 4 3 7
0xFFFFAAA0, // 2 4 4 6
0xFFFFAAA0, // 2 4 5 5
0xFFFFAAA0, // 2 4 6 4
0xFFFFAAA0, // 2 4 7 3
0xFFFFAAA0, // 2 4 8 2
0xFFFFAAA0, // 2 4 9 1
0xFFFEAAA0, // 2 5 0 9
0xFFFEAAA0, // 2 5 1 8
0xFFFEAAA0, // 2 5 2 7
0xFFFEAAA0, // 2 5 3 6
0xFFFEAAA0, // 2 5 4 5
0xFFFEAAA0, // 2 5 5 4
0xFFFEAAA0, // 2 5 6 3
0xFFFEAAA0, // 2 5 7 2
0xFFFEAAA0, // 2 5 8 1
0xFFFAAAA0, // 2 6 0 8
0xFFFAAAA0, // 2 6 1 7
0xFFFAAAA0, // 2 6 2 6
0xFFFAAAA0, // 2 6 3 5
0xFFFAAAA0, // 2 6 4 4
0xFFFAAAA0, // 2 6 5 3
0xFFFAAAA0, // 2 6 6 2
0xFFFAAAA0, // 2 6 7 1
0xFFEAAAA0, // 2 7 0 7
0xFFEAAAA0, // 2 7 1 6
0xFFEAAAA0, // 2 7 2 5
0xFFEAAAA0, // 2 7 3 4
0xFFEAAAA0, // 2 7 4 3
0xFFEAAAA0, // 2 7 5 2
0xFFEAAAA0, // 2 7 6 1
0xFFAAAAA0, // 2 8 0 6
0xFFAAAAA0, // 2 8 1 5
0xFFAAAAA0, // 2 8 2 4
0xFFAAAAA0, // 2 8 3 3
0xFFAAAAA0, // 2 8 4 2
0xFFAAAAA0, // 2 8 5 1
0xFEAAAAA0, // 2 9 0 5
0xFEAAAAA0, // 2 9 1 4
0xFEAAAAA0, // 2 9 2 3
0xFEAAAAA0, // 2 9 3 2
0xFEAAAAA0, // 2 9 4 1
0xFAAAAAA0, // 2 10 0 4
0xFAAAAAA0, // 2 10 1 3
0xFAAAAAA0, // 2 10 2 2
0xFAAAAAA0, // 2 10 3 1
0xEAAAAAA0, // 2 11 0 3
0xEAAAAAA0, // 2 11 1 2
0xEAAAAAA0, // 2 11 2 1
0xAAAAAAA0, // 2 12 0 2
0xAAAAAAA0, // 2 12 1 1
0xAAAAAAA0, // 2 13 0 1
0x5557FA80, // 3 0 0 13
0x555FFA80, // 3 0 1 12
0x557FFA80, // 3 0 2 11
0x55FFFA80, // 3 0 3 10
0x57FFFA80, // 3 0 4 9
0x5FFFFA80, // 3 0 5 8
0x7FFFFA80, // 3 0 6 7
0xFFFFFA80, // 3 0 7 6
0xFFFFFA80, // 3 0 8 5
0xFFFFFA80, // 3 0 9 4
0xFFFFFA80, // 3 0 10 3
0xFFFFFA80, // 3 0 11 2
0xFFFFFA80, // 3 0 12 1
0x557FEA80, // 3 1 0 12
0x55FFEA80, // 3 1 1 11
0x57FFEA80, // 3 1 2 10
0x5FFFEA80, // 3 1 3 9
0x7FFFEA80, // 3 1 4 8
0xFFFFEA80, // 3 1 5 7
0xFFFFEA80, // 3 1 6 6
0xFFFFEA80, // 3 1 7 5
0xFFFFEA80, // 3 1 8 4
0xFFFFEA80, // 3 1 9 3
0xFFFFEA80, // 3 1 10 2
0xFFFFEA80, // 3 1 11 1
0x57FFAA80, // 3 2 0 11
0x5FFFAA80, // 3 2 1 10
0x7FFFAA80, // 3 2 2 9
0xFFFFAA80, // 3 2 3 8
0xFFFFAA80, // 3 2 4 7
0xFFFFAA80, // 3 2 5 6
0xFFFFAA80, // 3 2 6 5
0xFFFFAA80, // 3 2 7 4
0xFFFFAA80, // 3 2 8 3
0xFFFFAA80, // 3 2 9 2
0xFFFFAA80, // 3 2 10 1
0x7FFEAA80, // 3 3 0 10
0xFFFEAA80, // 3 3 1 9
0xFFFEAA80, // 3 3 2 8
0xFFFEAA80, // 3 3 3 7
0xFFFEAA80, // 3 3 4 6
0xFFFEAA80, // 3 3 5 5
0xFFFEAA80, // 3 3 6 4
0xFFFEAA80, // 3 3 7 3
0xFFFEAA80, // 3 3 8 2
0xFFFEAA80, // 3 3 9 1
0xFFFAAA80, // 3 4 0 9
0xFFFAAA80, // 3 4 1 8
0xFFFAAA80, // 3 4 2 7
0xFFFAAA80, // 3 4 3 6
0xFFFAAA80, // 3 4 4 5
0xFFFAAA80, // 3 4 5 4
0xFFFAAA80, // 3 4 6 3
0xFFFAAA80, // 3 4 7 2
0xFFFAAA80, // 3 4 8 1
0xFFEAAA80, // 3 5 0 8
0xFFEAAA80, // 3 5 1 7
0xFFEAAA80, // 3 5 2 6
0xFFEAAA80, // 3 5 3 5
0xFFEAAA80, // 3 5 4 4
0xFFEAAA80, // 3 5 5 3
0xFFEAAA80, // 3 5 6 2
0xFFEAAA80, // 3 5 7 1
0xFFAAAA80, // 3 6 0 7
0xFFAAAA80, // 3 6 1 6
0xFFAAAA80, // 3 6 2 5
0xFFAAAA80, // 3 6 3 4
0xFFAAAA80, // 3 6 4 3
0xFFAAAA80, // 3 6 5 2
0xFFAAAA80, // 3 6 6 1
0xFEAAAA80, // 3 7 0 6
0xFEAAAA80, // 3 7 1 5
0xFEAAAA80, // 3 7 2 4
0xFEAAAA80, // 3 7 3 3
0xFEAAAA80, // 3 7 4 2
0xFEAAAA80, // 3 7 5 1
0xFAAAAA80, // 3 8 0 5
0xFAAAAA80, // 3 8 1 4
0xFAAAAA80, // 3 8 2 3
0xFAAAAA80, // 3 8 3 2
0xFAAAAA80, // 3 8 4 1
0xEAAAAA80, // 3 9 0 4
0xEAAAAA80, // 3 9 1 3
0xEAAAAA80, // 3 9 2 2
0xEAAAAA80, // 3 9 3 1
0xAAAAAA80, // 3 10 0 3
0xAAAAAA80, // 3 10 1 2
0xAAAAAA80, // 3 10 2 1
0xAAAAAA80, // 3 11 0 2
0xAAAAAA80, // 3 11 1 1
0xAAAAAA80, // 3 12 0 1
0x55FFAA00, // 4 0 0 12
0x57FFAA00, // 4 0 1 11
0x5FFFAA00, // 4 0 2 10
0x7FFFAA00, // 4 0 3 9
0xFFFFAA00, // 4 0 4 8
0xFFFFAA00, // 4 0 5 7
0xFFFFAA00, // 4 0 6 6
0xFFFFAA00, // 4 0 7 5
0xFFFFAA00, // 4 0 8 4
0xFFFFAA00, // 4 0 9 3
0xFFFFAA00, // 4 0 10 2
0xFFFFAA00, // 4 0 11 1
0x5FFEAA00, // 4 1 0 11
0x7FFEAA00, // 4 1 1 10
0xFFFEAA00, // 4 1 2 9
0xFFFEAA00, // 4 1 3 8
0xFFFEAA00, // 4 1 4 7
0xFFFEAA00, // 4 1 5 6
0xFFFEAA00, // 4 1 6 5
0xFFFEAA00, // 4 1 7 4
0xFFFEAA00, // 4 1 8 3
0xFFFEAA00, // 4 1 9 2
0xFFFEAA00, // 4 1 10 1
0xFFFAAA00, // 4 2 0 10
0xFFFAAA00, // 4 2 1 9
0xFFFAAA00, // 4 2 2 8
0xFFFAAA00, // 4 2 3 7
0xFFFAAA00, // 4 2 4 6
0xFFFAAA00, // 4 2 5 5
0xFFFAAA00, // 4 2 6 4
0xFFFAAA00, // 4 2 7 3
0xFFFAAA00, // 4 2 8 2
0xFFFAAA00, // 4 2 9 1
0xFFEAAA00, // 4 3 0 9
0xFFEAAA00, // 4 3 1 8
0xFFEAAA00, // 4 3 2 7
0xFFEAAA00, // 4 3 3 6
0xFFEAAA00, // 4 3 4 5
0xFFEAAA00, // 4 3 5 4
0xFFEAAA00, // 4 3 6 3
0xFFEAAA00, // 4 3 7 2
0xFFEAAA00, // 4 3 8 1
0xFFAAAA00, // 4 4 0 8
0xFFAAAA00, // 4 4 1 7
0xFFAAAA00, // 4 4 2 6
0xFFAAAA00, // 4 4 3 5
0xFFAAAA00, // 4 4 4 4
0xFFAAAA00, // 4 4 5 3
0xFFAAAA00, // 4 4 6 2
0xFFAAAA00, // 4 4 7 1
0xFEAAAA00, // 4 5 0 7
0xFEAAAA00, // 4 5 1 6
0xFEAAAA00, // 4 5 2 5
0xFEAAAA00, // 4 5 3 4
0xFEAAAA00, // 4 5 4 3
0xFEAAAA00, // 4 5 5 2
0xFEAAAA00, // 4 5 6 1
0xFAAAAA00, // 4 6 0 6
0xFAAAAA00, // 4 6 1 5
0xFAAAAA00, // 4 6 2 4
0xFAAAAA00, // 4 6 3 3
0xFAAAAA00, // 4 6 4 2
0xFAAAAA00, // 4 6 5 1
0xEAAAAA00, // 4 7 0 5
0xEAAAAA00, // 4 7 1 4
0xEAAAAA00, // 4 7 2 3
0xEAAAAA00, // 4 7 3 2
0xEAAAAA00, // 4 7 4 1
0xAAAAAA00, // 4 8 0 4
0xAAAAAA00, // 4 8 1 3
0xAAAAAA00, // 4 8 2 2
0xAAAAAA00, // 4 8 3 1
0xAAAAAA00, // 4 9 0 3
0xAAAAAA00, // 4 9 1 2
0xAAAAAA00, // 4 9 2 1
0xAAAAAA00, // 4 10 0 2
0xAAAAAA00, // 4 10 1 1
0xAAAAAA00, // 4 11 0 1
0x7FFAA800, // 5 0 0 11
0xFFFAA800, // 5 0 1 10
0xFFFAA800, // 5 0 2 9
0xFFFAA800, // 5 0 3 8
0xFFFAA800, // 5 0 4 7
0xFFFAA800, // 5 0 5 6
0xFFFAA800, // 5 0 6 5
0xFFFAA800, // 5 0 7 4
0xFFFAA800, // 5 0 8 3
0xFFFAA800, // 5 0 9 2
0xFFFAA800, // 5 0 10 1
0xFFEAA800, // 5 1 0 10
0xFFEAA800, // 5 1 1 9
0xFFEAA800, // 5 1 2 8
0xFFEAA800, // 5 1 3 7
0xFFEAA800, // 5 1 4 6
0xFFEAA800, // 5 1 5 5
0xFFEAA800, // 5 1 6 4
0xFFEAA800, // 5 1 7 3
0xFFEAA800, // 5 1 8 2
0xFFEAA800, // 5 1 9 1
0xFFAAA800, // 5 2 0 9
0xFFAAA800, // 5 2 1 8
0xFFAAA800, // 5 2 2 7
0xFFAAA800, // 5 2 3 6
0xFFAAA800, // 5 2 4 5
0xFFAAA800, // 5 2 5 4
0xFFAAA800, // 5 2 6 3
0xFFAAA800, // 5 2 7 2
0xFFAAA800, // 5 2 8 1
0xFEAAA800, // 5 3 0 8
0xFEAAA800, // 5 3 1 7
0xFEAAA800, // 5 3 2 6
0xFEAAA800, // 5 3 3 5
0xFEAAA800, // 5 3 4 4
0xFEAAA800, // 5 3 5 3
0xFEAAA800, // 5 3 6 2
0xFEAAA800, // 5 3 7 1
0xFAAAA800, // 5 4 0 7
0xFAAAA800, // 5 4 1 6
0xFAAAA800, // 5 4 2 5
0xFAAAA800, // 5 4 3 4
0xFAAAA800, // 5 4 4 3
0xFAAAA800, // 5 4 5 2
0xFAAAA800, // 5 4 6 1
0xEAAAA800, // 5 5 0 6
0xEAAAA800, // 5 5 1 5
0xEAAAA800, // 5 5 2 4
0xEAAAA800, // 5 5 3 3
0xEAAAA800, // 5 5 4 2
0xEAAAA800, // 5 5 5 1
0xAAAAA800, // 5 6 0 5
0xAAAAA800, // 5 6 1 4
0xAAAAA800, // 5 6 2 3
0xAAAAA800, // 5 6 3 2
0xAAAAA800, // 5 6 4 1
0xAAAAA800, // 5 7 0 4
0xAAAAA800, // 5 7 1 3
0xAAAAA800, // 5 7 2 2
0xAAAAA800, // 5 7 3 1
0xAAAAA800, // 5 8 0 3
0xAAAAA800, // 5 8 1 2
0xAAAAA800, // 5 8 2 1
0xAAAAA800, // 5 9 0 2
0xAAAAA800, // 5 9 1 1
0xAAAAA800, // 5 10 0 1
0xFFAAA000, // 6 0 0 10
0xFFAAA000, // 6 0 1 9
0xFFAAA000, // 6 0 2 8
0xFFAAA000, // 6 0 3 7
0xFFAAA000, // 6 0 4 6
0xFFAAA000, // 6 0 5 5
0xFFAAA000, // 6 0 6 4
0xFFAAA000, // 6 0 7 3
0xFFAAA000, // 6 0 8 2
0xFFAAA000, // 6 0 9 1
0xFEAAA000, // 6 1 0 9
0xFEAAA000, // 6 1 1 8
0xFEAAA000, // 6 1 2 7
0xFEAAA000, // 6 1 3 6
0xFEAAA000, // 6 1 4 5
0xFEAAA000, // 6 1 5 4
0xFEAAA000, // 6 1 6 3
0xFEAAA000, // 6 1 7 2
0xFEAAA000, // 6 1 8 1
0xFAAAA000, // 6 2 0 8
0xFAAAA000, // 6 2 1 7
0xFAAAA000, // 6 2 2 6
0xFAAAA000, // 6 2 3 5
0xFAAAA000, // 6 2 4 4
0xFAAAA000, // 6 2 5 3
0xFAAAA000, // 6 2 6 2
0xFAAAA000, // 6 2 7 1
0xEAAAA000, // 6 3 0 7
0xEAAAA000, // 6 3 1 6
0xEAAAA000, // 6 3 2 5
0xEAAAA000, // 6 3 3 4
0xEAAAA000, // 6 3 4 3
0xEAAAA000, // 6 3 5 2
0xEAAAA000, // 6 3 6 1
0xAAAAA000, // 6 4 0 6
0xAAAAA000, // 6 4 1 5
0xAAAAA000, // 6 4 2 4
0xAAAAA000, // 6 4 3 3
0xAAAAA000, // 6 4 4 2
0xAAAAA000, // 6 4 5 1
0xAAAAA000, // 6 5 0 5
0xAAAAA000, // 6 5 1 4
0xAAAAA000, // 6 5 2 3
0xAAAAA000, // 6 5 3 2
0xAAAAA000, // 6 5 4 1
0xAAAAA000, // 6 6 0 4
0xAAAAA000, // 6 6 1 3
0xAAAAA000, // 6 6 2 2
0xAAAAA000, // 6 6 3 1
0xAAAAA000, // 6 7 0 3
0xAAAAA000, // 6 7 1 2
0xAAAAA000, // 6 7 2 1
0xAAAAA000, // 6 8 0 2
0xAAAAA000, // 6 8 1 1
0xAAAAA000, // 6 9 0 1
0xFAAA8000, // 7 0 0 9
0xFAAA8000, // 7 0 1 8
0xFAAA8000, // 7 0 2 7
0xFAAA8000, // 7 0 3 6
0xFAAA8000, // 7 0 4 5
0xFAAA8000, // 7 0 5 4
0xFAAA8000, // 7 0 6 3
0xFAAA8000, // 7 0 7 2
0xFAAA8000, // 7 0 8 1
0xEAAA8000, // 7 1 0 8
0xEAAA8000, // 7 1 1 7
0xEAAA8000, // 7 1 2 6
0xEAAA8000, // 7 1 3 5
0xEAAA8000, // 7 1 4 4
0xEAAA8000, // 7 1 5 3
0xEAAA8000, // 7 1 6 2
0xEAAA8000, // 7 1 7 1
0xAAAA8000, // 7 2 0 7
0xAAAA8000, // 7 2 1 6
0xAAAA8000, // 7 2 2 5
0xAAAA8000, // 7 2 3 4
0xAAAA8000, // 7 2 4 3
0xAAAA8000, // 7 2 5 2
0xAAAA8000, // 7 2 6 1
0xAAAA8000, // 7 3 0 6
0xAAAA8000, // 7 3 1 5
0xAAAA8000, // 7 3 2 4
0xAAAA8000, // 7 3 3 3
0xAAAA8000, // 7 3 4 2
0xAAAA8000, // 7 3 5 1
0xAAAA8000, // 7 4 0 5
0xAAAA8000, // 7 4 1 4
0xAAAA8000, // 7 4 2 3
0xAAAA8000, // 7 4 3 2
0xAAAA8000, // 7 4 4 1
0xAAAA8000, // 7 5 0 4
0xAAAA8000, // 7 5 1 3
0xAAAA8000, // 7 5 2 2
0xAAAA8000, // 7 5 3 1
0xAAAA8000, // 7 6 0 3
0xAAAA8000, // 7 6 1 2
0xAAAA8000, // 7 6 2 1
0xAAAA8000, // 7 7 0 2
0xAAAA8000, // 7 7 1 1
0xAAAA8000, // 7 8 0 1
0xAAAA0000, // 8 0 0 8
0xAAAA0000, // 8 0 1 7
0xAAAA0000, // 8 0 2 6
0xAAAA0000, // 8 0 3 5
0xAAAA0000, // 8 0 4 4
0xAAAA0000, // 8 0 5 3
0xAAAA0000, // 8 0 6 2
0xAAAA0000, // 8 0 7 1
0xAAAA0000, // 8 1 0 7
0xAAAA0000, // 8 1 1 6
0xAAAA0000, // 8 1 2 5
0xAAAA0000, // 8 1 3 4
0xAAAA0000, // 8 1 4 3
0xAAAA0000, // 8 1 5 2
0xAAAA0000, // 8 1 6 1
0xAAAA0000, // 8 2 0 6
0xAAAA0000, // 8 2 1 5
0xAAAA0000, // 8 2 2 4
0xAAAA0000, // 8 2 3 3
0xAAAA0000, // 8 2 4 2
0xAAAA0000, // 8 2 5 1
0xAAAA0000, // 8 3 0 5
0xAAAA0000, // 8 3 1 4
0xAAAA0000, // 8 3 2 3
0xAAAA0000, // 8 3 3 2
0xAAAA0000, // 8 3 4 1
0xAAAA0000, // 8 4 0 4
0xAAAA0000, // 8 4 1 3
0xAAAA0000, // 8 4 2 2
0xAAAA0000, // 8 4 3 1
0xAAAA0000, // 8 5 0 3
0xAAAA0000, // 8 5 1 2
0xAAAA0000, // 8 5 2 1
0xAAAA0000, // 8 6 0 2
0xAAAA0000, // 8 6 1 1
0xAAAA0000, // 8 7 0 1
0xAAA80000, // 9 0 0 7
0xAAA80000, // 9 0 1 6
0xAAA80000, // 9 0 2 5
0xAAA80000, // 9 0 3 4
0xAAA80000, // 9 0 4 3
0xAAA80000, // 9 0 5 2
0xAAA80000, // 9 0 6 1
0xAAA80000, // 9 1 0 6
0xAAA80000, // 9 1 1 5
0xAAA80000, // 9 1 2 4
0xAAA80000, // 9 1 3 3
0xAAA80000, // 9 1 4 2
0xAAA80000, // 9 1 5 1
0xAAA80000, // 9 2 0 5
0xAAA80000, // 9 2 1 4
0xAAA80000, // 9 2 2 3
0xAAA80000, // 9 2 3 2
0xAAA80000, // 9 2 4 1
0xAAA80000, // 9 3 0 4
0xAAA80000, // 9 3 1 3
0xAAA80000, // 9 3 2 2
0xAAA80000, // 9 3 3 1
0xAAA80000, // 9 4 0 3
0xAAA80000, // 9 4 1 2
0xAAA80000, // 9 4 2 1
0xAAA80000, // 9 5 0 2
0xAAA80000, // 9 5 1 1
0xAAA80000, // 9 6 0 1
0xAAA00000, // 10 0 0 6
0xAAA00000, // 10 0 1 5
0xAAA00000, // 10 0 2 4
0xAAA00000, // 10 0 3 3
0xAAA00000, // 10 0 4 2
0xAAA00000, // 10 0 5 1
0xAAA00000, // 10 1 0 5
0xAAA00000, // 10 1 1 4
0xAAA00000, // 10 1 2 3
0xAAA00000, // 10 1 3 2
0xAAA00000, // 10 1 4 1
0xAAA00000, // 10 2 0 4
0xAAA00000, // 10 2 1 3
0xAAA00000, // 10 2 2 2
0xAAA00000, // 10 2 3 1
0xAAA00000, // 10 3 0 3
0xAAA00000, // 10 3 1 2
0xAAA00000, // 10 3 2 1
0xAAA00000, // 10 4 0 2
0xAAA00000, // 10 4 1 1
0xAAA00000, // 10 5 0 1
0xAA800000, // 11 0 0 5
0xAA800000, // 11 0 1 4
0xAA800000, // 11 0 2 3
0xAA800000, // 11 0 3 2
0xAA800000, // 11 0 4 1
0xAA800000, // 11 1 0 4
0xAA800000, // 11 1 1 3
0xAA800000, // 11 1 2 2
0xAA800000, // 11 1 3 1
0xAA800000, // 11 2 0 3
0xAA800000, // 11 2 1 2
0xAA800000, // 11 2 2 1
0xAA800000, // 11 3 0 2
0xAA800000, // 11 3 1 1
0xAA800000, // 11 4 0 1
0xAA000000, // 12 0 0 4
0xAA000000, // 12 0 1 3
0xAA000000, // 12 0 2 2
0xAA000000, // 12 0 3 1
0xAA000000, // 12 1 0 3
0xAA000000, // 12 1 1 2
0xAA000000, // 12 1 2 1
0xAA000000, // 12 2 0 2
0xAA000000, // 12 2 1 1
0xAA000000, // 12 3 0 1
0xA8000000, // 13 0 0 3
0xA8000000, // 13 0 1 2
0xA8000000, // 13 0 2 1
0xA8000000, // 13 1 0 2
0xA8000000, // 13 1 1 1
0xA8000000, // 13 2 0 1
0xA0000000, // 14 0 0 2
0xA0000000, // 14 0 1 1
0xA0000000, // 14 1 0 1
0x80000000, // 15 0 0 1
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
0x80000000,
};

View File

@ -21,8 +21,10 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h> // FLT_MAX
#include "CudaMath.h"
@ -51,57 +53,65 @@ __device__ inline void swap(T & a, T & b)
__constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f };
__constant__ float3 kColorMetricSqr = { 1.0f, 1.0f, 1.0f };
// Some kernels read the input through texture.
texture<uchar4, 2, cudaReadModeNormalizedFloat> tex;
////////////////////////////////////////////////////////////////////////////////
// Sort colors
////////////////////////////////////////////////////////////////////////////////
__device__ void sortColors(const float * values, int * ranks)
__device__ void sortColors(const float * values, int * cmp)
{
#if __DEVICE_EMULATION__
if (threadIdx.x == 0)
{
for (int tid = 0; tid < 16; tid++)
{
int rank = 0;
for (int i = 0; i < 16; i++)
{
rank += (values[i] < values[tid]);
}
ranks[tid] = rank;
}
int tid = threadIdx.x;
// Resolve elements with the same index.
for (int i = 0; i < 15; i++)
{
for (int tid = 0; tid < 16; tid++)
{
if (tid > i && ranks[tid] == ranks[i]) ++ranks[tid];
}
}
}
#if 1
cmp[tid] = (values[0] < values[tid]);
cmp[tid] += (values[1] < values[tid]);
cmp[tid] += (values[2] < values[tid]);
cmp[tid] += (values[3] < values[tid]);
cmp[tid] += (values[4] < values[tid]);
cmp[tid] += (values[5] < values[tid]);
cmp[tid] += (values[6] < values[tid]);
cmp[tid] += (values[7] < values[tid]);
cmp[tid] += (values[8] < values[tid]);
cmp[tid] += (values[9] < values[tid]);
cmp[tid] += (values[10] < values[tid]);
cmp[tid] += (values[11] < values[tid]);
cmp[tid] += (values[12] < values[tid]);
cmp[tid] += (values[13] < values[tid]);
cmp[tid] += (values[14] < values[tid]);
cmp[tid] += (values[15] < values[tid]);
// Resolve elements with the same index.
if (tid > 0 && cmp[tid] == cmp[0]) ++cmp[tid];
if (tid > 1 && cmp[tid] == cmp[1]) ++cmp[tid];
if (tid > 2 && cmp[tid] == cmp[2]) ++cmp[tid];
if (tid > 3 && cmp[tid] == cmp[3]) ++cmp[tid];
if (tid > 4 && cmp[tid] == cmp[4]) ++cmp[tid];
if (tid > 5 && cmp[tid] == cmp[5]) ++cmp[tid];
if (tid > 6 && cmp[tid] == cmp[6]) ++cmp[tid];
if (tid > 7 && cmp[tid] == cmp[7]) ++cmp[tid];
if (tid > 8 && cmp[tid] == cmp[8]) ++cmp[tid];
if (tid > 9 && cmp[tid] == cmp[9]) ++cmp[tid];
if (tid > 10 && cmp[tid] == cmp[10]) ++cmp[tid];
if (tid > 11 && cmp[tid] == cmp[11]) ++cmp[tid];
if (tid > 12 && cmp[tid] == cmp[12]) ++cmp[tid];
if (tid > 13 && cmp[tid] == cmp[13]) ++cmp[tid];
if (tid > 14 && cmp[tid] == cmp[14]) ++cmp[tid];
#else
const int tid = threadIdx.x;
int rank = 0;
cmp[tid] = 0;
#pragma unroll
for (int i = 0; i < 16; i++)
{
rank += (values[i] < values[tid]);
}
ranks[tid] = rank;
#pragma unroll
for (int i = 0; i < 16; i++)
{
cmp[tid] += (values[i] < values[tid]);
}
// Resolve elements with the same index.
#pragma unroll
for (int i = 0; i < 15; i++)
{
if (tid > i && ranks[tid] == ranks[i]) ++ranks[tid];
}
// Resolve elements with the same index.
#pragma unroll
for (int i = 0; i < 15; i++)
{
if (tid > 0 && cmp[tid] == cmp[i]) ++cmp[tid];
}
#endif
}
@ -126,7 +136,9 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
// No need to synchronize, 16 < warp size.
__debugsync();
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
colorSums(colors, sums);
@ -136,74 +148,17 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
dps[idx] = dot(colors[idx], axis);
__debugsync();
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, xrefs);
float3 tmp = colors[idx];
__debugsync();
colors[xrefs[idx]] = tmp;
}
#if __DEVICE_EMULATION__
else
{
__debugsync();
__debugsync();
__debugsync();
}
#endif
}
// Load one 4x4 block of colors through the 'tex' texture reference and
// reorder it along its best fit line.
//  - bn:        block offset added to blockIdx.x to select the block.
//  - w:         number of blocks per row, used to turn the block number
//               into texel coordinates.
//  - colors:    receives the 16 colors, stored at their sorted positions.
//  - sums:      scratch array filled by colorSums(); sums[0] is passed on to
//               bestFitLine().
//  - xrefs:     receives, for each thread, the slot its color was moved to.
//  - sameColor: set to non-zero when the best fit axis is the zero vector.
// Only threads with threadIdx.x < 16 do the work.
__device__ void loadColorBlockTex(uint bn, uint w, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
// Projection of each color onto the best fit axis; used as the sort key.
__shared__ float dps[16];
if (idx < 16)
{
// Texel coordinates: 4 texels per block in each direction, plus this
// thread's position inside the 4x4 block.
float x = 4 * ((bn + bid) % w) + idx % 4;
float y = 4 * ((bn + bid) / w) + idx / 4;
// Read color and copy to shared mem.
float4 c = tex2D(tex, x, y);
// The x and z components are exchanged while copying.
colors[idx].x = c.z;
colors[idx].y = c.y;
colors[idx].z = c.x;
// No need to synchronize, 16 < warp size.
__debugsync();
// Sort colors along the best fit line.
colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
*sameColor = (axis == make_float3(0, 0, 0));
dps[idx] = dot(colors[idx], axis);
__debugsync();
sortColors(dps, xrefs);
// Every thread reads its own color before any thread scatters, so that
// the scatter below cannot overwrite a color that was not read yet.
float3 tmp = colors[idx];
__debugsync();
colors[xrefs[idx]] = tmp;
}
#if __DEVICE_EMULATION__
else
{
// Threads that skip the work above still issue the same number of
// __debugsync() calls as the working threads (three).
__debugsync();
__debugsync();
__debugsync();
}
#endif
}
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
{
const int bid = blockIdx.x;
@ -224,8 +179,11 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
colors[idx] = rawColors[idx] * weights[idx];
// No need to synchronize, 16 < warp size.
__debugsync();
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
colorSums(colors, sums);
@ -233,70 +191,23 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
*sameColor = (axis == make_float3(0, 0, 0));
// Single color compressor needs unweighted colors.
if (*sameColor) colors[idx] = rawColors[idx];
dps[idx] = dot(rawColors[idx], axis);
__debugsync();
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, xrefs);
float3 tmp = colors[idx];
float w = weights[idx];
__debugsync();
colors[xrefs[idx]] = tmp;
float w = weights[idx];
weights[xrefs[idx]] = w;
}
#if __DEVICE_EMULATION__
else
{
__debugsync();
__debugsync();
__debugsync();
}
#endif
}
// Two-channel variant: load one block of 16 packed colors from 'image' and
// reorder it along its best fit line.
//  - image:     packed 32 bit colors, 16 consecutive entries per block.
//  - colors:    receives the 16 two-channel colors at their sorted positions.
//  - sums:      scratch array filled by colorSums(); sums[0] is passed on to
//               bestFitLine().
//  - xrefs:     receives, for each thread, the slot its color was moved to.
//  - sameColor: set to non-zero when the best fit axis is the zero vector.
// Only threads with threadIdx.x < 16 do the work.
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16], int * sameColor)
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
// Projection of each color onto the best fit axis; used as the sort key.
__shared__ float dps[16];
if (idx < 16)
{
// Read color and copy to shared mem.
uint c = image[(bid) * 16 + idx];
// Bits 8-15 go to .y and bits 16-23 to .x, both scaled to [0, 1].
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
// No need to synchronize, 16 < warp size.
__debugsync();
// Sort colors along the best fit line.
colorSums(colors, sums);
float2 axis = bestFitLine(colors, sums[0]);
*sameColor = (axis == make_float2(0, 0));
dps[idx] = dot(colors[idx], axis);
__debugsync();
sortColors(dps, xrefs);
// Every thread reads its own color before any thread scatters, so that
// the scatter below cannot overwrite a color that was not read yet.
float2 tmp = colors[idx];
__debugsync();
colors[xrefs[idx]] = tmp;
}
#if __DEVICE_EMULATION__
else
{
// Threads that skip the work above still issue the same number of
// __debugsync() calls as the working threads (three).
__debugsync();
__debugsync();
__debugsync();
}
#endif
}
@ -305,33 +216,13 @@ __device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sum
////////////////////////////////////////////////////////////////////////////////
// Quantize a color to 5:6:5 bits and return the quantized color expanded back
// to floating point.
//
// v - input color; every component is saturated to [0, 1] before quantization.
// w - out: packed 16-bit value, x in bits 11..15, y in bits 5..10, z in bits 0..4.
//
// Returns the color a decoder would reconstruct from *w, as level / 31 (x, z)
// and level / 63 (y).
inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
{
    // Round each saturated component to its nearest integer level.
    const uint qx = __float2uint_rn(__saturatef(v.x) * 31.0f);
    const uint qy = __float2uint_rn(__saturatef(v.y) * 63.0f);
    const uint qz = __float2uint_rn(__saturatef(v.z) * 31.0f);

    *w = (qx << 11) | (qy << 5) | qz;

    // Expand the integer levels back to [0, 1].
    float3 expanded;
    expanded.x = __uint2float_rn(qx) * 1.0f / 31.0f;
    expanded.y = __uint2float_rn(qy) * 1.0f / 63.0f;
    expanded.z = __uint2float_rn(qz) * 1.0f / 31.0f;
    return expanded;
}
// Quantize a two-component color to 5:6 bits and return the quantized color
// expanded back to floating point.
//
// v - input color; both components are saturated to [0, 1] before quantization.
// w - out: packed 16-bit value, x in bits 11..15, y in bits 5..10, low 5 bits zero.
//
// Returns (level_x / 31, level_y / 63).
inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
{
    // Round each saturated component to its nearest integer level.
    const uint qx = __float2uint_rn(__saturatef(v.x) * 31.0f);
    const uint qy = __float2uint_rn(__saturatef(v.y) * 63.0f);

    *w = (qx << 11) | (qy << 5);

    // Expand the integer levels back to [0, 1].
    float2 expanded;
    expanded.x = __uint2float_rn(qx) * 1.0f / 31.0f;
    expanded.y = __uint2float_rn(qy) * 1.0f / 63.0f;
    return expanded;
}
inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
{
uint x = __float2uint_rn(__saturatef(v.x) * 255.0f);
uint y = __float2uint_rn(__saturatef(v.y) * 255.0f);
*w = (x << 8) | y;
v.x = __uint2float_rn(x) * 1.0f / 255.0f;
v.y = __uint2float_rn(y) * 1.0f / 255.0f;
v.x = rintf(__saturatef(v.x) * 31.0f);
v.y = rintf(__saturatef(v.y) * 63.0f);
v.z = rintf(__saturatef(v.z) * 31.0f);
*w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z;
v.x *= 0.03227752766457f; // approximate integer bit expansion.
v.y *= 0.01583151765563f;
v.z *= 0.03227752766457f;
return v;
}
@ -578,114 +469,6 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights,
}
*/
// Evaluate one index permutation for a two-component block, using the
// alphaTable4 / prods4 weight tables.
//
// colors      - the 16 block colors.
// color_sum   - sum of the 16 colors.
// permutation - sixteen 2-bit selectors, texel i in bits 2*i and 2*i+1.
// start, end  - out: the least-squares endpoints quantized by roundAndExpand56().
//
// Returns the error expression evaluated with the quantized endpoints, scaled by 1/9.
__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
    // Accumulate the weighted color sum and the packed weight products.
    float2 weightedColors = make_float2(0.0f, 0.0f);
    uint packedProducts = 0;

    uint remaining = permutation;
    #pragma unroll
    for (int texel = 0; texel < 16; texel++)
    {
        const uint selector = remaining & 3;
        weightedColors += alphaTable4[selector] * colors[texel];
        packedProducts += prods4[selector];
        remaining >>= 2;
    }

    // The accumulator carries three sums: bits 16+, bits 8..15 and bits 0..7.
    const float alphaSq = float(packedProducts >> 16);
    const float betaSq = float((packedProducts >> 8) & 0xff);
    const float alphaBeta = float(packedProducts & 0xff);

    const float2 complementColors = 9.0f * color_sum - weightedColors;

    // Solve the 2x2 least-squares system for the two endpoints.
    const float invDet = 1.0f / (alphaSq * betaSq - alphaBeta * alphaBeta);
    float2 a = (weightedColors * betaSq - complementColors * alphaBeta) * invDet;
    float2 b = (complementColors * alphaSq - weightedColors * alphaBeta) * invDet;

    // Snap both endpoints to the 5:6 grid; the error uses the snapped values.
    a = roundAndExpand56(a, start);
    b = roundAndExpand56(b, end);

    const float2 err = a * a * alphaSq + b * b * betaSq + 2.0f * (a * b * alphaBeta - a * weightedColors - b * complementColors);
    return (1.0f / 9.0f) * (err.x + err.y);
}
// Evaluate one index permutation for a two-component block, using the
// alphaTable3 / prods3 weight tables.
//
// colors      - the 16 block colors.
// color_sum   - sum of the 16 colors.
// permutation - sixteen 2-bit selectors, texel i in bits 2*i and 2*i+1.
// start, end  - out: the least-squares endpoints quantized by roundAndExpand56().
//
// Returns the error expression evaluated with the quantized endpoints, scaled by 1/4.
__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
    // Accumulate the weighted color sum and the packed weight products.
    float2 weightedColors = make_float2(0.0f, 0.0f);
    uint packedProducts = 0;

    uint remaining = permutation;
    #pragma unroll
    for (int texel = 0; texel < 16; texel++)
    {
        const uint selector = remaining & 3;
        weightedColors += alphaTable3[selector] * colors[texel];
        packedProducts += prods3[selector];
        remaining >>= 2;
    }

    // The accumulator carries three sums: bits 16+, bits 8..15 and bits 0..7.
    const float alphaSq = float(packedProducts >> 16);
    const float betaSq = float((packedProducts >> 8) & 0xff);
    const float alphaBeta = float(packedProducts & 0xff);

    const float2 complementColors = 4.0f * color_sum - weightedColors;

    // Solve the 2x2 least-squares system for the two endpoints.
    const float invDet = 1.0f / (alphaSq * betaSq - alphaBeta * alphaBeta);
    float2 a = (weightedColors * betaSq - complementColors * alphaBeta) * invDet;
    float2 b = (complementColors * alphaSq - weightedColors * alphaBeta) * invDet;

    // Snap both endpoints to the 5:6 grid; the error uses the snapped values.
    a = roundAndExpand56(a, start);
    b = roundAndExpand56(b, end);

    const float2 err = a * a * alphaSq + b * b * betaSq + 2.0f * (a * b * alphaBeta - a * weightedColors - b * complementColors);
    return (1.0f / 4.0f) * (err.x + err.y);
}
// Evaluate one index permutation for a two-component block with 8:8 endpoints,
// using the alphaTable4 / prods4 weight tables.
//
// colors      - the 16 block colors.
// color_sum   - sum of the 16 colors.
// permutation - sixteen 2-bit selectors, texel i in bits 2*i and 2*i+1.
// start, end  - out: the least-squares endpoints quantized by roundAndExpand88().
//
// Returns the error expression evaluated with the quantized endpoints, scaled by 1/9.
__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
    // Accumulate the weighted color sum and the packed weight products.
    float2 weightedColors = make_float2(0.0f, 0.0f);
    uint packedProducts = 0;

    uint remaining = permutation;
    #pragma unroll
    for (int texel = 0; texel < 16; texel++)
    {
        const uint selector = remaining & 3;
        weightedColors += alphaTable4[selector] * colors[texel];
        packedProducts += prods4[selector];
        remaining >>= 2;
    }

    // The accumulator carries three sums: bits 16+, bits 8..15 and bits 0..7.
    const float alphaSq = float(packedProducts >> 16);
    const float betaSq = float((packedProducts >> 8) & 0xff);
    const float alphaBeta = float(packedProducts & 0xff);

    const float2 complementColors = 9.0f * color_sum - weightedColors;

    // Solve the 2x2 least-squares system for the two endpoints.
    const float invDet = 1.0f / (alphaSq * betaSq - alphaBeta * alphaBeta);
    float2 a = (weightedColors * betaSq - complementColors * alphaBeta) * invDet;
    float2 b = (complementColors * alphaSq - weightedColors * alphaBeta) * invDet;

    // Snap both endpoints to the 8:8 grid; the error uses the snapped values.
    a = roundAndExpand88(a, start);
    b = roundAndExpand88(b, end);

    const float2 err = a * a * alphaSq + b * b * betaSq + 2.0f * (a * b * alphaBeta - a * weightedColors - b * complementColors);
    return (1.0f / 9.0f) * (err.x + err.y);
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate all permutations
@ -814,67 +597,6 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
}
*/
// Per-thread search over the permutation table for a two-component block.
//
// Each thread strides through `permutations` in steps of NUM_THREADS, keeps the
// best candidate it has seen in bestStart / bestEnd / bestPermutation, and
// stores its lowest error in errors[threadIdx.x].
//
// colors          - the 16 block colors.
// colorSum        - sum of the 16 colors.
// permutations    - permutation table; entries 0..991 are read.
// bestStart/End   - out: endpoints of this thread's best candidate.
// bestPermutation - out: index bits of this thread's best candidate.
// errors          - out: per-thread minimum error, indexed by threadIdx.x.
__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
// Cache of the first 160 permutations, reused by the second pass below.
__shared__ uint s_permutations[160];
// First pass: evalPermutation4 over up to 992 permutations.
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
if (pidx < 160) s_permutations[pidx] = permutation;
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
// Order the first-pass winner so that bestStart >= bestEnd; the XOR toggles
// the low bit of every 2-bit index to match the swapped endpoints.
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
// Second pass: evalPermutation3 over the 160 cached permutations. The pidx
// formula is the same as above and i < 3 is a subset of i < 16, so a thread
// only reads entries it stored itself in the first pass.
for(int i = 0; i < 3; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 160) break;
ushort start, end;
uint permutation = s_permutations[pidx];
float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
// A second-pass winner is ordered the other way round (bestStart <= bestEnd).
// The mask toggles the low bit only of indices whose high bit is clear.
if (bestStart > bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
}
}
}
errors[idx] = bestError;
}
__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
@ -943,40 +665,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
errors[idx] = bestError;
}
// Per-thread search over the permutation table using evalPermutationCTX().
//
// Each thread visits entries threadIdx.x, threadIdx.x + NUM_THREADS, ... of
// `permutations` (at most 16 entries, and only indices below 704), keeps its
// best candidate in the reference outputs and stores its lowest error in
// errors[threadIdx.x].
__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
    const int tid = threadIdx.x;

    float lowestError = FLT_MAX;

    int entry = tid;
    for (int step = 0; step < 16 && entry < 704; step++, entry += NUM_THREADS)
    {
        const uint candidate = permutations[entry];

        ushort candidateStart, candidateEnd;
        const float candidateError = evalPermutationCTX(colors, colorSum, candidate, &candidateStart, &candidateEnd);

        if (candidateError < lowestError)
        {
            lowestError = candidateError;
            bestPermutation = candidate;
            bestStart = candidateStart;
            bestEnd = candidateEnd;
        }
    }

    // Keep the endpoints ordered with bestStart >= bestEnd; toggling the low
    // bit of every 2-bit index matches the swapped endpoints.
    if (bestStart < bestEnd)
    {
        swap(bestEnd, bestStart);
        bestPermutation ^= 0x55555555; // Flip indices.
    }

    errors[tid] = lowestError;
}
////////////////////////////////////////////////////////////////////////////////
// Find index with minimum error
@ -1004,6 +692,7 @@ __device__ int findMinError(float * errors)
}
}
}
#else
for(int d = NUM_THREADS/2; d > 32; d >>= 1)
{
@ -1086,11 +775,6 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr
result[bid].y = indices;
}
// Write a block with the given endpoints and permutation. This forwards
// unchanged to saveBlockDXT1(), so the output layout is whatever that
// function produces.
__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result)
{
saveBlockDXT1(start, end, permutation, xrefs, result);
}
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
{
const int bid = blockIdx.x;
@ -1114,41 +798,6 @@ __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
}
}
// Write the block for a single two-component color, using the OMatch5 /
// OMatch6 endpoint tables.
//
// color  - the block color; each component is scaled by 255 and truncated to
//          an integer table index.
// result - output array; entry blockIdx.x is written.
__device__ void saveSingleColorBlockDXT1(float2 color, uint2 * result)
{
    const int r = color.x * 255;
    const int g = color.y * 255;

    // Two packed endpoints; the low 5 bits stay zero.
    const ushort endpoint0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5);
    const ushort endpoint1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5);

    uint2 & block = result[blockIdx.x];

    // The smaller endpoint always goes in the upper 16 bits; the index word
    // depends on which endpoint that is.
    const bool firstIsSmaller = endpoint0 < endpoint1;
    block.x = firstIsSmaller ? ((endpoint0 << 16) | endpoint1) : ((endpoint1 << 16) | endpoint0);
    block.y = firstIsSmaller ? 0xffffffff : 0xaaaaaaaa;
}
// Write the block for a single two-component color with 8:8 endpoints.
//
// color  - the block color; each component is scaled by 255 and truncated.
// result - output array; entry blockIdx.x is written.
__device__ void saveSingleColorBlockCTX1(float2 color, uint2 * result)
{
    const int r = color.x * 255;
    const int g = color.y * 255;

    // x goes in the high byte and y in the low byte of the 16-bit endpoint.
    const ushort endpoint = (r << 8) | (g);

    // Both endpoints are the same color and all indices are zero.
    uint2 & block = result[blockIdx.x];
    block.x = (endpoint << 16) | endpoint;
    block.y = 0x00000000;
}
////////////////////////////////////////////////////////////////////////////////
// Compress color block
@ -1187,41 +836,6 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
}
}
// Kernel: compress one block per thread block, loading the colors through
// loadColorBlockTex().
//
// bn, w        - forwarded unchanged to loadColorBlockTex(); presumably a block
//                offset and the image width in blocks - TODO confirm there.
// permutations - permutation table searched by evalAllPermutations().
// result       - output blocks, written by the save functions.
__global__ void compressDXT1_Tex(uint bn, uint w, const uint * permutations, uint2 * result)
{
__shared__ float3 colors[16];
__shared__ float3 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlockTex(bn, w, colors, sums, xrefs, &sameColor);
// Make the loaded block and the sameColor flag visible to every thread.
__syncthreads();
// sameColor lives in shared memory, so all threads take this branch together
// and none is left behind at a later barrier.
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float3 colors[16];
@ -1292,75 +906,6 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
}
// Kernel: compress one block per thread block, treating each texel as a
// two-component (float2) color.
//
// permutations - permutation table searched by evalAllPermutations().
// image        - packed input texels, 16 per block.
// result       - output blocks, written by the save functions.
__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs, &sameColor);
// Make the loaded block and the sameColor flag visible to every thread.
__syncthreads();
// sameColor lives in shared memory, so all threads take this branch together
// and none is left behind at a later barrier.
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
// Kernel: compress one block per thread block as two-component colors with
// 8:8 endpoints (evalAllPermutationsCTX / saveBlockCTX1).
//
// permutations - permutation table searched by evalAllPermutationsCTX().
// image        - packed input texels, 16 per block.
// result       - output blocks, written by the save functions.
__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs, &sameColor);
// Make the loaded block and the sameColor flag visible to every thread.
__syncthreads();
// sameColor lives in shared memory, so all threads take this branch together
// and none is left behind at a later barrier.
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockCTX1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutationsCTX(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockCTX1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
/*
__device__ float computeError(const float weights[16], uchar a0, uchar a1)
{
@ -1539,125 +1084,6 @@ __global__ void compressDXT5(const uint * permutations, const uint * image, uint
}
*/
/*__device__ void evaluatePalette(uint alpha0, uint alpha1, uint alphas[8])
{
alpha[0] = alpha0;
alpha[1] = alpha1;
alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7; // bit code 010
alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7; // bit code 011
alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7; // bit code 100
alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7; // bit code 101
alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7; // bit code 110
alpha[7] = (1 * alpha[0] + 6 * alpha[1]) / 7; // bit code 111
}
__device__ uint computeAlphaError(const uint block[16], uint alpha0, uint alpha1, int bestError = INT_MAX)
{
uint8 alphas[8];
evaluatePalette(alpha0, alpha1, alphas);
int totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = block[i];
// @@ It should be possible to do this much faster.
int minDist = INT_MAX;
for (uint p = 0; p < 8; p++)
{
int dist = alphaDistance(alpha, alphas[p]);
minDist = min(dist, minDist);
}
totalError += minDist;
if (totalError > bestError)
{
// early out
return totalError;
}
}
return totalError;
}
void compressDXT5A(uint alpha[16])
{
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
mina = min(mina, alpha[i]);
maxa = max(maxa, alpha[i]);
}
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
int besta0 = maxa;
int besta1 = mina;
// Expand search space a bit.
const int alphaExpand = 8;
mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand;
maxa = (maxa <= 255-alphaExpand) ? 255 : maxa + alphaExpand;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
{
nvDebugCheck(a0 - a1 > 8);
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock, besterror);
if (error < besterror)
{
besterror = error;
besta0 = a0;
besta1 = a1;
}
}
}
dxtBlock->alpha0 = besta0;
dxtBlock->alpha1 = besta1;
}
}
__global__ void compressDXT5n(uint blockNum, uint2 * d_result)
{
uint idx = blockIdx.x * 128 + threadIdx.x;
if (idx >= blockNum)
{
return;
}
// @@ Ideally we would load the data to shared mem to achieve coalesced global mem access.
// @@ Blocks would require too much shared memory (8k) and limit occupancy.
// @@ Ideally we should use SIMD processing, multiple threads (4-8) processing the same block.
// That simplifies coalescing, and reduces divergence.
// @@ Experiment with texture. That's probably the most simple approach.
uint x[16];
uint y[16];
}
*/
////////////////////////////////////////////////////////////////////////////////
// Setup kernel
////////////////////////////////////////////////////////////////////////////////
@ -1685,20 +1111,6 @@ extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result
compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
// Host entry point: bind `d_data` to the `tex` texture reference and launch
// compressDXT1_Tex with `blockNum` thread blocks of NUM_THREADS threads.
//
// bn, w     - forwarded unchanged to the kernel.
// d_data    - CUDA array holding the source image.
// d_result  - device output buffer, reinterpreted as uint2 (one entry per block).
// d_bitmaps - device permutation table.
extern "C" void compressKernelDXT1_Tex(uint bn, uint blockNum, uint w, cudaArray * d_data, uint * d_result, uint * d_bitmaps)
{
// Setup texture
// Unnormalized coordinates, point sampling, clamped addressing on both axes.
tex.normalized = false;
tex.filterMode = cudaFilterModePoint;
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
// NOTE(review): the status returned by cudaBindTextureToArray is discarded.
cudaBindTextureToArray(tex, d_data);
compressDXT1_Tex<<<blockNum, NUM_THREADS>>>(bn, w, d_bitmaps, (uint2 *)d_result);
}
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressLevel4DXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
@ -1708,26 +1120,3 @@ extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint *
{
compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
// Host entry point: launch compressNormalDXT1 with `blockNum` thread blocks of
// NUM_THREADS threads.
//
// d_data    - device input texels.
// d_result  - device output buffer, reinterpreted as uint2 (one entry per block).
// d_bitmaps - device permutation table.
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressNormalDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
// Host entry point: launch compressCTX1 with `blockNum` thread blocks of
// NUM_THREADS threads.
//
// d_data    - device input texels.
// d_result  - device output buffer, reinterpreted as uint2 (one entry per block).
// d_bitmaps - device permutation table.
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressCTX1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
// Host entry point for DXT5n compression.
//
// NOTE: the kernel launch below is commented out, so this function currently
// only configures and binds the texture reference; `blockNum` and `d_result`
// are unused and nothing is written to the output buffer.
extern "C" void compressKernelDXT5n(uint blockNum, cudaArray * d_data, uint * d_result)
{
// Setup texture
// Unnormalized coordinates, point sampling, clamped addressing on both axes.
tex.normalized = false;
tex.filterMode = cudaFilterModePoint;
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
// NOTE(review): the status returned by cudaBindTextureToArray is discarded.
cudaBindTextureToArray(tex, d_data);
// compressDXT5n<<<blockNum/128, 128>>>(blockNum, (uint2 *)d_result);
}

File diff suppressed because it is too large Load Diff

View File

@ -42,17 +42,12 @@ namespace nv
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1_Tex(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
uint * m_bitmapTable;
uint * m_bitmapTableCTX;
uint * m_data;
uint * m_result;

View File

@ -26,6 +26,7 @@
#ifndef CUDAMATH_H
#define CUDAMATH_H
#include <float.h>
inline __device__ __host__ float3 operator *(float3 a, float3 b)
@ -86,69 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
return a.x == b.x && a.y == b.y && a.z == b.z;
}
// float2 operators
// Component-wise arithmetic, scalar scaling, exact comparison and dot product
// for float2, usable from both host and device code.

// Component-wise product.
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
return make_float2(a.x*b.x, a.y*b.y);
}
// Scale a vector by a scalar (scalar on the left).
inline __device__ __host__ float2 operator *(float f, float2 v)
{
return make_float2(v.x*f, v.y*f);
}
// Scale a vector by a scalar (scalar on the right).
inline __device__ __host__ float2 operator *(float2 v, float f)
{
return make_float2(v.x*f, v.y*f);
}
// Component-wise sum.
inline __device__ __host__ float2 operator +(float2 a, float2 b)
{
return make_float2(a.x+b.x, a.y+b.y);
}
// In-place component-wise sum. Returns void, so it cannot be chained.
inline __device__ __host__ void operator +=(float2 & b, float2 a)
{
b.x += a.x;
b.y += a.y;
}
// Component-wise difference.
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
return make_float2(a.x-b.x, a.y-b.y);
}
// In-place component-wise difference. Returns void, so it cannot be chained.
inline __device__ __host__ void operator -=(float2 & b, float2 a)
{
b.x -= a.x;
b.y -= a.y;
}
// Divide by a scalar, implemented as a multiply by the reciprocal.
// No check is made for f == 0.
inline __device__ __host__ float2 operator /(float2 v, float f)
{
float inv = 1.0f / f;
return v * inv;
}
// In-place divide by a scalar, implemented as a multiply by the reciprocal.
// No check is made for f == 0.
inline __device__ __host__ void operator /=(float2 & b, float f)
{
float inv = 1.0f / f;
b.x *= inv;
b.y *= inv;
}
// Exact (bitwise-value) equality of both components; no tolerance is applied.
inline __device__ __host__ bool operator ==(float2 a, float2 b)
{
return a.x == b.x && a.y == b.y;
}
// Two-component dot product.
inline __device__ __host__ float dot(float2 a, float2 b)
{
return a.x * b.x + a.y * b.y;
}
inline __device__ __host__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
@ -305,89 +243,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
return firstEigenVector(covariance);
}
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
// Approximate the dominant eigenvector of a symmetric 2x2 matrix by repeated
// multiplication, starting from (1, 1).
//
// matrix - the three distinct entries of the symmetric matrix:
//          [ matrix[0] matrix[1] ]
//          [ matrix[1] matrix[2] ]
//
// Returns the vector after 8 iterations. It is scaled by max(x, y) on each
// step, not normalized to unit length; a zero scale yields the zero vector.
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
{
// @@ 8 iterations is probably more than enough.
float2 v = make_float2(1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1];
float y = v.x * matrix[1] + v.y * matrix[2];
// NOTE(review): this scales by the larger signed component, not the larger
// magnitude - confirm that is intended when components are negative.
float m = max(x, y);
float iv = 1.0f / m;
// Avoid propagating inf/NaN when the scale is zero.
if (m == 0.0f) iv = 0.0f;
v = make_float2(x*iv, y*iv);
}
return v;
}
// Compute the sum of the 16 block colors and store it in `sums`.
//
// In emulation builds a single sequential pass writes the total to all 16
// entries. On the device each calling thread handles entry threadIdx.x, so the
// function must be called by threads 0..15 only (indices idx^8 etc. would
// otherwise fall outside the 16-entry arrays).
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
float2 color_sum = make_float2(0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
// Butterfly exchange: each step adds the partner entry whose index differs in
// one bit. There is no barrier between the steps.
// NOTE(review): this presumably relies on the 16 threads running in lockstep
// within one warp - confirm for the targeted hardware.
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
// Compute the best fit line direction of the 16 block colors as the dominant
// eigenvector of their (unnormalized) covariance matrix.
//
// colors    - the 16 block colors.
// color_sum - sum of the 16 colors; color_sum / 16 is used as the centroid.
//
// Returns the vector produced by firstEigenVector2D().
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
// Sequential accumulation of the xx, xy and yy terms.
float covariance[3] = {0, 0, 0};
for (int i = 0; i < 16; i++)
{
float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[2] += a.y * a.y;
}
#else
// Each calling thread contributes the three products for texel threadIdx.x.
const int idx = threadIdx.x;
float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
__shared__ float covariance[16*3];
covariance[3 * idx + 0] = diff.x * diff.x;
covariance[3 * idx + 1] = diff.x * diff.y;
covariance[3 * idx + 2] = diff.y * diff.y;
// Tree reduction into entries 0..2. There is no barrier between the steps.
// NOTE(review): this presumably relies on the 16 threads running in lockstep
// within one warp - confirm for the targeted hardware.
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
}
}
#endif
// Compute first eigen vector.
// Only the first three entries of `covariance` are read by the callee.
return firstEigenVector2D(covariance);
}
#endif // CUDAMATH_H

View File

@ -1,60 +0,0 @@
#include "nvtt_experimental.h"
// Opaque texture object of the experimental C API. Owns an optional Image and
// an optional FloatImage; both start out NULL and are deleted on destruction.
struct NvttTexture
{
NvttTexture() :
m_constant(false),
m_image(NULL),
m_floatImage(NULL)
{
}
~NvttTexture()
{
// For a "constant" texture unwrap() is called before the image is deleted;
// presumably this detaches pixel memory the image does not own - TODO confirm
// against Image::unwrap().
if (m_constant && m_image) m_image->unwrap();
delete m_image;
delete m_floatImage;
}
// When true, m_image is unwrapped before deletion (see destructor).
bool m_constant;
// Owned image; NULL until one is assigned.
Image * m_image;
// Owned floating point image; NULL until one is assigned.
FloatImage * m_floatImage;
};
// Allocate an empty texture object. The caller releases it with
// nvttDestroyTexture().
NvttTexture * nvttCreateTexture()
{
return new NvttTexture();
}
// Destroy a texture created by nvttCreateTexture(). Passing NULL is harmless
// because `delete` on a null pointer does nothing.
void nvttDestroyTexture(NvttTexture * tex)
{
delete tex;
}
// Copy caller-provided pixels into the image.
//
// img    - target image; must not be NULL.
// format - only NVTT_InputFormat_BGRA_8UB is handled; any other value trips nvCheck.
// w, h   - image extents in pixels.
// data   - source pixels; w * h * 4 bytes are copied.
//
// NOTE(review): `NvttImage` is not declared in the visible part of this file
// (the struct defined above is `NvttTexture`) - confirm the intended type.
// NOTE(review): img->m_image is dereferenced without a NULL check, and the
// NvttTexture constructor above initializes m_image to NULL - verify that an
// image is always allocated before this is called.
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data)
{
nvCheck(img != NULL);
if (format == NVTT_InputFormat_BGRA_8UB)
{
// The pixels are copied, so the image owns its storage from here on.
img->m_constant = false;
img->m_image->allocate(w, h);
memcpy(img->m_image->pixels(), data, w * h * 4);
}
else
{
// Unsupported input format.
nvCheck(false);
}
}
// Compress the image to the given format.
// Not implemented yet: only the argument check is performed and `format` is
// unused.
void nvttCompressImage(NvttImage * img, NvttFormat format)
{
nvCheck(img != NULL);
// @@ Invoke appropriate compressor.
}
#endif // NVTT_EXPERIMENTAL_H

View File

@ -1,103 +0,0 @@
#ifndef NVTT_EXPERIMENTAL_H
#define NVTT_EXPERIMENTAL_H
#include <nvtt/nvtt.h>
typedef struct NvttTexture NvttTexture;
typedef struct NvttOutputOptions NvttOutputOptions;
// Global functions
void nvttInitialize(...);
unsigned int nvttGetVersion();
const char * nvttGetErrorString(unsigned int error);
// Texture functions
NvttTexture * nvttCreateTexture();
void nvttDestroyTexture(NvttTexture * tex);
void nvttSetTexture2D(NvttTexture * tex, NvttInputFormat format, uint w, uint h, uint idx, void * data);
void nvttResize(NvttTexture * img, uint w, uint h);
unsigned int nvttDownsample(NvttTexture * img);
void nvttOutputCompressed(NvttTexture * img, NvttOutputFormat format);
void nvttOutputPixelFormat(NvttTexture * img, NvttOutputFormat format);
// How to control the compression parameters?
// Using many arguments:
// void nvttCompressImage(img, format, quality, r, g, b, a, ...);
// Using existing compression option class:
// compressionOptions = nvttCreateCompressionOptions();
// nvttSetCompressionOptionsFormat(compressionOptions, format);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a);
// ...
// nvttCompressImage(img, compressionOptions);
// Using thread local context state:
// void nvttSetCompressionFormat(format);
// void nvttSetCompressionQuality(quality);
// void nvttSetCompressionColorWeights(r, g, b, a);
// ...
// nvttCompressImage(img);
// Using thread local context state, but with GL style function arguments:
// nvttCompressorParameteri(NVTT_FORMAT, format);
// nvttCompressorParameteri(NVTT_QUALITY, quality);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a);
// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a);
// ...
// nvttCompressImage(img);
// How do we get the compressed output?
// - Using callbacks. (via new entrypoints, or through outputOptions)
// - Return it explicitly from nvttCompressImage.
// - Store it along with the image, and retrieve it later explicitly with 'nvttGetCompressedData(img, ...)'
/*
// Global functions
void nvttInitialize(...);
unsigned int nvttGetVersion();
const char * nvttGetErrorString(unsigned int error);
// Context object
void nvttCreateContext();
void nvttDestroyContext();
void nvttSetParameter1i(unsigned int name, int value);
void nvttSetParameter1f(unsigned int name, float value);
void nvttSetParameter2f(unsigned int name, float v0, float v1);
void nvttSetParameter3f(unsigned int name, float v0, float v1, float v2);
void nvttSetParameter4f(unsigned int name, float v0, float v1, float v2, float v3);
// Image object
NvttImage * nvttCreateImage();
void nvttDestroyImage(NvttImage * img);
void nvttSetImageData(NvttImage * image, NvttInputFormat format, unsigned int w, unsigned int h, void * data);
void nvttSetImageParameter1i(NvttImage * image, unsigned int name, int value);
void nvttSetImageParameter1f(NvttImage * image, unsigned int name, float value);
void nvttResizeImage(NvttImage * image, unsigned int w, unsigned int h);
void nvttQuantizeImage(NvttImage * image, bool dither, unsigned int rbits, unsigned int gbits, unsigned int bbits, unsigned int abits);
void nvttCompressImage(NvttImage * image, void * buffer, int size);
*/
#endif // NVTT_EXPERIMENTAL_H

View File

@ -1,61 +0,0 @@
#include "nvtt_experimental.h"
/*
Errors in the original API:
- Too many memory copies.
- Implementation too complicated.
- Error output should not be in output options.
- Data driven interface. Follows the dialog model. Provide all the data upfront.
*/
// Output texture with mipmaps
void example0()
{
CompressionOptions compressionOptions;
OutputOptions outputOptions;
Texture img;
img.setTexture2D(format, w, h, 0, data);
Compressor context;
context.outputHeader(outputOptions);
context.outputCompressed(img, compressionOptions, outputOptions);
img.toLinear(2.2);
while (img.downsample(NVTT_FILTER_BOX))
{
img.toGamma(2.2);
outputCompressed(img, compressionOptions, outputOptions);
}
}
// Output texture with colored mipmaps
//
// API sketch: `format`, `w`, `h`, `data`, `color` and the mipmap index `i`
// stand for caller-provided values and are not declared here.
void example1()
{
    CompressionOptions compressionOptions;
    OutputOptions outputOptions;

    Texture img;
    img.setTexture2D(format, w, h, 0, data);

    Compressor context;
    context.outputHeader(outputOptions);

    // The top level is written as provided (already in gamma space).
    context.outputCompressed(img, compressionOptions, outputOptions);

    // Downsample in linear space.
    img.toLinear(2.2);
    while (img.downsample(NVTT_FILTER_BOX))
    {
        // Work on a copy so that `img` stays linear for the next downsample
        // and gamma is not applied repeatedly.
        Texture mipmap = img;
        mipmap.toGamma(2.2);

        // Tint the level before writing it out.
        mipmap.blend(color[i].r, color[i].g, color[i].b, 0.5f);
        context.outputCompressed(mipmap, compressionOptions, outputOptions);
    }
}

View File

@ -42,8 +42,6 @@ const char * nvtt::errorString(Error e)
return "Error opening file";
case Error_FileWrite:
return "Error writing through output handler";
case Error_UnsupportedOutputFormat:
return "The container file does not support the selected output format";
}
return "Invalid error";

View File

@ -47,15 +47,12 @@
# define NVTT_API
#endif
#define NVTT_VERSION 201
#define NVTT_VERSION 200
#define NVTT_FORBID_COPY(Class) \
#define NVTT_DECLARE_PIMPL(Class) \
private: \
Class(const Class &); \
void operator=(const Class &); \
public:
#define NVTT_DECLARE_PIMPL(Class) \
public: \
struct Private; \
Private & m
@ -64,9 +61,6 @@
// Public interface.
namespace nvtt
{
// Forward declarations.
struct TexImage;
/// Supported compression formats.
enum Format
{
@ -89,19 +83,6 @@ namespace nvtt
Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2
Format_DXT1n,
Format_CTX1,
};
/// Pixel types.
enum PixelType
{
PixelType_UnsignedNorm,
PixelType_SignedNorm,
PixelType_UnsignedInt,
PixelType_SignedInt,
PixelType_Float,
};
/// Quality modes.
@ -116,7 +97,6 @@ namespace nvtt
/// Compression options. This class describes the desired compression format and other compression settings.
struct CompressionOptions
{
NVTT_FORBID_COPY(CompressionOptions);
NVTT_DECLARE_PIMPL(CompressionOptions);
NVTT_API CompressionOptions();
@ -132,23 +112,10 @@ namespace nvtt
// Set color mask to describe the RGB/RGBA format.
NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
NVTT_API void setPixelFormat(unsigned char rsize, unsigned char gsize, unsigned char bsize, unsigned char asize);
NVTT_API void setPixelType(PixelType pixelType);
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
};
/*
// DXGI_FORMAT_R16G16_FLOAT
compressionOptions.setPixelType(PixelType_Float);
compressionOptions.setPixelFormat2(16, 16, 0, 0);
// DXGI_FORMAT_R32G32B32A32_FLOAT
compressionOptions.setPixelType(PixelType_Float);
compressionOptions.setPixelFormat2(32, 32, 32, 32);
*/
/// Wrap modes.
enum WrapMode
@ -170,7 +137,8 @@ namespace nvtt
enum InputFormat
{
InputFormat_BGRA_8UB,
InputFormat_RGBA_32F,
// InputFormat_RGBE_8UB,
// InputFormat_BGRA_32F,
};
/// Mipmap downsampling filters.
@ -181,23 +149,11 @@ namespace nvtt
MipmapFilter_Kaiser, ///< Kaiser-windowed Sinc filter is the best downsampling filter.
};
/// Texture resize filters.
enum ResizeFilter
{
ResizeFilter_Box,
ResizeFilter_Triangle,
ResizeFilter_Kaiser,
ResizeFilter_Mitchell,
};
/// Color transformation.
enum ColorTransform
{
ColorTransform_None,
ColorTransform_Linear, ///< Not implemented.
ColorTransform_Swizzle, ///< Not implemented.
ColorTransform_YCoCg, ///< Transform into r=Co, g=Cg, b=0, a=Y
ColorTransform_ScaledYCoCg, ///< Not implemented.
ColorTransform_Linear,
};
/// Extents rounding mode.
@ -220,7 +176,6 @@ namespace nvtt
/// Input options. Specify format and layout of the input texture.
struct InputOptions
{
NVTT_FORBID_COPY(InputOptions);
NVTT_DECLARE_PIMPL(InputOptions);
NVTT_API InputOptions();
@ -235,7 +190,6 @@ namespace nvtt
// Set mipmap data. Copies the data.
NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0);
NVTT_API bool setMipmapChannelData(const void * data, int channel, int w, int h, int d = 1, int face = 0, int mipmap = 0);
// Describe the format of the input.
NVTT_API void setFormat(InputFormat format);
@ -246,7 +200,7 @@ namespace nvtt
// Set gamma settings.
NVTT_API void setGamma(float inputGamma, float outputGamma);
// Set texture wrapping mode.
// Set texture wrapping mode.
NVTT_API void setWrapMode(WrapMode mode);
// Set mipmapping options.
@ -261,18 +215,13 @@ namespace nvtt
NVTT_API void setNormalFilter(float sm, float medium, float big, float large);
NVTT_API void setNormalizeMipmaps(bool b);
// Set color transforms.
// Set color transforms. @@ Not implemented!
NVTT_API void setColorTransform(ColorTransform t);
NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3);
NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset);
NVTT_API void setSwizzleTransform(int x, int y, int z, int w);
// Set resizing options.
NVTT_API void setMaxExtents(int d);
NVTT_API void setRoundMode(RoundMode mode);
// Set whether or not to premultiply color by alpha
NVTT_API void setPremultiplyAlpha(bool b);
};
@ -297,7 +246,6 @@ namespace nvtt
Error_CudaError,
Error_FileOpen,
Error_FileWrite,
Error_UnsupportedOutputFormat,
};
/// Error handler.
@ -309,19 +257,11 @@ namespace nvtt
virtual void error(Error e) = 0;
};
/// Container.
enum Container
{
Container_DDS,
Container_DDS10,
};
/// Output Options. This class holds pointers to the interfaces that are used to report the output of
/// the compressor to the user.
struct OutputOptions
{
NVTT_FORBID_COPY(OutputOptions);
NVTT_DECLARE_PIMPL(OutputOptions);
NVTT_API OutputOptions();
@ -335,14 +275,12 @@ namespace nvtt
NVTT_API void setOutputHandler(OutputHandler * outputHandler);
NVTT_API void setErrorHandler(ErrorHandler * errorHandler);
NVTT_API void setOutputHeader(bool outputHeader);
NVTT_API void setContainer(Container container);
};
/// Context.
/// Texture compressor.
struct Compressor
{
NVTT_FORBID_COPY(Compressor);
NVTT_DECLARE_PIMPL(Compressor);
NVTT_API Compressor();
@ -356,90 +294,15 @@ namespace nvtt
// Estimate the size of compressing the input with the given options.
NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const;
// TexImage api
NVTT_API TexImage createTexImage();
NVTT_API int estimateSize(const TexImage & tex, const CompressionOptions & compressionOptions);
NVTT_API void outputCompressed(const TexImage & tex, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions);
};
// "Compressor" is deprecated. This should have been called "Context"
typedef Compressor Context;
/// A texture mipmap.
///
/// The only data member is the opaque pointer `m` (see the private section); all
/// other state is reached through the exported member functions declared below.
struct TexImage
{
// Construction, copy and destruction.
// Note that operator= returns void, so assignments cannot be chained.
NVTT_API TexImage();
NVTT_API TexImage(const TexImage & tex);
NVTT_API ~TexImage();
NVTT_API void operator=(const TexImage & tex);
// Texture parameters.
NVTT_API void setTextureType(TextureType type);
NVTT_API void setWrapMode(WrapMode mode);
NVTT_API void setAlphaMode(AlphaMode alphaMode);
NVTT_API void setNormalMap(bool isNormalMap);
// Accessors. All of them are const and mirror the setters above, plus the image dimensions.
NVTT_API int width() const;
NVTT_API int height() const;
NVTT_API int depth() const;
NVTT_API int faceCount() const;
NVTT_API TextureType textureType() const;
NVTT_API WrapMode wrapMode() const;
NVTT_API AlphaMode alphaMode() const;
NVTT_API bool isNormalMap() const;
// Texture data.
// load() reads from a file name; the setImage2D() overloads take caller-provided memory,
// either as one buffer or as four separate pointers named r, g, b and a.
// NOTE(review): `idx` presumably selects the face being set, and a false return
// presumably signals failure -- confirm both against the implementation.
NVTT_API bool load(const char * fileName);
NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * data);
NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * r, const void * g, const void * b, const void * a);
// Resizing methods.
// Two resize() overloads: explicit target size, or a maximum extent combined with a rounding mode.
// NOTE(review): the meaning of buildNextMipmap()'s bool result is not visible here -- confirm.
NVTT_API void resize(int w, int h, ResizeFilter filter);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
NVTT_API bool buildNextMipmap(MipmapFilter filter);
// Color transforms.
// transform() takes four 4-component weight rows plus a 4-component offset.
NVTT_API void toLinear(float gamma);
NVTT_API void toGamma(float gamma);
NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]);
NVTT_API void swizzle(int r, int g, int b, int a);
NVTT_API void scaleBias(int channel, float scale, float bias);
NVTT_API void blend(float r, float g, float b, float a, float t);
NVTT_API void premultiplyAlpha();
NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale);
NVTT_API void setBorder(float r, float g, float b, float a);
NVTT_API void fill(float r, float g, float b, float a);
// Set normal map options.
// toNormalMap() takes the same four filter weights (sm, medium, big, large) as InputOptions::setNormalFilter().
NVTT_API void toNormalMap(float sm, float medium, float big, float large);
NVTT_API void toHeightMap();
NVTT_API void normalizeNormalMap();
// Compress.
NVTT_API void outputCompressed(const CompressionOptions & compressionOptions, const OutputOptions & outputOptions);
private:
// NOTE(review): detach() presumably gives this object its own copy of the private
// data before a modification -- confirm in the implementation file.
void detach();
// Opaque implementation type, defined outside this header.
struct Private;
Private * m;
};
// Return string for the given error code.
NVTT_API const char * errorString(Error e);
// Return NVTT version.
NVTT_API unsigned int version();
// Set callbacks.
//NVTT_API void setErrorCallback(ErrorCallback callback);
//NVTT_API void setMemoryCallbacks(...);
} // nvtt namespace
#endif // NV_TT_H

View File

@ -47,7 +47,7 @@
# define NVTT_API
#endif
#define NVTT_VERSION 201
#define NVTT_VERSION 200
#ifdef __cplusplus
typedef struct nvtt::InputOptions NvttInputOptions;
@ -156,7 +156,6 @@ typedef enum
NVTT_Error_Unknown,
NVTT_Error_FileOpen,
NVTT_Error_FileWrite,
NVTT_Error_UnsupportedOutputFormat,
} NvttError;
typedef enum

31
src/nvtt/squish/Makefile Normal file
View File

@ -0,0 +1,31 @@
# Builds the squish static library (libsquish.a) and installs its public header.
# Build settings are read from a file named 'config'.
# NOTE(review): INSTALL_DIR and any compiler flag overrides are presumably defined there -- confirm.
include config
# Library sources; each .cpp maps to one object file through the substitution below.
SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp
OBJ = $(SRC:%.cpp=%.o)
# Name of the static archive produced by the default target.
LIB = libsquish.a
# Default target: build the static library.
all : $(LIB)
# Copy the public header and the archive under $(INSTALL_DIR).
install : $(LIB)
install squish.h $(INSTALL_DIR)/include
install libsquish.a $(INSTALL_DIR)/lib
# Remove exactly the two files that 'install' copies.
uninstall:
$(RM) $(INSTALL_DIR)/include/squish.h
$(RM) $(INSTALL_DIR)/lib/libsquish.a
# Update the archive: $? expands to the objects newer than the archive, so only
# those members are replaced; ranlib then regenerates the archive index.
$(LIB) : $(OBJ)
$(AR) cr $@ $?
ranlib $@
# Pattern rule: compile one source file, with the current directory on the include path.
%.o : %.cpp
$(CXX) $(CPPFLAGS) -I. $(CXXFLAGS) -o$@ -c $<
# Delete every object file and the archive.
clean :
$(RM) $(OBJ) $(LIB)

View File

@ -28,7 +28,7 @@
#include "colourblock.h"
#include <cfloat>
namespace nvsquish {
namespace squish {
ClusterFit::ClusterFit()
{

View File

@ -23,15 +23,15 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_CLUSTERFIT_H
#define NV_SQUISH_CLUSTERFIT_H
#ifndef SQUISH_CLUSTERFIT_H
#define SQUISH_CLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace nvsquish {
namespace squish {
class ClusterFit : public ColourFit
{

View File

@ -25,7 +25,7 @@
#include "colourblock.h"
namespace nvsquish {
namespace squish {
static int FloatToInt( float a, int limit )
{

View File

@ -23,13 +23,13 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_COLOURBLOCK_H
#define NV_SQUISH_COLOURBLOCK_H
#ifndef SQUISH_COLOURBLOCK_H
#define SQUISH_COLOURBLOCK_H
#include "squish.h"
#include "maths.h"
namespace nvsquish {
namespace squish {
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );

View File

@ -26,7 +26,7 @@
#include "colourfit.h"
#include "colourset.h"
namespace nvsquish {
namespace squish {
ColourFit::ColourFit()
{

View File

@ -23,13 +23,13 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_COLOURFIT_H
#define NV_SQUISH_COLOURFIT_H
#ifndef SQUISH_COLOURFIT_H
#define SQUISH_COLOURFIT_H
#include "squish.h"
#include "maths.h"
namespace nvsquish {
namespace squish {
class ColourSet;

View File

@ -25,7 +25,7 @@
#include "colourset.h"
namespace nvsquish {
namespace squish {
// @@ Add flags:
// - MatchTransparent

View File

@ -23,14 +23,14 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_COLOURSET_H
#define NV_SQUISH_COLOURSET_H
#ifndef SQUISH_COLOURSET_H
#define SQUISH_COLOURSET_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
namespace nvsquish {
namespace squish {
/*! @brief Represents a set of block colours
*/

View File

@ -1,113 +0,0 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2008 Ignacio Castano castano@gmail.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include <stdio.h>
#include <float.h>
#include <math.h>
// One row of the generated lookup tables.
// For a given split of the 16 block texels into clusters, the sums hold
// sum(alpha^2), sum(beta^2) and sum(alpha*beta) of the interpolation weights,
// and factor is 1 / (alpha2_sum * beta2_sum - alphabeta_sum^2).
struct Precomp {
    float alpha2_sum;
    float beta2_sum;
    float alphabeta_sum;
    float factor;
};

// Table generator: writes C++ source for the s_threeElement and s_fourElement
// tables to stdout. Always returns 0.
int main()
{
    // Text of the struct declaration that precedes the generated tables.
    static const char * const structText[] = {
        "struct Precomp {\n",
        "\tfloat alpha2_sum;\n",
        "\tfloat beta2_sum;\n",
        "\tfloat alphabeta_sum;\n",
        "\tfloat factor;\n",
        "};\n\n",
    };
    for (unsigned int line = 0; line < sizeof(structText) / sizeof(structText[0]); line++)
    {
        fputs(structText[line], stdout);
    }

    // Three-cluster table: every (c0, c1, c2) with c0 + c1 + c2 == 16.
    // The alpha weights of the clusters are 1, 1/2 and 0 (beta = 1 - alpha).
    printf("static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = {\n");

    int count = 0;
    for (int c0 = 0; c0 <= 16; c0++)
    {
        const int left = 16 - c0;
        for (int c1 = 0; c1 <= left; c1++)
        {
            const int c2 = left - c1;

            Precomp row;
            row.alpha2_sum = c0 + c1 * 0.25f;
            row.beta2_sum = c2 + c1 * 0.25f;
            row.alphabeta_sum = c1 * 0.25f;
            row.factor = 1.0f / (row.alpha2_sum * row.beta2_sum - row.alphabeta_sum * row.alphabeta_sum);

            // A zero determinant yields a non-finite factor; the table then
            // carries the FLT_MAX token instead of a number.
            if (!isfinite(row.factor))
            {
                printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d)\n", row.alpha2_sum, row.beta2_sum, row.alphabeta_sum, count, c0, c1, c2);
            }
            else
            {
                printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d)\n", row.alpha2_sum, row.beta2_sum, row.alphabeta_sum, row.factor, count, c0, c1, c2);
            }
            count++;
        }
    }
    printf("}; // %d three cluster elements\n\n", count);

    // Four-cluster table: every (c0, c1, c2, c3) with c0 + c1 + c2 + c3 == 16.
    // The alpha weights of the clusters are 1, 2/3, 1/3 and 0.
    printf("static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = {\n");

    count = 0;
    for (int c0 = 0; c0 <= 16; c0++)
    {
        for (int c1 = 0; c1 <= 16-c0; c1++)
        {
            const int left = 16 - c0 - c1;
            for (int c2 = 0; c2 <= left; c2++)
            {
                const int c3 = left - c2;

                Precomp row;
                row.alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
                row.beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
                row.alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
                row.factor = 1.0f / (row.alpha2_sum * row.beta2_sum - row.alphabeta_sum * row.alphabeta_sum);

                if (!isfinite(row.factor))
                {
                    printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d %d)\n", row.alpha2_sum, row.beta2_sum, row.alphabeta_sum, count, c0, c1, c2, c3);
                }
                else
                {
                    printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d %d)\n", row.alpha2_sum, row.beta2_sum, row.alphabeta_sum, row.factor, count, c0, c1, c2, c3);
                }
                count++;
            }
        }
    }
    printf("}; // %d four cluster elements\n\n", count);

    return 0;
}

View File

@ -31,7 +31,7 @@
#include "fastclusterlookup.inl"
namespace nvsquish {
namespace squish {
FastClusterFit::FastClusterFit()
{
@ -129,8 +129,6 @@ void FastClusterFit::Compress3( void* block )
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half = VEC4_CONST(0.5f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -162,22 +160,25 @@ void FastClusterFit::Compress3( void* block )
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
{
@ -273,7 +274,7 @@ void FastClusterFit::Compress4( void* block )
Vec4 const factor = constants.SplatW();
i++;
Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0));
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
Vec4 const betax_sum = m_xsum - alphax_sum;
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
@ -285,19 +286,18 @@ void FastClusterFit::Compress4( void* block )
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -370,12 +370,6 @@ void FastClusterFit::Compress4( void* block )
void FastClusterFit::Compress3( void* block )
{
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -405,9 +399,16 @@ void FastClusterFit::Compress3( void* block )
Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -476,12 +477,6 @@ void FastClusterFit::Compress3( void* block )
void FastClusterFit::Compress4( void* block )
{
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -516,9 +511,16 @@ void FastClusterFit::Compress4( void* block )
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;

View File

@ -24,15 +24,15 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_FASTCLUSTERFIT_H
#define NV_SQUISH_FASTCLUSTERFIT_H
#ifndef SQUISH_FASTCLUSTERFIT_H
#define SQUISH_FASTCLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace nvsquish {
namespace squish {
class FastClusterFit : public ColourFit
{

View File

@ -26,7 +26,7 @@
#include "maths.h"
#include <cfloat>
namespace nvsquish {
namespace squish {
Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights, Vec3::Arg metric )
{
@ -59,189 +59,28 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
return covariance;
}
#if 1
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
const int NUM = 8;
Vec3 v(1, 1, 1);
for (int i = 0; i < NUM; i++)
{
for(int i = 0; i < NUM; i++) {
float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];
float norm = std::max(std::max(x, y), z);
float iv = 1.0f / norm;
if (norm == 0.0f) { // @@ I think this is not necessary in this case!!
return Vec3(0.0f);
}
v = Vec3(x*iv, y*iv, z*iv);
}
return v;
}
#else
// Returns an eigenvector of `matrix` for the eigenvalue `evalue`, for the case
// where that eigenvalue is not repeated. The result is not normalised.
//
// Sym3x3 entries are laid out as the symmetric matrix
//     | 0 1 2 |
//     | 1 3 4 |
//     | 2 4 5 |
// so indices 0, 3 and 5 are the diagonal.
static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
{
    // shifted = matrix - evalue * I (only the diagonal changes)
    Sym3x3 shifted;
    shifted[0] = matrix[0] - evalue;
    shifted[3] = matrix[3] - evalue;
    shifted[5] = matrix[5] - evalue;
    shifted[1] = matrix[1];
    shifted[2] = matrix[2];
    shifted[4] = matrix[4];

    // cofactors of the shifted matrix, stored with the same symmetric layout
    Sym3x3 cof;
    cof[0] = shifted[3]*shifted[5] - shifted[4]*shifted[4];
    cof[1] = shifted[2]*shifted[4] - shifted[1]*shifted[5];
    cof[2] = shifted[1]*shifted[4] - shifted[2]*shifted[3];
    cof[3] = shifted[0]*shifted[5] - shifted[2]*shifted[2];
    cof[4] = shifted[1]*shifted[2] - shifted[4]*shifted[0];
    cof[5] = shifted[0]*shifted[3] - shifted[1]*shifted[1];

    // locate the entry of largest magnitude (the first one wins on ties)
    int pick = 0;
    float pickMag = std::fabs( cof[0] );
    for( int k = 1; k < 6; ++k )
    {
        float const mag = std::fabs( cof[k] );
        if( mag > pickMag )
        {
            pickMag = mag;
            pick = k;
        }
    }

    // return a column of the cofactor matrix chosen from that entry's index
    if( pick == 0 )
        return Vec3( cof[0], cof[1], cof[2] );
    if( pick == 1 || pick == 3 )
        return Vec3( cof[1], cof[3], cof[4] );
    return Vec3( cof[2], cof[4], cof[5] );
}
// Returns one eigenvector of `matrix` for the eigenvalue `evalue`, for the case
// where that eigenvalue is repeated. The result is not normalised.
static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
{
    // shifted = matrix - evalue * I (indices 0, 3 and 5 are the diagonal)
    Sym3x3 shifted;
    shifted[0] = matrix[0] - evalue;
    shifted[3] = matrix[3] - evalue;
    shifted[5] = matrix[5] - evalue;
    shifted[1] = matrix[1];
    shifted[2] = matrix[2];
    shifted[4] = matrix[4];

    // locate the entry of largest magnitude (the first one wins on ties)
    int pick = 0;
    float pickMag = std::fabs( shifted[0] );
    for( int k = 1; k < 6; ++k )
    {
        float const mag = std::fabs( shifted[k] );
        if( mag > pickMag )
        {
            pickMag = mag;
            pick = k;
        }
    }

    // build the result from the pair of entries selected by that index
    if( pick == 0 || pick == 1 )
        return Vec3( -shifted[1], shifted[0], 0.0f );
    if( pick == 2 )
        return Vec3( shifted[2], 0.0f, -shifted[0] );
    if( pick == 3 || pick == 4 )
        return Vec3( 0.0f, -shifted[4], shifted[3] );
    return Vec3( 0.0f, -shifted[5], shifted[4] );
}
// Closed-form principal component of a symmetric 3x3 matrix.
// Solves the characteristic cubic  l^3 - c2*l^2 + c1*l - c0 = 0  for the
// eigenvalues, keeps the one of largest magnitude and returns a matching
// eigenvector from the GetMultiplicity*Evector helpers. The returned vector is
// not normalised here.
// Matrix entries are indexed 0..5 with 0, 3 and 5 on the diagonal.
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
// compute the cubic coefficients
// c0 is the determinant, c1 the sum of the principal 2x2 minors, c2 the trace
float c0 = matrix[0]*matrix[3]*matrix[5]
+ 2.0f*matrix[1]*matrix[2]*matrix[4]
- matrix[0]*matrix[4]*matrix[4]
- matrix[3]*matrix[2]*matrix[2]
- matrix[5]*matrix[1]*matrix[1];
float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
- matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
float c2 = matrix[0] + matrix[3] + matrix[5];
// compute the quadratic coefficients
// (substituting l = t + c2/3 removes the squared term, leaving t^3 + a*t + b = 0)
float a = c1 - ( 1.0f/3.0f )*c2*c2;
float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
// compute the root count check
// (Q is the discriminant of the reduced cubic; its sign selects the branch below)
float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
// test the multiplicity
// FLT_EPSILON is used as the tolerance around Q == 0
if( FLT_EPSILON < Q )
{
// only one root, which implies we have a multiple of the identity
return Vec3( 1.0f );
}
else if( Q < -FLT_EPSILON )
{
// three distinct roots
// trigonometric form: 0.25*b*b - Q equals -a^3/27, so rt is the cube root of sqrt( -a^3/27 )
float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
float rho = std::sqrt( 0.25f*b*b - Q );
float rt = std::pow( rho, 1.0f/3.0f );
float ct = std::cos( theta/3.0f );
float st = std::sin( theta/3.0f );
// shift the three roots back by c2/3 to get the eigenvalues
float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st );
float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st );
// pick the larger
// (compared by absolute value; l1 ends up holding the winner)
if( std::fabs( l2 ) > std::fabs( l1 ) )
l1 = l2;
if( std::fabs( l3 ) > std::fabs( l1 ) )
l1 = l3;
// get the eigenvector
return GetMultiplicity1Evector( matrix, l1 );
}
else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
{
// two roots
// real cube root of b/2, computed on the magnitude and negated when b is negative
float rt;
if( b < 0.0f )
rt = -std::pow( -0.5f*b, 1.0f/3.0f );
else
rt = std::pow( 0.5f*b, 1.0f/3.0f );
float l1 = ( 1.0f/3.0f )*c2 + rt; // repeated
float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
// get the eigenvector
// (the repeated eigenvalue needs the multiplicity-2 helper, the simple one the multiplicity-1 helper)
if( std::fabs( l1 ) > std::fabs( l2 ) )
return GetMultiplicity2Evector( matrix, l1 );
else
return GetMultiplicity1Evector( matrix, l2 );
}
}
#endif
} // namespace squish

View File

@ -23,14 +23,14 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_MATHS_H
#define NV_SQUISH_MATHS_H
#ifndef SQUISH_MATHS_H
#define SQUISH_MATHS_H
#include <cmath>
#include <algorithm>
#include "config.h"
namespace nvsquish {
namespace squish {
class Vec3
{

View File

@ -23,8 +23,8 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_SIMD_H
#define NV_SQUISH_SIMD_H
#ifndef SQUISH_SIMD_H
#define SQUISH_SIMD_H
#include "maths.h"

View File

@ -23,8 +23,8 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_SIMD_SSE_H
#define NV_SQUISH_SIMD_SSE_H
#ifndef SQUISH_SIMD_SSE_H
#define SQUISH_SIMD_SSE_H
#include <xmmintrin.h>
#if ( SQUISH_USE_SSE > 1 )
@ -35,7 +35,7 @@
#define SQUISH_SSE_SPLAT( a ) \
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
namespace nvsquish {
namespace squish {
#define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )

View File

@ -26,12 +26,10 @@
#ifndef SQUISH_SIMD_VE_H
#define SQUISH_SIMD_VE_H
#ifndef __APPLE_ALTIVEC__
#include <altivec.h>
#undef bool
#endif
namespace nvsquish {
namespace squish {
#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )

View File

@ -23,11 +23,11 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_H
#define NV_SQUISH_H
#ifndef SQUISH_H
#define SQUISH_H
//! All squish API functions live in this namespace.
namespace nvsquish {
namespace squish {
// -----------------------------------------------------------------------------

View File

@ -0,0 +1,531 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 42;
objects = {
/* Begin PBXBuildFile section */
133FA0DC096A7B8E0050752E /* alpha.h in Headers */ = {isa = PBXBuildFile; fileRef = 133FA0DA096A7B8E0050752E /* alpha.h */; };
133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 133FA0DB096A7B8E0050752E /* alpha.cpp */; };
1342B4160999DF1900152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
1342B41A0999DF7000152915 /* squishpng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B4190999DF7000152915 /* squishpng.cpp */; };
1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B43E0999E0CC00152915 /* squishtest.cpp */; };
1342B4420999E0EC00152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70B092AA857005EE038 /* clusterfit.cpp */; };
1350D71B092AA858005EE038 /* clusterfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D70C092AA858005EE038 /* clusterfit.h */; };
1350D71E092AA858005EE038 /* colourblock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70F092AA858005EE038 /* colourblock.cpp */; };
1350D71F092AA858005EE038 /* colourblock.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D710092AA858005EE038 /* colourblock.h */; };
1350D720092AA858005EE038 /* config.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D711092AA858005EE038 /* config.h */; };
1350D721092AA858005EE038 /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D712092AA858005EE038 /* maths.cpp */; };
1350D722092AA858005EE038 /* maths.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D713092AA858005EE038 /* maths.h */; };
1350D725092AA858005EE038 /* rangefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D716092AA858005EE038 /* rangefit.cpp */; };
1350D726092AA858005EE038 /* rangefit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D717092AA858005EE038 /* rangefit.h */; };
1350D727092AA858005EE038 /* squish.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D718092AA858005EE038 /* squish.cpp */; };
1350D728092AA858005EE038 /* squish.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D719092AA858005EE038 /* squish.h */; settings = {ATTRIBUTES = (Public, ); }; };
139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C21CE09ADAB0800A2500D /* squishgen.cpp */; };
139C234F09B0602700A2500D /* singlecolourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 139C234D09B0602700A2500D /* singlecolourfit.h */; };
139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C234E09B0602700A2500D /* singlecolourfit.cpp */; };
13A7CCA40952BE63001C963A /* colourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 13A7CCA20952BE63001C963A /* colourfit.h */; };
13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13A7CCA30952BE63001C963A /* colourfit.cpp */; };
13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13C4C7AB0941C18000AC5B89 /* colourset.cpp */; };
13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */ = {isa = PBXBuildFile; fileRef = 13C4C7AC0941C18000AC5B89 /* colourset.h */; };
13CD64C2092BCF8A00488C97 /* simd.h in Headers */ = {isa = PBXBuildFile; fileRef = 13CD64C0092BCF8A00488C97 /* simd.h */; };
13D0DC910931F93A00909807 /* simd_ve.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC900931F93A00909807 /* simd_ve.h */; };
13D0DC970931F9D600909807 /* simd_sse.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC960931F9D600909807 /* simd_sse.h */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
1342B52B099BF72F00152915 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = squish;
};
1342B58E099BF93D00152915 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = squish;
};
/* End PBXContainerItemProxy section */
/* Begin PBXFileReference section */
133FA0DA096A7B8E0050752E /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = "<group>"; };
133FA0DB096A7B8E0050752E /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = "<group>"; };
1342B4110999DE7F00152915 /* squishpng */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishpng; sourceTree = BUILT_PRODUCTS_DIR; };
1342B4190999DF7000152915 /* squishpng.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = squishpng.cpp; path = extra/squishpng.cpp; sourceTree = "<group>"; };
1342B4370999E07C00152915 /* squishtest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishtest; sourceTree = BUILT_PRODUCTS_DIR; };
1342B43E0999E0CC00152915 /* squishtest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishtest.cpp; path = extra/squishtest.cpp; sourceTree = "<group>"; };
1350D70B092AA857005EE038 /* clusterfit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfit.cpp; sourceTree = "<group>"; };
1350D70C092AA858005EE038 /* clusterfit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = clusterfit.h; sourceTree = "<group>"; };
1350D70F092AA858005EE038 /* colourblock.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourblock.cpp; sourceTree = "<group>"; };
1350D710092AA858005EE038 /* colourblock.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourblock.h; sourceTree = "<group>"; };
1350D711092AA858005EE038 /* config.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = "<group>"; };
1350D712092AA858005EE038 /* maths.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = maths.cpp; sourceTree = "<group>"; };
1350D713092AA858005EE038 /* maths.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = maths.h; sourceTree = "<group>"; };
1350D716092AA858005EE038 /* rangefit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = rangefit.cpp; sourceTree = "<group>"; };
1350D717092AA858005EE038 /* rangefit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = rangefit.h; sourceTree = "<group>"; };
1350D718092AA858005EE038 /* squish.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = squish.cpp; sourceTree = "<group>"; };
1350D719092AA858005EE038 /* squish.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = squish.h; sourceTree = "<group>"; };
13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = texture_compression_s3tc.txt; sourceTree = "<group>"; };
139C21C409ADAA7000A2500D /* squishgen */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishgen; sourceTree = BUILT_PRODUCTS_DIR; };
139C21CE09ADAB0800A2500D /* squishgen.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishgen.cpp; path = extra/squishgen.cpp; sourceTree = "<group>"; };
139C234D09B0602700A2500D /* singlecolourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = singlecolourfit.h; sourceTree = "<group>"; };
139C234E09B0602700A2500D /* singlecolourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = singlecolourfit.cpp; sourceTree = "<group>"; };
139C236D09B060A900A2500D /* singlecolourlookup.inl */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = singlecolourlookup.inl; sourceTree = "<group>"; };
13A7CCA20952BE63001C963A /* colourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colourfit.h; sourceTree = "<group>"; };
13A7CCA30952BE63001C963A /* colourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colourfit.cpp; sourceTree = "<group>"; };
13C4C7AB0941C18000AC5B89 /* colourset.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourset.cpp; sourceTree = "<group>"; };
13C4C7AC0941C18000AC5B89 /* colourset.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourset.h; sourceTree = "<group>"; };
13CD64C0092BCF8A00488C97 /* simd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd.h; sourceTree = "<group>"; };
13D0DC900931F93A00909807 /* simd_ve.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_ve.h; sourceTree = "<group>"; };
13D0DC960931F9D600909807 /* simd_sse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_sse.h; sourceTree = "<group>"; };
D2AAC046055464E500DB518D /* libsquish.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsquish.a; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
1342B40F0999DE7F00152915 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
1342B4160999DF1900152915 /* libsquish.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
1342B4350999E07C00152915 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
1342B4420999E0EC00152915 /* libsquish.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
139C21C209ADAA7000A2500D /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
D289987405E68DCB004EDB86 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
08FB7794FE84155DC02AAC07 /* squish */ = {
isa = PBXGroup;
children = (
08FB7795FE84155DC02AAC07 /* Source */,
C6A0FF2B0290797F04C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */,
);
name = squish;
sourceTree = "<group>";
};
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
133FA0DB096A7B8E0050752E /* alpha.cpp */,
133FA0DA096A7B8E0050752E /* alpha.h */,
1350D70B092AA857005EE038 /* clusterfit.cpp */,
1350D70C092AA858005EE038 /* clusterfit.h */,
13A7CCA30952BE63001C963A /* colourfit.cpp */,
13A7CCA20952BE63001C963A /* colourfit.h */,
13C4C7AB0941C18000AC5B89 /* colourset.cpp */,
13C4C7AC0941C18000AC5B89 /* colourset.h */,
1350D70F092AA858005EE038 /* colourblock.cpp */,
1350D710092AA858005EE038 /* colourblock.h */,
13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */,
1350D711092AA858005EE038 /* config.h */,
1350D712092AA858005EE038 /* maths.cpp */,
1350D713092AA858005EE038 /* maths.h */,
1350D716092AA858005EE038 /* rangefit.cpp */,
1350D717092AA858005EE038 /* rangefit.h */,
13CD64C0092BCF8A00488C97 /* simd.h */,
13D0DC960931F9D600909807 /* simd_sse.h */,
13D0DC900931F93A00909807 /* simd_ve.h */,
139C234E09B0602700A2500D /* singlecolourfit.cpp */,
139C234D09B0602700A2500D /* singlecolourfit.h */,
139C236D09B060A900A2500D /* singlecolourlookup.inl */,
1350D718092AA858005EE038 /* squish.cpp */,
1350D719092AA858005EE038 /* squish.h */,
139C21CE09ADAB0800A2500D /* squishgen.cpp */,
1342B4190999DF7000152915 /* squishpng.cpp */,
1342B43E0999E0CC00152915 /* squishtest.cpp */,
);
name = Source;
sourceTree = "<group>";
};
1AB674ADFE9D54B511CA2CBB /* Products */ = {
isa = PBXGroup;
children = (
D2AAC046055464E500DB518D /* libsquish.a */,
1342B4110999DE7F00152915 /* squishpng */,
1342B4370999E07C00152915 /* squishtest */,
139C21C409ADAA7000A2500D /* squishgen */,
);
name = Products;
sourceTree = "<group>";
};
C6A0FF2B0290797F04C91782 /* Documentation */ = {
isa = PBXGroup;
children = (
);
name = Documentation;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
D2AAC043055464E500DB518D /* Headers */ = {
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
1350D71B092AA858005EE038 /* clusterfit.h in Headers */,
1350D71F092AA858005EE038 /* colourblock.h in Headers */,
1350D720092AA858005EE038 /* config.h in Headers */,
1350D722092AA858005EE038 /* maths.h in Headers */,
1350D726092AA858005EE038 /* rangefit.h in Headers */,
1350D728092AA858005EE038 /* squish.h in Headers */,
13CD64C2092BCF8A00488C97 /* simd.h in Headers */,
13D0DC910931F93A00909807 /* simd_ve.h in Headers */,
13D0DC970931F9D600909807 /* simd_sse.h in Headers */,
13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */,
13A7CCA40952BE63001C963A /* colourfit.h in Headers */,
133FA0DC096A7B8E0050752E /* alpha.h in Headers */,
139C234F09B0602700A2500D /* singlecolourfit.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXHeadersBuildPhase section */
/* Begin PBXNativeTarget section */
1342B4100999DE7F00152915 /* squishpng */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */;
buildPhases = (
1342B40E0999DE7F00152915 /* Sources */,
1342B40F0999DE7F00152915 /* Frameworks */,
);
buildRules = (
);
dependencies = (
1342B58F099BF93D00152915 /* PBXTargetDependency */,
);
name = squishpng;
productName = squishpng;
productReference = 1342B4110999DE7F00152915 /* squishpng */;
productType = "com.apple.product-type.tool";
};
1342B4360999E07C00152915 /* squishtest */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */;
buildPhases = (
1342B4340999E07C00152915 /* Sources */,
1342B4350999E07C00152915 /* Frameworks */,
);
buildRules = (
);
dependencies = (
1342B52C099BF72F00152915 /* PBXTargetDependency */,
);
name = squishtest;
productName = squishtest;
productReference = 1342B4370999E07C00152915 /* squishtest */;
productType = "com.apple.product-type.tool";
};
139C21C309ADAA7000A2500D /* squishgen */ = {
isa = PBXNativeTarget;
buildConfigurationList = 139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */;
buildPhases = (
139C21C109ADAA7000A2500D /* Sources */,
139C21C209ADAA7000A2500D /* Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = squishgen;
productName = squishgen;
productReference = 139C21C409ADAA7000A2500D /* squishgen */;
productType = "com.apple.product-type.tool";
};
D2AAC045055464E500DB518D /* squish */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */;
buildPhases = (
D2AAC043055464E500DB518D /* Headers */,
D2AAC044055464E500DB518D /* Sources */,
D289987405E68DCB004EDB86 /* Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = squish;
productName = squish;
productReference = D2AAC046055464E500DB518D /* libsquish.a */;
productType = "com.apple.product-type.library.static";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
08FB7793FE84155DC02AAC07 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */;
hasScannedForEncodings = 1;
mainGroup = 08FB7794FE84155DC02AAC07 /* squish */;
projectDirPath = "";
targets = (
D2AAC045055464E500DB518D /* squish */,
1342B4100999DE7F00152915 /* squishpng */,
1342B4360999E07C00152915 /* squishtest */,
139C21C309ADAA7000A2500D /* squishgen */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
1342B40E0999DE7F00152915 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1342B41A0999DF7000152915 /* squishpng.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
1342B4340999E07C00152915 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
139C21C109ADAA7000A2500D /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
D2AAC044055464E500DB518D /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */,
1350D71E092AA858005EE038 /* colourblock.cpp in Sources */,
1350D721092AA858005EE038 /* maths.cpp in Sources */,
1350D725092AA858005EE038 /* rangefit.cpp in Sources */,
1350D727092AA858005EE038 /* squish.cpp in Sources */,
13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */,
13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */,
133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */,
139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin PBXTargetDependency section */
1342B52C099BF72F00152915 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = D2AAC045055464E500DB518D /* squish */;
targetProxy = 1342B52B099BF72F00152915 /* PBXContainerItemProxy */;
};
1342B58F099BF93D00152915 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = D2AAC045055464E500DB518D /* squish */;
targetProxy = 1342B58E099BF93D00152915 /* PBXContainerItemProxy */;
};
/* End PBXTargetDependency section */
/* Begin XCBuildConfiguration section */
1342B4140999DE9F00152915 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = (
..,
/sw/include,
);
INSTALL_PATH = "$(HOME)/bin";
LIBRARY_SEARCH_PATHS = /sw/lib;
OTHER_LDFLAGS = "-lpng";
PRODUCT_NAME = squishpng;
};
name = Debug;
};
1342B4150999DE9F00152915 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = (
..,
/sw/include,
);
INSTALL_PATH = "$(HOME)/bin";
LIBRARY_SEARCH_PATHS = /sw/lib;
OTHER_LDFLAGS = "-lpng";
PRODUCT_NAME = squishpng;
};
name = Release;
};
1342B43C0999E0C000152915 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishtest;
};
name = Debug;
};
1342B43D0999E0C000152915 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishtest;
};
name = Release;
};
139C21CC09ADAB0300A2500D /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishgen;
};
name = Debug;
};
139C21CD09ADAB0300A2500D /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishgen;
};
name = Release;
};
1DEB91EC08733DB70010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
COPY_PHASE_STRIP = NO;
GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
INSTALL_PATH = /usr/local/lib;
OTHER_CFLAGS = "-maltivec";
PRODUCT_NAME = squish;
STRIP_INSTALLED_PRODUCT = NO;
};
name = Debug;
};
1DEB91ED08733DB70010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
INSTALL_PATH = /usr/local/lib;
OTHER_CFLAGS = "-maltivec";
PRODUCT_NAME = squish;
STRIP_INSTALLED_PRODUCT = YES;
};
name = Release;
};
1DEB91F008733DB70010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_DYNAMIC_NO_PIC = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_TREAT_WARNINGS_AS_ERRORS = YES;
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_PEDANTIC = YES;
GCC_WARN_SHADOW = YES;
GCC_WARN_SIGN_COMPARE = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VALUE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
};
name = Debug;
};
1DEB91F108733DB70010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_DYNAMIC_NO_PIC = YES;
GCC_OPTIMIZATION_LEVEL = 3;
GCC_TREAT_WARNINGS_AS_ERRORS = YES;
GCC_UNROLL_LOOPS = YES;
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_PEDANTIC = YES;
GCC_WARN_SHADOW = YES;
GCC_WARN_SIGN_COMPARE = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VALUE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1342B4140999DE9F00152915 /* Debug */,
1342B4150999DE9F00152915 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1342B43C0999E0C000152915 /* Debug */,
1342B43D0999E0C000152915 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */ = {
isa = XCConfigurationList;
buildConfigurations = (
139C21CC09ADAB0300A2500D /* Debug */,
139C21CD09ADAB0300A2500D /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB91EC08733DB70010E9CD /* Debug */,
1DEB91ED08733DB70010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB91F008733DB70010E9CD /* Debug */,
1DEB91F108733DB70010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
}

View File

@ -0,0 +1,508 @@
Name
EXT_texture_compression_s3tc
Name Strings
GL_EXT_texture_compression_s3tc
Contact
Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com)
Status
FINAL
Version
1.1, 16 November 2001 (containing only clarifications relative to
version 1.0, dated 7 July 2000)
Number
198
Dependencies
OpenGL 1.1 is required.
GL_ARB_texture_compression is required.
This extension is written against the OpenGL 1.2.1 Specification.
Overview
This extension provides additional texture compression functionality
specific to S3's S3TC format (called DXTC in Microsoft's DirectX API),
subject to all the requirements and limitations described by the extension
GL_ARB_texture_compression.
This extension supports DXT1, DXT3, and DXT5 texture compression formats.
For the DXT1 image format, this specification supports an RGB-only mode
and a special RGBA mode with single-bit "transparent" alpha.
IP Status
Contact S3 Incorporated (http://www.s3.com) regarding any intellectual
property issues associated with implementing this extension.
WARNING: Vendors able to support S3TC texture compression in Direct3D
drivers do not necessarily have the right to use the same functionality in
OpenGL.
Issues
(1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported?
RESOLVED: No -- insufficient interest. Supporting DXT2 and DXT4
would require some rework to the TexEnv definition (maybe add a new
base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats.
Note that the EXT_texture_env_combine extension (which extends normal
TexEnv modes) can be used to support textures with premultiplied alpha.
(2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported
or should we use only the DXT<n> enums?
RESOLVED: No. A generic RGBA_S3TC_EXT is problematic because DXT3
and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is
also) yet one format must be chosen up front.
(3) Should TexSubImage support all block-aligned edits or just the minimal
functionality required by the ARB_texture_compression extension?
RESOLVED: Allow all valid block-aligned edits.
(4) A pre-compressed image with a DXT1 format can be used as either an
RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image. If the image has
transparent texels, how are they treated in each format?
RESOLVED: The renderer has to make sure that an RGB_S3TC_DXT1 format
is decoded as RGB (where alpha is effectively one for all texels),
while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all
texels with "transparent" encodings). Otherwise, the formats are
identical.
(5) Is the encoding of the RGB components for DXT1 formats correct in this
spec? MSDN documentation does not specify an RGB color for the
"transparent" encoding. Is it really black?
RESOLVED: Yes. The specification for the DXT1 format initially
required black, but later changed that requirement to a
recommendation. All vendors involved in the definition of this
specification support black. In addition, specifying black has a
useful behavior.
When blending multiple texels (GL_LINEAR filtering), mixing opaque and
transparent samples is problematic. Defining a black color on
transparent texels achieves a sensible result that works like a
texture with premultiplied alpha. For example, if three opaque white
samples and one transparent sample are being averaged, the result would be a
75% intensity gray (with an alpha of 75%). This is the same result on
the color channels as would be obtained using a white color, 75%
alpha, and a SRC_ALPHA blend factor.
(6) Is the encoding of the RGB components for DXT3 and DXT5 formats
correct in this spec? MSDN documentation suggests that the RGB blocks
for DXT3 and DXT5 are decoded as described by the DXT1 format.
RESOLVED: Yes -- this appears to be a bug in the MSDN documentation.
The specification for the DXT2-DXT5 formats requires decoding using the
opaque block encoding, regardless of the relative values of "color0"
and "color1".
New Procedures and Functions
None.
New Tokens
Accepted by the <internalformat> parameter of TexImage2D, CopyTexImage2D,
and CompressedTexImage2DARB and the <format> parameter of
CompressedTexSubImage2DARB:
COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0
COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1
COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2
COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3
Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation)
None.
Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization)
Add to Table 3.16.1: Specific Compressed Internal Formats
Compressed Internal Format Base Internal Format
========================== ====================
COMPRESSED_RGB_S3TC_DXT1_EXT RGB
COMPRESSED_RGBA_S3TC_DXT1_EXT RGBA
COMPRESSED_RGBA_S3TC_DXT3_EXT RGBA
COMPRESSED_RGBA_S3TC_DXT5_EXT RGBA
Modify Section 3.8.2, Alternate Image Specification
(add to end of TexSubImage discussion, p.123 -- after edit from the
ARB_texture_compression spec)
If the internal format of the texture image being modified is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
texture is stored using one of the several S3TC compressed texture image
formats. Such images are easily edited along 4x4 texel boundaries, so the
limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed.
TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION
error only if one of the following conditions occurs:
* <width> is not a multiple of four or equal to TEXTURE_WIDTH,
unless <xoffset> and <yoffset> are both zero.
* <height> is not a multiple of four or equal to TEXTURE_HEIGHT,
unless <xoffset> and <yoffset> are both zero.
* <xoffset> or <yoffset> is not a multiple of four.
The contents of any 4x4 block of texels of an S3TC compressed texture
image that does not intersect the area being modified are preserved during
valid TexSubImage2D and CopyTexSubImage2D calls.
Add to Section 3.8.2, Alternate Image Specification (adding to the end of
the CompressedTexImage section introduced by the ARB_texture_compression
spec)
If <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one
of several S3TC compressed texture image formats. The S3TC texture
compression algorithm supports only 2D images without borders.
CompressedTexImage1DARB and CompressedTexImage3DARB produce an
INVALID_ENUM error if <internalformat> is an S3TC format.
CompressedTexImage2DARB will produce an INVALID_OPERATION error if
<border> is non-zero.
Add to Section 3.8.2, Alternate Image Specification (adding to the end of
the CompressedTexSubImage section introduced by the
ARB_texture_compression spec)
If the internal format of the texture image being modified is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
texture is stored using one of the several S3TC compressed texture image
formats. Since the S3TC texture compression algorithm supports only 2D
images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce
an INVALID_ENUM error if <format> is an S3TC format. Since S3TC images
are easily edited along 4x4 texel boundaries, the limitations on
CompressedTexSubImage2D are relaxed. CompressedTexSubImage2D will result
in an INVALID_OPERATION error only if one of the following conditions
occurs:
* <width> is not a multiple of four or equal to TEXTURE_WIDTH.
* <height> is not a multiple of four or equal to TEXTURE_HEIGHT.
* <xoffset> or <yoffset> is not a multiple of four.
The contents of any 4x4 block of texels of an S3TC compressed texture
image that does not intersect the area being modified are preserved during
valid TexSubImage2D and CopyTexSubImage2D calls.
Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment
Operations and the Frame Buffer)
None.
Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions)
None.
Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and
State Requests)
None.
Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance)
None.
Additions to the AGL/GLX/WGL Specifications
None.
GLX Protocol
None.
Errors
INVALID_ENUM is generated by CompressedTexImage1DARB or
CompressedTexImage3DARB if <internalformat> is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT.
INVALID_OPERATION is generated by CompressedTexImage2DARB if
<internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
COMPRESSED_RGBA_S3TC_DXT5_EXT and <border> is not equal to zero.
INVALID_ENUM is generated by CompressedTexSubImage1DARB or
CompressedTexSubImage3DARB if <format> is COMPRESSED_RGB_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
COMPRESSED_RGBA_S3TC_DXT5_EXT.
INVALID_OPERATION is generated by TexSubImage2D, CopyTexSubImage2D, or
CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of
the following apply: <width> is not a multiple of four or equal to
TEXTURE_WIDTH; <height> is not a multiple of four or equal to
TEXTURE_HEIGHT; <xoffset> or <yoffset> is not a multiple of four.
The following restrictions from the ARB_texture_compression specification
do not apply to S3TC texture formats, since subimage modification is
straightforward as long as the subimage is properly aligned.
DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D,
DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or
DELETE: CopyTexSubImage3D if the internal format of the texture image is
DELETE: compressed and <xoffset>, <yoffset>, or <zoffset> does not equal
DELETE: -b, where b is value of TEXTURE_BORDER.
DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB,
DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the
DELETE: entire texture image is not being edited: if <xoffset>,
DELETE: <yoffset>, or <zoffset> is greater than -b, <xoffset> + <width> is
DELETE: less than w+b, <yoffset> + <height> is less than h+b, or <zoffset>
DELETE: + <depth> is less than d+b, where b is the value of
DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of
DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH.
See also errors in the GL_ARB_texture_compression specification.
New State
In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT
subscript for Z by 4 in the "Type" row.
New Implementation Dependent State
None
Appendix
S3TC Compressed Texture Image Formats
Compressed texture images stored using the S3TC compressed image formats
are represented as a collection of 4x4 texel blocks, where each block
contains 64 or 128 bits of texel data. The image is encoded as a normal
2D raster image in which each 4x4 block is treated as a single pixel. If
an S3TC image has a width or height less than four, the data corresponding
to texels outside the image are irrelevant and undefined.
When an S3TC image with a width of <w>, height of <h>, and block size of
<blocksize> (8 or 16 bytes) is decoded, the corresponding image size (in
bytes) is:
ceil(<w>/4) * ceil(<h>/4) * blocksize.
When decoding an S3TC image, the block containing the texel at offset
(<x>, <y>) begins at an offset (in bytes) relative to the base of the
image of:
blocksize * (ceil(<w>/4) * floor(<y>/4) + floor(<x>/4)).
The data corresponding to a specific texel (<x>, <y>) are extracted from a
4x4 texel block using a relative (x,y) value of
(<x> modulo 4, <y> modulo 4).
There are four distinct S3TC image formats:
COMPRESSED_RGB_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64
bits of RGB image data.
Each RGB image data block is encoded as a sequence of 8 bytes, called (in
order of increasing address):
c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3
The 8 bytes of the block are decoded into three quantities:
color0 = c0_lo + c0_hi * 256
color1 = c1_lo + c1_hi * 256
bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3))
color0 and color1 are 16-bit unsigned integers that are unpacked to
RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with
a <format> of RGB and a type of UNSIGNED_SHORT_5_6_5.
bits is a 32-bit unsigned integer, from which a two-bit control code
is extracted for a texel at location (x,y) in the block using:
code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0]
where bit 31 is the most significant and bit 0 is the least
significant bit.
The RGB color for a texel at location (x,y) in the block is given by:
RGB0, if color0 > color1 and code(x,y) == 0
RGB1, if color0 > color1 and code(x,y) == 1
(2*RGB0+RGB1)/3, if color0 > color1 and code(x,y) == 2
(RGB0+2*RGB1)/3, if color0 > color1 and code(x,y) == 3
RGB0, if color0 <= color1 and code(x,y) == 0
RGB1, if color0 <= color1 and code(x,y) == 1
(RGB0+RGB1)/2, if color0 <= color1 and code(x,y) == 2
BLACK, if color0 <= color1 and code(x,y) == 3
Arithmetic operations are done per component, and BLACK refers to an
RGB color where red, green, and blue are all zero.
Since this image has an RGB format, there is no alpha component and the
image is considered fully opaque.
COMPRESSED_RGBA_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64
bits of RGB image data and minimal alpha information. The RGB components
of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT.
The alpha component for a texel at location (x,y) in the block is
given by:
0.0, if color0 <= color1 and code(x,y) == 3
1.0, otherwise
IMPORTANT: When encoding an RGBA image into a format using 1-bit
alpha, any texels with an alpha component less than 0.5 end up with an
alpha of 0.0 and any texels with an alpha component greater than or
equal to 0.5 end up with an alpha of 1.0. When encoding an RGBA image
into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red,
green, and blue components of any texels with a final alpha of 0.0
will automatically be zero (black). If this behavior is not desired
by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT.
This format will never be used when a generic compressed internal
format (Table 3.16.2) is specified, although the nearly identical
format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be.
COMPRESSED_RGBA_S3TC_DXT3_EXT: Each 4x4 block of texels consists of 64
bits of uncompressed alpha image data followed by 64 bits of RGB image
data.
Each RGB image data block is encoded according to the
COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
bits always use the non-transparent encodings. In other words, they are
treated as though color0 > color1, regardless of the actual values of
color0 and color1.
Each alpha image data block is encoded as a sequence of 8 bytes, called
(in order of increasing address):
a0, a1, a2, a3, a4, a5, a6, a7
The 8 bytes of the block are decoded into one 64-bit integer:
alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 +
256 * (a5 + 256 * (a6 + 256 * a7))))))
alpha is a 64-bit unsigned integer, from which a four-bit alpha value
is extracted for a texel at location (x,y) in the block using:
alpha(x,y) = bits[4*(4*y+x)+3..4*(4*y+x)+0]
where bit 63 is the most significant and bit 0 is the least
significant bit.
The alpha component for a texel at location (x,y) in the block is
given by alpha(x,y) / 15.
COMPRESSED_RGBA_S3TC_DXT5_EXT: Each 4x4 block of texels consists of 64
bits of compressed alpha image data followed by 64 bits of RGB image data.
Each RGB image data block is encoded according to the
COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
bits always use the non-transparent encodings. In other words, they are
treated as though color0 > color1, regardless of the actual values of
color0 and color1.
Each alpha image data block is encoded as a sequence of 8 bytes, called
(in order of increasing address):
alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5
The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha
components by multiplying by 1/255.
The 6 "bits" bytes of the block are decoded into one 48-bit integer:
bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 +
256 * (bits_4 + 256 * bits_5))))
bits is a 48-bit unsigned integer, from which a three-bit control code
is extracted for a texel at location (x,y) in the block using:
code(x,y) = bits[3*(4*y+x)+2..3*(4*y+x)+0]
where bit 47 is the most significant and bit 0 is the least
significant bit.
The alpha component for a texel at location (x,y) in the block is
given by:
alpha0, code(x,y) == 0
alpha1, code(x,y) == 1
(6*alpha0 + 1*alpha1)/7, alpha0 > alpha1 and code(x,y) == 2
(5*alpha0 + 2*alpha1)/7, alpha0 > alpha1 and code(x,y) == 3
(4*alpha0 + 3*alpha1)/7, alpha0 > alpha1 and code(x,y) == 4
(3*alpha0 + 4*alpha1)/7, alpha0 > alpha1 and code(x,y) == 5
(2*alpha0 + 5*alpha1)/7, alpha0 > alpha1 and code(x,y) == 6
(1*alpha0 + 6*alpha1)/7, alpha0 > alpha1 and code(x,y) == 7
(4*alpha0 + 1*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 2
(3*alpha0 + 2*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 3
(2*alpha0 + 3*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 4
(1*alpha0 + 4*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 5
0.0, alpha0 <= alpha1 and code(x,y) == 6
1.0, alpha0 <= alpha1 and code(x,y) == 7
Revision History
1.1, 11/16/01 pbrown: Updated contact info, clarified where texels
fall within a single block.
1.0, 07/07/00 prbrown1: Published final version agreed to by working
group members.
0.9, 06/24/00 prbrown1: Documented that block-aligned TexSubImage calls
do not modify existing texels outside the
modified blocks. Added caveat to allow for a
(0,0)-anchored TexSubImage operation of
arbitrary size.
0.7, 04/11/00 prbrown1: Added issues on DXT1, DXT3, and DXT5 encodings
where the MSDN documentation doesn't match what
is really done. Added enum values from the
extension registry.
0.4, 03/28/00 prbrown1: Updated to reflect final version of the
ARB_texture_compression extension. Allowed
block-aligned TexSubImage calls.
0.3, 03/07/00 prbrown1: Resolved issues pertaining to the format of RGB
blocks in the DXT3 and DXT5 formats (they don't
ever use the "transparent" encoding). Fixed
decoding of DXT1 blocks. Pointed out issue of
"transparent" texels in DXT1 encodings having
different behaviors for RGB and RGBA internal
formats.
0.2, 02/23/00 prbrown1: Minor revisions; added several issues.
0.11, 02/17/00 prbrown1: Slight modification to error semantics
(INVALID_ENUM instead of INVALID_OPERATION).
0.1, 02/15/00 prbrown1: Initial revision.

View File

@ -0,0 +1,39 @@
Microsoft Visual Studio Solution File, Format Version 8.00
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcproj", "{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
ProjectSection(ProjectDependencies) = postProject
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishpng", "squishpng\squishpng.vcproj", "{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
ProjectSection(ProjectDependencies) = postProject
{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishtest", "squishtest\squishtest.vcproj", "{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
ProjectSection(ProjectDependencies) = postProject
{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfiguration) = preSolution
Debug = Debug
Release = Release
EndGlobalSection
GlobalSection(ProjectConfiguration) = postSolution
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.ActiveCfg = Debug|Win32
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.Build.0 = Debug|Win32
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.ActiveCfg = Release|Win32
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.Build.0 = Release|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.ActiveCfg = Debug|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.Build.0 = Debug|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.ActiveCfg = Release|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.Build.0 = Release|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.ActiveCfg = Debug|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.Build.0 = Debug|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.ActiveCfg = Release|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
EndGlobalSection
GlobalSection(ExtensibilityAddIns) = postSolution
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,198 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="squish"
ProjectGUID="{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
Keyword="Win32Proj">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="4"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;SQUISH_USE_SSE=1"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
EnableEnhancedInstructionSet="1"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLibrarianTool"
OutputFile="$(OutDir)/squish.lib"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="4"
CharacterSet="2"
WholeProgramOptimization="TRUE">
<Tool
Name="VCCLCompilerTool"
GlobalOptimizations="TRUE"
InlineFunctionExpansion="2"
FavorSizeOrSpeed="1"
OmitFramePointers="TRUE"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;SQUISH_USE_SSE=1"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLibrarianTool"
OutputFile="$(OutDir)/squish.lib"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
RelativePath="..\..\alpha.cpp">
</File>
<File
RelativePath="..\..\clusterfit.cpp">
</File>
<File
RelativePath="..\..\colourblock.cpp">
</File>
<File
RelativePath="..\..\colourfit.cpp">
</File>
<File
RelativePath="..\..\colourset.cpp">
</File>
<File
RelativePath="..\..\maths.cpp">
</File>
<File
RelativePath="..\..\rangefit.cpp">
</File>
<File
RelativePath="..\..\singlecolourfit.cpp">
</File>
<File
RelativePath="..\..\squish.cpp">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
<File
RelativePath="..\..\alpha.h">
</File>
<File
RelativePath="..\..\clusterfit.h">
</File>
<File
RelativePath="..\..\colourblock.h">
</File>
<File
RelativePath="..\..\colourfit.h">
</File>
<File
RelativePath="..\..\colourset.h">
</File>
<File
RelativePath="..\..\config.h">
</File>
<File
RelativePath="..\..\maths.h">
</File>
<File
RelativePath="..\..\rangefit.h">
</File>
<File
RelativePath="..\..\simd.h">
</File>
<File
RelativePath="..\..\simd_sse.h">
</File>
<File
RelativePath="..\..\simd_ve.h">
</File>
<File
RelativePath="..\..\singlecolourfit.h">
</File>
<File
RelativePath="..\..\singlecolourlookup.inl">
</File>
<File
RelativePath="..\..\squish.h">
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
<File
RelativePath="..\..\texture_compression_s3tc.txt">
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,140 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="squishpng"
ProjectGUID="{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
Keyword="Win32Proj">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="libpng13d.lib"
OutputFile="$(OutDir)/squishpng.exe"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
ProgramDatabaseFile="$(OutDir)/squishpng.pdb"
SubSystem="1"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="libpng13.lib"
OutputFile="$(OutDir)/squishpng.exe"
LinkIncremental="1"
GenerateDebugInformation="TRUE"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
RelativePath="..\..\extra\squishpng.cpp">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,138 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="squishtest"
ProjectGUID="{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
Keyword="Win32Proj">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/squishtest.exe"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
ProgramDatabaseFile="$(OutDir)/squishtest.pdb"
SubSystem="1"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/squishtest.exe"
LinkIncremental="1"
GenerateDebugInformation="TRUE"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
RelativePath="..\..\extra\squishtest.cpp">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -30,7 +30,7 @@
#include <cfloat>
namespace nvsquish {
namespace squish {
WeightedClusterFit::WeightedClusterFit()
{
@ -131,13 +131,10 @@ float WeightedClusterFit::GetBestError() const
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -149,11 +146,11 @@ void WeightedClusterFit::Compress3( void* block )
int b0 = 0, b1 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec4 x1 = zero;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec4 const x2 = m_xsum - x1 - x0;
@ -176,21 +173,24 @@ void WeightedClusterFit::Compress3( void* block )
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -221,17 +221,17 @@ void WeightedClusterFit::Compress3( void* block )
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
// save the block
@ -244,16 +244,12 @@ void WeightedClusterFit::Compress3( void* block )
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half = VEC4_CONST(0.5f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -264,30 +260,30 @@ void WeightedClusterFit::Compress4( void* block )
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec4 x1 = zero;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec4 x2 = zero;
for( int c2 = 0; c2 <= count-c0-c1; c2++)
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
Vec4 const x3 = m_xsum - x2 - x1 - x0;
//Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); // alphax_sum, alpha2_sum
Vec4 const alpha2_sum = alphax_sum.SplatW();
//Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
Vec4 const betax_sum = x3 + MultiplyAdd(x2, twothirds, x1 * onethird); // betax_sum, beta2_sum
Vec4 const beta2_sum = betax_sum.SplatW();
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum
//float const alphabeta_sum = w1 * (2.0f/9.0f) + w2 * (2.0f/9.0f);
Vec4 const alphabeta_sum = two * (x1 * onethird + x2 * onethird).SplatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
@ -295,21 +291,24 @@ void WeightedClusterFit::Compress4( void* block )
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -347,20 +346,18 @@ void WeightedClusterFit::Compress4( void* block )
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
// save the error
m_besterror = besterror;
@ -371,13 +368,6 @@ void WeightedClusterFit::Compress4( void* block )
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -389,12 +379,12 @@ void WeightedClusterFit::Compress3( void* block )
int b0 = 0, b1 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
float w2 = m_wsum - w0 - w1;
@ -410,9 +400,16 @@ void WeightedClusterFit::Compress3( void* block )
Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -453,20 +450,18 @@ void WeightedClusterFit::Compress3( void* block )
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock3( beststart, bestend, bestindices, block );
WriteColourBlock3( beststart, bestend, ordered, block );
// save the error
m_besterror = besterror;
@ -475,13 +470,6 @@ void WeightedClusterFit::Compress3( void* block )
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -492,17 +480,17 @@ void WeightedClusterFit::Compress4( void* block )
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec3 x2(0.0f);
float w2 = 0.0f;
for( int c2 = 0; c2 <= count-c0-c1; c2++)
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
float w3 = m_wsum - w0 - w1 - w2;
@ -517,9 +505,16 @@ void WeightedClusterFit::Compress4( void* block )
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -568,20 +563,18 @@ void WeightedClusterFit::Compress4( void* block )
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart, bestend, bestindices, block );
WriteColourBlock4( beststart, bestend, ordered, block );
// save the error
m_besterror = besterror;

View File

@ -24,15 +24,15 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_WEIGHTEDCLUSTERFIT_H
#define NV_SQUISH_WEIGHTEDCLUSTERFIT_H
#ifndef SQUISH_WEIGHTEDCLUSTERFIT_H
#define SQUISH_WEIGHTEDCLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace nvsquish {
namespace squish {
class WeightedClusterFit : public ColourFit
{

View File

@ -1,28 +0,0 @@
# filtertest executable; shares the command line helper header with the tools.
ADD_EXECUTABLE(filtertest filtertest.cpp ../tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
# nvtestsuite executable; unlike filtertest it also links against nvtt.
ADD_EXECUTABLE(nvtestsuite testsuite.cpp)
TARGET_LINK_LIBRARIES(nvtestsuite nvcore nvmath nvimage nvtt)
# One registered test per image set (-set 0, 1, 2), each reading its input from data/testsuite.
# The ".cuda" variants pass no extra flag; they are registered regardless of CUDA_FOUND.
ADD_TEST(NVTT.TestSuite.Kodak.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -out output-cuda-kodak)
ADD_TEST(NVTT.TestSuite.Waterloo.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -out output-cuda-waterloo)
ADD_TEST(NVTT.TestSuite.Epic.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -out output-cuda-epic)
# The ".nocuda" variants run the same sets with -nocuda and write to separate output directories.
ADD_TEST(NVTT.TestSuite.Kodak.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -nocuda -out output-nocuda-kodak)
ADD_TEST(NVTT.TestSuite.Waterloo.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -nocuda -out output-nocuda-waterloo)
ADD_TEST(NVTT.TestSuite.Epic.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -nocuda -out output-nocuda-epic)
# The driver API test is only built when CUDA was found.
IF (CUDA_FOUND)
ADD_EXECUTABLE(driverapitest driverapi.cpp)
TARGET_LINK_LIBRARIES(driverapitest nvcore nvmath nvimage)
ENDIF (CUDA_FOUND)
# imperativeapi executable; links against nvtt.
ADD_EXECUTABLE(imperativeapi imperativeapi.cpp)
TARGET_LINK_LIBRARIES(imperativeapi nvcore nvmath nvimage nvtt)
# Only the test suite runner is installed.
INSTALL(TARGETS nvtestsuite DESTINATION bin)
# Disabled: nvmpegenc target, which would link against avcodec and z.
#include_directories("/usr/include/ffmpeg/")
#ADD_EXECUTABLE(nvmpegenc tools/mpegenc.cpp tools/cmdline.h)
#TARGET_LINK_LIBRARIES(nvmpegenc nvcore nvmath nvimage avcodec z)
#INSTALL(TARGETS nvmpegenc DESTINATION bin)

View File

@ -1,144 +0,0 @@
#include <nvcore/Library.h>
#include <cuda.h>
#include <stdio.h>
// Typedefs
typedef CUresult CUDAAPI (*cuInitPtr)( unsigned int Flags );
typedef CUresult CUDAAPI (*cuDeviceGetPtr)(CUdevice *device, int ordinal);
typedef CUresult CUDAAPI (*cuDeviceGetCountPtr)(int *count);
typedef CUresult CUDAAPI (*cuDeviceGetNamePtr)(char *name, int len, CUdevice dev);
typedef CUresult CUDAAPI (*cuDeviceComputeCapabilityPtr)(int *major, int *minor, CUdevice dev);
typedef CUresult CUDAAPI (*cuDeviceTotalMemPtr)(unsigned int *bytes, CUdevice dev);
typedef CUresult CUDAAPI (*cuDeviceGetPropertiesPtr)(CUdevprop *prop, CUdevice dev);
typedef CUresult CUDAAPI (*cuDeviceGetAttributePtr)(int *pi, CUdevice_attribute attrib, CUdevice dev);
typedef CUresult CUDAAPI (*cuCtxCreatePtr)(CUcontext *pctx, unsigned int flags, CUdevice dev );
typedef CUresult CUDAAPI (*cuCtxDestroyPtr)( CUcontext ctx );
typedef CUresult CUDAAPI (*cuCtxAttachPtr)(CUcontext *pctx, unsigned int flags);
typedef CUresult CUDAAPI (*cuCtxDetachPtr)(CUcontext ctx);
typedef CUresult CUDAAPI (*cuCtxPushCurrentPtr)( CUcontext ctx );
typedef CUresult CUDAAPI (*cuCtxPopCurrentPtr)( CUcontext *pctx );
typedef CUresult CUDAAPI (*cuCtxGetDevicePtr)(CUdevice *device);
typedef CUresult CUDAAPI (*cuCtxSynchronizePtr)(void);
// A compressor inits CUDA and creates a context for each device.
// CudaDevice pairs one device handle with the driver context created for it;
// CudaContext keeps an array with one entry per device reported by the driver.
struct CudaDevice
{
// Device handle, written by cuDeviceGet.
CUdevice device;
// Context handle, written by cuCtxCreate for 'device'.
CUcontext context;
};
struct CudaContext
{
CudaContext()
{
printf("CudaContext()\n");
#if NV_OS_WIN32
Library nvcuda("nvcuda.dll");
#else
Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif
cuInit = (cuInitPtr)nvcuda.bindSymbol("cuInit");
cuDeviceGet = (cuDeviceGetPtr)nvcuda.bindSymbol("cuDeviceGet");
cuDeviceGetCount = (cuDeviceGetCountPtr)nvcuda.bindSymbol("cuDeviceGetCount");
cuDeviceGetName = (cuDeviceGetNamePtr)nvcuda.bindSymbol("cuDeviceGetName");
cuDeviceComputeCapability = (cuDeviceComputeCapabilityPtr)nvcuda.bindSymbol("cuDeviceComputeCapability");
cuDeviceTotalMem = (cuDeviceTotalMemPtr)nvcuda.bindSymbol("cuDeviceTotalMem");
cuDeviceGetProperties = (cuDeviceGetPropertiesPtr)nvcuda.bindSymbol("cuDeviceGetProperties");
cuDeviceGetAttribute = (cuDeviceGetAttributePtr)nvcuda.bindSymbol("cuDeviceGetAttribute");
cuCtxCreate = (cuCtxCreatePtr)nvcuda.bindSymbol("cuCtxCreate");
cuCtxDestroy = (cuCtxDestroyPtr)nvcuda.bindSymbol("cuCtxDestroy");
cuCtxAttach = (cuCtxAttachPtr)nvcuda.bindSymbol("cuCtxAttach");
cuCtxDetach = (cuCtxDetachPtr)nvcuda.bindSymbol("cuCtxDetach");
cuCtxPushCurrent = (cuCtxPushCurrentPtr)nvcuda.bindSymbol("cuCtxPushCurrent");
cuCtxPopCurrent = (cuCtxPopCurrentPtr)nvcuda.bindSymbol("cuCtxPopCurrent");
cuCtxGetDevice = (cuCtxGetDevicePtr)nvcuda.bindSymbol("cuCtxGetDevice");
cuCtxSynchronize = (cuCtxSynchronizePtr)nvcuda.bindSymbol("cuCtxSynchronize");
CUresult status = cuInit(0);
if (status == CUDA_SUCCESS)
{
printf("cuInit succeeded.\n");
}
m_deviceCount = 0;
cuDeviceGetCount(&m_deviceCount);
printf("%d devices found.\n", m_deviceCount);
if (m_deviceCount > 0)
{
m_devices = new CudaDevice[m_deviceCount];
uint flags = CU_CTX_SCHED_AUTO;
if (m_deviceCount > 1) flags = CU_CTX_SCHED_YIELD;
for (int i = 0; i < m_deviceCount; i++)
{
cuDeviceGet(&m_devices[i].device, i);
cuCtxCreate(&m_devices[i].context, flags, m_devices[i].device);
cuCtxDestroy(m_devices[i].context);
}
}
}
~CudaContext()
{
printf("~CudaContext()\n");
if (m_deviceCount > 0)
{
for (int i = 0; i < m_deviceCount; i++)
{
cuCtxDestroy(m_devices[i].context);
}
delete [] m_devices;
}
}
public:
cuInitPtr cuInit;
cuDeviceGetPtr cuDeviceGet;
cuDeviceGetCountPtr cuDeviceGetCount;
cuDeviceGetNamePtr cuDeviceGetName;
cuDeviceComputeCapabilityPtr cuDeviceComputeCapability;
cuDeviceTotalMemPtr cuDeviceTotalMem;
cuDeviceGetPropertiesPtr cuDeviceGetProperties;
cuDeviceGetAttributePtr cuDeviceGetAttribute;
cuCtxCreatePtr cuCtxCreate;
cuCtxDestroyPtr cuCtxDestroy;
cuCtxAttachPtr cuCtxAttach;
cuCtxDetachPtr cuCtxDetach;
cuCtxPushCurrentPtr cuCtxPushCurrent;
cuCtxPopCurrentPtr cuCtxPopCurrent;
cuCtxGetDevicePtr cuCtxGetDevice;
cuCtxSynchronizePtr cuCtxSynchronize;
int m_deviceCount;
CudaDevice * m_devices;
};
// Entry point of the driver API test. All the work happens in the CudaContext constructor;
// the destructor runs when 'cuda' goes out of scope at the end of main.
int main()
{
    CudaContext cuda;

    return 0;
}

View File

@ -1,56 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <stdlib.h>
// Exercises the imperative API: loads "kodim01.png" and sends the base level followed by every
// box-filtered mipmap to "output.dds", compressed as BC1.
//
// Mipmaps are built on the linear-space image; each level is converted back to gamma space on
// a temporary copy before it is compressed, so 'image' itself stays linear between iterations.
int main(int argc, char *argv[])
{
    nvtt::CompressionOptions compressionOptions;
    compressionOptions.setFormat(nvtt::Format_BC1);

    nvtt::OutputOptions outputOptions;
    outputOptions.setFileName("output.dds");

    nvtt::Context context;
    nvtt::TexImage image = context.createTexImage();
    image.load("kodim01.png");

    // Output the base level before any processing. The loop below only sees the levels
    // produced by buildNextMipmap, so without this call the top level was never written.
    // NOTE(review): the image is still in its loaded (gamma) space here, which is why no
    // toGamma round trip is applied -- confirm against the TexImage implementation.
    context.outputCompressed(image, compressionOptions, outputOptions);

    const float gamma = 2.2f;
    image.toLinear(gamma);

    while (image.buildNextMipmap(nvtt::MipmapFilter_Box))
    {
        // Work on a copy so that the next mipmap is still built from linear data.
        nvtt::TexImage tmpImage = image;
        tmpImage.toGamma(gamma);

        context.outputCompressed(tmpImage, compressionOptions, outputOptions);
        // tmpImage.outputCompressed(compressionOptions, outputOptions);
    }

    return EXIT_SUCCESS;
}

View File

@ -1,344 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvtt/nvtt.h>
#include "cmdline.h"
extern "C" {
#include <libavcodec/avcodec.h>
//#include <libavformat/avformat.h>
}
// http://ffmpeg.mplayerhq.hu/general.html
// http://cekirdek.pardus.org.tr/~ismail/ffmpeg-docs/apiexample_8c-source.html
using namespace nv;
static float s_quality = 0.5f;
// Builds a planar YCbCr 4:2:0 frame from 'image'.
//
// The three planes live in one malloc'ed buffer: the luma plane (w * h bytes) comes first,
// followed by the Cb and Cr planes, each with a stride of w / 2. The buffer is reachable
// through picture->data[0]; the frame structure itself comes from avcodec_alloc_frame.
// Both are owned by the caller.
//
// Prints a message and exits if either allocation fails, like the other fatal errors in
// this tool.
static AVFrame * createPicture(const Image & image)
{
    const uint w = image.width();
    const uint h = image.height();
    const uint size = w * h;
    const uint bufferSize = (size * 3) / 2;

    AVFrame * picture = avcodec_alloc_frame();
    uint8_t * buffer = (uint8_t *)malloc(bufferSize);

    // malloc(0) is allowed to return NULL, so only a non-empty request counts as a failure.
    if (picture == NULL || (buffer == NULL && bufferSize != 0))
    {
        printf("Out of memory while creating picture.\n");
        exit(1);
    }

    picture->data[0] = buffer;
    picture->data[1] = buffer + size;
    picture->data[2] = buffer + size + size / 4;
    picture->linesize[0] = w;
    picture->linesize[1] = w / 2;
    picture->linesize[2] = w / 2;

    if (bufferSize != 0)
    {
        memset(buffer, 0, bufferSize);
    }

    // Convert image to YCbCr 4:2:0

    // Y: one sample per pixel.
    for (uint y = 0; y < h; y++)
    {
        for (uint x = 0; x < w; x++)
        {
            Color32 c = image.pixel(x, y);

            float R = (1 / 255.0f) * c.r;
            float G = (1 / 255.0f) * c.g;
            float B = (1 / 255.0f) * c.b;

            //float Y = 0.299f * R + 0.587f * G + 0.114f * B;
            float Y = 16 + (65.481f * R + 128.553f * G + 24.966f * B);

            picture->data[0][y * picture->linesize[0] + x] = (uint8)clamp(Y, 0.0f, 255.0f);
        }
    }

    // Cb and Cr: one sample per 2x2 block, computed from the average color of the block.
    // With odd dimensions the last column/row of pixels does not contribute to chroma.
    for (uint y = 0; y < h / 2; y++)
    {
        for (uint x = 0; x < w / 2; x++)
        {
            Color32 c0 = image.pixel(2*x+0, 2*y+0);
            Color32 c1 = image.pixel(2*x+1, 2*y+0);
            Color32 c2 = image.pixel(2*x+0, 2*y+1);
            Color32 c3 = image.pixel(2*x+1, 2*y+1);

            float R = (1 / 255.0f) * 0.25f * (c0.r + c1.r + c2.r + c3.r);
            float G = (1 / 255.0f) * 0.25f * (c0.g + c1.g + c2.g + c3.g);
            float B = (1 / 255.0f) * 0.25f * (c0.b + c1.b + c2.b + c3.b);

            //float Pb = - 0.168736f * R - 0.331264f * G + 0.5f * B;
            //float Pr = + 0.5f * R - 0.418688f * G - 0.081312f * B;

            float Cb = 128 + (-37.797f * R - 74.203f * G + 112.0f * B);
            // 93.786 is a double literal, so this line is evaluated in double precision;
            // kept as it was so that the generated chroma values do not change.
            float Cr = 128 + (112.0f * R - 93.786 * G - 18.214f * B);

            picture->data[1][y * picture->linesize[1] + x] = (uint8)clamp(Cb, 0.0f, 255.0f);
            picture->data[2][y * picture->linesize[2] + x] = (uint8)clamp(Cr, 0.0f, 255.0f);
        }
    }

    return picture;
}
// Writes a single 8-bit plane as a binary PGM ("P5") file.
//
//   buf      - first byte of the plane.
//   wrap     - distance in bytes between the start of consecutive rows.
//   xsize    - number of bytes written per row.
//   ysize    - number of rows.
//   filename - destination path.
//
// The file is opened in binary mode: the payload is raw bytes and must not go through
// newline translation on platforms that distinguish text streams. If the file cannot be
// opened an error is reported on stderr and nothing is written.
static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char * filename)
{
    FILE * f = fopen(filename, "wb");
    if (f == NULL)
    {
        fprintf(stderr, "Error opening '%s' for writing.\n", filename);
        return;
    }

    // Header: magic number, dimensions, maximum sample value.
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);

    for (int i = 0; i < ysize; i++)
    {
        fwrite(buf + i * wrap, 1, xsize, f);
    }

    fclose(f);
}
// Dumps the three planes of 'picture' to "test_y.pgm", "test_u.pgm" and "test_v.pgm".
// The first plane is written at w x h, the other two at half that size in each direction.
static void savePicture(const AVFrame * picture, int w, int h)
{
    // @@ Combine planes.
    static const char * const planeFiles[3] = { "test_y.pgm", "test_u.pgm", "test_v.pgm" };

    for (int plane = 0; plane < 3; plane++)
    {
        const int divisor = (plane == 0) ? 1 : 2;
        pgm_save(picture->data[plane], picture->linesize[plane], w / divisor, h / divisor, planeFiles[plane]);
    }
}
// Converts a normalized mean squared error 'd' into decibels: -10 * log10(d), evaluated
// with natural logarithms.
static double psnr(double d)
{
    const double scaledLog = -10.0 * log(d);
    return scaledLog / log(10.0);
}
// Encodes 'image' as a single intra frame with the codec 'format' and stores the resulting
// bitstream, followed by a sequence end code, in 'frame'.
//
// Prints the compression ratio and the PSNR reported by the encoder. Any failure (encoder
// not found, allocation or initialization failure, encoding error) prints a message and
// exits the process.
static void encodeFrame(const Image & image, CodecID format, Array<uint8> & frame)
{
    AVFrame * picture = createPicture(image);

    AVCodec * encoder = avcodec_find_encoder(format);
    if (encoder == NULL)
    {
        printf("MPEG encoder not found.\n");
        exit(1);
    }

    AVCodecContext * encoder_context = avcodec_alloc_context();
    if (encoder_context == NULL)
    {
        printf("MPEG encoder initialization failed.\n");
        exit(1);
    }

    //encoder_context->me_method = 0;
    encoder_context->width = image.width();
    encoder_context->height = image.height();

    encoder_context->pix_fmt = PIX_FMT_YUV420P;
    //encoder_context->pix_fmt = PIX_FMT_YUV422P;
    //encoder_context->pix_fmt = PIX_FMT_YUVJ420P;

    // Required parameter. 25 fps?
    // Assigned member by member: the '(AVRational){1,25}' compound literal used before is a
    // C99 construct that is not part of standard C++.
    encoder_context->time_base.num = 1;
    encoder_context->time_base.den = 25;

    encoder_context->bit_rate = 400000; // Quality?
    //encoder_context->bit_rate = 200000; // Default
    //encoder_context->bit_rate_tolerance = 20000;
    //encoder_context->qmin = ?;
    //encoder_context->qmax = ?;
    //encoder_context->qcompress = ?;
    //encoder_context->qblur = ?;

    encoder_context->flags |= CODEC_FLAG_PSNR;

    encoder_context->qcompress = s_quality;
    //encoder_context->qblur = 1.0f;
    //encoder_context->global_quality = FF_QP2LAMBDA * 0;
    //encoder_context->max_qdiff = 3;

    // Intra frames only
    encoder_context->gop_size = 0;

    if (avcodec_open(encoder_context, encoder) < 0)
    {
        printf("MPEG encoder initialization failed.\n");
        exit(1);
    }

    frame.resize(1024 * 1024, 0); // resize and initialize to 0.

    const int out_size = avcodec_encode_video(encoder_context, frame.mutableBuffer(), frame.size(), picture);

    // A negative size must not reach frame.resize, and a size of zero would make the ratio
    // computed below divide by zero.
    if (out_size <= 0)
    {
        printf("Error while encoding frame.\n");
        exit(1);
    }

    frame.resize(out_size);

    // Append sequence end code.
    frame.append(0x00);
    frame.append(0x00);
    frame.append(0x01);
    frame.append(0xb7);

    int in_size = image.width() * image.height() * 3;
    printf("Image size %d -> %d (1:%d)\n", in_size, out_size, in_size/out_size);

    printf("PSNR = %4.2f\n", psnr(encoder_context->coded_frame->error[0]/(encoder_context->width*encoder_context->height*255.0*255.0)));

    avcodec_close(encoder_context);
    av_free(encoder_context);

    // createPicture allocates all three planes as one malloc'ed block starting at data[0];
    // release it before the frame structure, which does not own it.
    free(picture->data[0]);
    av_free(picture);
}
static void decodeFrame(const Array<uint8> & frame, CodecID format)
{
AVCodec * decoder = avcodec_find_decoder(format);
if (decoder == NULL) {
printf("MPEG decoder not found.\n");
exit(1);
}
AVCodecContext * decoder_context = avcodec_alloc_context();
AVFrame * picture = avcodec_alloc_frame();
if (decoder->capabilities & CODEC_CAP_TRUNCATED)
decoder_context->flags |= CODEC_FLAG_TRUNCATED; /* we do not send complete frames */
if (avcodec_open(decoder_context, decoder) < 0) {
printf("MPEG decoder initialization failed.\n");
exit(1);
}
//memset(picture->data[0], 0, in_size / 2);
int got_picture = 0;
int len = avcodec_decode_video(decoder_context, picture, &got_picture, frame.buffer(), frame.size());
printf("decoded %d bytes\n", len);
if (len < 0) {
printf("Error while decoding frame.\n");
exit(1);
}
if (!got_picture) {
printf("Did not get any picture.\n");
exit(1);
}
//nvDebugCheck(outbuf_size == len);
//nvDebugCheck(got_picture == true);
savePicture(picture, decoder_context->width, decoder_context->height);
avcodec_close(decoder_context);
av_free(decoder_context);
av_free(picture);
}
// Command line driver: parses "[options] infile [outfile]", loads the input image, encodes it
// as a single MPEG-2 intra frame and writes the bitstream to the output file.
//
// Returns 1 when no input was given, the image cannot be loaded or the output cannot be
// opened; 0 otherwise.
int main(int argc, char *argv[])
{
    MyAssertHandler assertHandler;
    MyMessageHandler messageHandler;

    nv::Path input;
    nv::Path output;

    // Parse arguments. Parsing stops at the first non-option argument (the input file).
    for (int arg = 1; arg < argc; ++arg)
    {
        // True when there is a following argument that does not look like an option.
        const bool nextIsValue = (arg + 1 < argc) && (argv[arg + 1][0] != '-');

        if (strcmp(argv[arg], "-q") == 0 || strcmp(argv[arg], "--quality") == 0)
        {
            if (nextIsValue)
            {
                s_quality = atof(argv[arg + 1]);
                ++arg;
            }
            continue;
        }

        if (argv[arg][0] == '-')
        {
            // Unrecognized options are ignored.
            continue;
        }

        input = argv[arg];

        if (nextIsValue)
        {
            output = argv[arg + 1];
        }
        else
        {
            // No explicit output: reuse the input path with its extension replaced.
            output.copy(input.str());
            output.stripExtension();
            output.append(".mpeg");
        }

        break;
    }

    printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007-2008\n\n");

    if (input.isNull())
    {
        printf("usage: nvmpegcompress [options] infile [outfile]\n\n");
        return 1;
    }

    // Load image.
    Image image;
    const bool loaded = image.load(input);
    if (!loaded)
    {
        fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
        return 1;
    }

    // Initialize codecs.
    avcodec_init();
    avcodec_register_all();

    // Alternatives that were tried: CODEC_ID_MPEG1VIDEO, CODEC_ID_MJPEG, CODEC_ID_THEORA,
    // CODEC_ID_H264.
    CodecID format = CODEC_ID_MPEG2VIDEO;

    // Encode frame.
    Array<uint8> frame;
    encodeFrame(image, format, frame);

    // Save resulting I-frame.
    StdOutputStream outputStream(output.str());
    if (outputStream.isError())
    {
        printf("Error opening '%s' for writing.\n", output.str());
        return 1;
    }

    outputStream.serialize(frame.mutableBuffer(), frame.size());

    //decodeFrame(frame, format);

    // @@ Compare image against original, and compute RMS.

    return 0;
}

View File

@ -1,435 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <nvimage/Image.h>
#include <nvimage/ImageIO.h>
#include <nvimage/BlockDXT.h>
#include <nvimage/ColorBlock.h>
#include <nvcore/Ptr.h>
#include <nvcore/Debug.h>
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvcore/TextWriter.h>
#include <nvcore/FileSystem.h>
#include <stdlib.h> // free
#include <string.h> // memcpy
#include <time.h> // clock
using namespace nv;
// File names of the images that make up each test set.

// Kodak image set
static const char * s_kodakImageSet[] = {
    "kodim01.png", "kodim02.png", "kodim03.png", "kodim04.png",
    "kodim05.png", "kodim06.png", "kodim07.png", "kodim08.png",
    "kodim09.png", "kodim10.png", "kodim11.png", "kodim12.png",
    "kodim13.png", "kodim14.png", "kodim15.png", "kodim16.png",
    "kodim17.png", "kodim18.png", "kodim19.png", "kodim20.png",
    "kodim21.png", "kodim22.png", "kodim23.png", "kodim24.png",
};

// Waterloo image set
static const char * s_waterlooImageSet[] = {
    "clegg.png", "frymire.png", "lena.png", "monarch.png",
    "peppers.png", "sail.png", "serrano.png", "tulips.png",
};

// Epic image set
static const char * s_epicImageSet[] = {
    "Bradley1.png", "Gradient.png", "MoreRocks.png",
    "Wall.png", "Rainbow.png", "Text.png",
};

// Farbrausch
static const char * s_farbrauschImageSet[] = {
    "t.2d.pn02.bmp",
    "t.aircondition.01.bmp",
    "t.bricks.02.bmp",
    "t.bricks.05.bmp",
    "t.concrete.cracked.01.bmp",
    "t.envi.colored02.bmp",
    "t.envi.colored03.bmp",
    "t.font.01.bmp",
    "t.sewers.01.bmp",
    "t.train.03.bmp",
    "t.yello.01.bmp",
};

// A named list of image files: the array of file names and the number of entries in it.
struct ImageSet
{
    const char ** fileNames;
    int fileCount;
};

// Element count of a statically sized array, as an int constant expression.
#define NV_TESTSUITE_COUNT_OF(array) int(sizeof(array) / sizeof((array)[0]))

// All available sets; the index into this table is the set number.
static ImageSet s_imageSets[] = {
    { s_kodakImageSet,      NV_TESTSUITE_COUNT_OF(s_kodakImageSet) },
    { s_waterlooImageSet,   NV_TESTSUITE_COUNT_OF(s_waterlooImageSet) },
    { s_epicImageSet,       NV_TESTSUITE_COUNT_OF(s_epicImageSet) },
    { s_farbrauschImageSet, NV_TESTSUITE_COUNT_OF(s_farbrauschImageSet) },
};
const int s_imageSetCount = NV_TESTSUITE_COUNT_OF(s_imageSets);

#undef NV_TESTSUITE_COUNT_OF
struct MyOutputHandler : public nvtt::OutputHandler
{
MyOutputHandler() : m_data(NULL), m_ptr(NULL) {}
~MyOutputHandler()
{
free(m_data);
}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
m_size = size;
m_width = width;
m_height = height;
free(m_data);
m_data = (unsigned char *)malloc(size);
m_ptr = m_data;
}
virtual bool writeData(const void * data, int size)
{
memcpy(m_ptr, data, size);
m_ptr += size;
return true;
}
Image * decompress(nvtt::Format format)
{
int bw = (m_width + 3) / 4;
int bh = (m_height + 3) / 4;
AutoPtr<Image> img( new Image() );
img->allocate(m_width, m_height);
if (format == nvtt::Format_BC1)
{
BlockDXT1 * block = (BlockDXT1 *)m_data;
for (int y = 0; y < bh; y++)
{
for (int x = 0; x < bw; x++)
{
ColorBlock colors;
block->decodeBlock(&colors);
for (int yy = 0; yy < 4; yy++)
{
for (int xx = 0; xx < 4; xx++)
{
Color32 c = colors.color(xx, yy);
if (x * 4 + xx < m_width && y * 4 + yy < m_height)
{
img->pixel(x * 4 + xx, y * 4 + yy) = c;
}
}
}
block++;
}
}
}
return img.release();
}
int m_size;
int m_width;
int m_height;
unsigned char * m_data;
unsigned char * m_ptr;
};
float rmsError(const Image * a, const Image * b)
{
nvCheck(a != NULL);
nvCheck(b != NULL);
nvCheck(a->width() == b->width());
nvCheck(a->height() == b->height());
int mse = 0;
const uint count = a->width() * a->height();
for (uint i = 0; i < count; i++)
{
Color32 c0 = a->pixel(i);
Color32 c1 = b->pixel(i);
int r = c0.r - c1.r;
int g = c0.g - c1.g;
int b = c0.b - c1.b;
//int a = c0.a - c1.a;
mse += r * r;
mse += g * g;
mse += b * b;
}
return sqrtf(float(mse) / count);
}
int main(int argc, char *argv[])
{
const uint version = nvtt::version();
const uint major = version / 100;
const uint minor = version % 100;
printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007 - 2008\n\n", major, minor);
int set = 0;
bool fast = false;
bool nocuda = false;
bool showHelp = false;
const char * basePath = "";
const char * outPath = "output";
const char * regressPath = NULL;
// Parse arguments.
for (int i = 1; i < argc; i++)
{
if (strcmp("-set", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
set = atoi(argv[i+1]);
i++;
}
}
else if (strcmp("-fast", argv[i]) == 0)
{
fast = true;
}
else if (strcmp("-nocuda", argv[i]) == 0)
{
nocuda = true;
}
else if (strcmp("-help", argv[i]) == 0)
{
showHelp = true;
}
else if (strcmp("-path", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
basePath = argv[i+1];
i++;
}
}
else if (strcmp("-out", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
outPath = argv[i+1];
i++;
}
}
else if (strcmp("-regress", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
regressPath = argv[i+1];
i++;
}
}
}
if (showHelp)
{
printf("usage: nvtestsuite [options]\n\n");
printf("Input options:\n");
printf(" -path <path>\tInput image path.\n");
printf(" -set [0:2]\tImage set.\n");
printf(" -regress <path>\tRegression directory.\n");
printf("Compression options:\n");
printf(" -fast \tFast compression.\n");
printf(" -nocuda \tDo not use cuda compressor.\n");
printf("Output options:\n");
printf(" -out <path> \tOutput directory.\n");
return 1;
}
nvtt::InputOptions inputOptions;
inputOptions.setMipmapGeneration(false);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_BC1);
if (fast)
{
compressionOptions.setQuality(nvtt::Quality_Fastest);
}
else
{
compressionOptions.setQuality(nvtt::Quality_Production);
}
nvtt::OutputOptions outputOptions;
outputOptions.setOutputHeader(false);
MyOutputHandler outputHandler;
outputOptions.setOutputHandler(&outputHandler);
nvtt::Context context;
context.enableCudaAcceleration(!nocuda);
FileSystem::changeDirectory(basePath);
FileSystem::createDirectory(outPath);
Path csvFileName;
csvFileName.format("%s/result.csv", outPath);
StdOutputStream csvStream(csvFileName);
TextWriter csvWriter(&csvStream);
float totalTime = 0;
float totalRMSE = 0;
int failedTests = 0;
float totalDiff = 0;
const char ** fileNames = s_imageSets[set].fileNames;
int fileCount = s_imageSets[set].fileCount;
for (int i = 0; i < fileCount; i++)
{
AutoPtr<Image> img( new Image() );
if (!img->load(fileNames[i]))
{
printf("Input image '%s' not found.\n", fileNames[i]);
return EXIT_FAILURE;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, img->width(), img->height());
inputOptions.setMipmapData(img->pixels(), img->width(), img->height());
printf("Compressing: \t'%s'\n", fileNames[i]);
clock_t start = clock();
context.process(inputOptions, compressionOptions, outputOptions);
clock_t end = clock();
printf(" Time: \t%.3f sec\n", float(end-start) / CLOCKS_PER_SEC);
totalTime += float(end-start);
AutoPtr<Image> img_out( outputHandler.decompress(nvtt::Format_BC1) );
Path outputFileName;
outputFileName.format("%s/%s", outPath, fileNames[i]);
outputFileName.stripExtension();
outputFileName.append(".png");
if (!ImageIO::save(outputFileName, img_out.ptr()))
{
printf("Error saving file '%s'.\n", outputFileName.str());
}
float rmse = rmsError(img.ptr(), img_out.ptr());
totalRMSE += rmse;
printf(" RMSE: \t%.4f\n", rmse);
// Output csv file
csvWriter << "\"" << fileNames[i] << "\"," << rmse << "\n";
if (regressPath != NULL)
{
Path regressFileName;
regressFileName.format("%s/%s", regressPath, fileNames[i]);
regressFileName.stripExtension();
regressFileName.append(".png");
AutoPtr<Image> img_reg( new Image() );
if (!img_reg->load(regressFileName.str()))
{
printf("Regression image '%s' not found.\n", regressFileName.str());
return EXIT_FAILURE;
}
float rmse_reg = rmsError(img.ptr(), img_reg.ptr());
float diff = rmse_reg - rmse;
totalDiff += diff;
const char * text = "PASSED";
if (equal(diff, 0)) text = "PASSED";
else if (diff < 0) {
text = "FAILED";
failedTests++;
}
printf(" Diff: \t%.4f (%s)\n", diff, text);
}
fflush(stdout);
}
totalRMSE /= fileCount;
totalDiff /= fileCount;
printf("Total Results:\n");
printf(" Total Time: \t%.3f sec\n", totalTime / CLOCKS_PER_SEC);
printf(" Average RMSE:\t%.4f\n", totalRMSE);
if (regressPath != NULL)
{
printf("Regression Results:\n");
printf(" Diff: %.4f\n", totalDiff);
printf(" %d/%d tests failed.\n", failedTests, fileCount);
}
return EXIT_SUCCESS;
}

View File

@ -1,63 +0,0 @@
# Command line tools. Every tool is built from a single source file plus the
# shared cmdline.h header. Only nvcompress links against nvtt; the other tools
# link against nvcore, nvmath and nvimage.
ADD_EXECUTABLE(nvcompress compress.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvdecompress decompress.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)
ADD_EXECUTABLE(nvddsinfo ddsinfo.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)
ADD_EXECUTABLE(nvimgdiff imgdiff.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)
ADD_EXECUTABLE(nvassemble assemble.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(nvzoom resize.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
ADD_EXECUTABLE(nv-gnome-thumbnailer thumbnailer.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nv-gnome-thumbnailer nvcore nvmath nvimage)
# Install all command line tools into <prefix>/bin.
INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom nv-gnome-thumbnailer DESTINATION bin)
# Use gconftool-2 to install gnome thumbnailer
# The schema is generated from its .in template at configure time and is only
# registered when gconftool-2 was found.
FIND_PROGRAM(GCONFTOOL2 gconftool-2)
IF(GCONFTOOL2)
CONFIGURE_FILE(nvtt-thumbnailer.schema.in ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema)
# The INSTALL(CODE ...) snippets below run at install time. ${GCONFTOOL2} is
# expanded when this file is processed, while the escaped
# \${GCONF_CONFIG_SOURCE} is only evaluated by the install script.
INSTALL(CODE "MESSAGE(STATUS \"Installing thumbnailer schema\")")
#gconftool-2 --get-default-source
INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --get-default-source OUTPUT_VARIABLE GCONF_CONFIG_SOURCE OUTPUT_STRIP_TRAILING_WHITESPACE)")
INSTALL(CODE "set(ENV{GCONF_CONFIG_SOURCE} \"\${GCONF_CONFIG_SOURCE}\")")
INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --makefile-install-rule ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema)")
ENDIF(GCONFTOOL2)
# UI tools
# nvcompressui is only built when Qt4 was found. It links against nvtt and the
# QtCore, QtGui and QtOpenGL libraries.
IF(QT4_FOUND) # AND NOT MSVC)
SET(QT_USE_QTOPENGL TRUE)
INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
SET(SRCS
ui/main.cpp
ui/configdialog.h
ui/configdialog.cpp)
SET(LIBS
nvtt
${QT_QTCORE_LIBRARY}
${QT_QTGUI_LIBRARY}
${QT_QTOPENGL_LIBRARY})
# Generate the ui_ header from the designer file and the moc source for the dialog.
QT4_WRAP_UI(UICS ui/configdialog.ui)
QT4_WRAP_CPP(MOCS ui/configdialog.h)
#QT4_ADD_RESOURCES(RCCS ui/configdialog.rc)
ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
ENDIF(QT4_FOUND) # AND NOT MSVC)

View File

@ -21,20 +21,20 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "cmdline.h"
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvimage/Image.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvtt/nvtt.h>
#include <nvimage/Image.h> // @@ It might be a good idea to use FreeImage directly instead of ImageIO.
#include <nvimage/ImageIO.h>
#include <nvimage/FloatImage.h>
#include <nvimage/DirectDrawSurface.h>
#include "cmdline.h"
#include <nvcore/Ptr.h>
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvcore/FileSystem.h>
#include <nvcore/Timer.h>
#include <time.h> // clock
//#define WINDOWS_LEAN_AND_MEAN
//#include <windows.h> // TIMER
struct MyOutputHandler : public nvtt::OutputHandler
@ -87,10 +87,7 @@ struct MyErrorHandler : public nvtt::ErrorHandler
{
virtual void error(nvtt::Error e)
{
#if _DEBUG
nvDebugBreak();
#endif
printf("Error: '%s'\n", nvtt::errorString(e));
}
};
@ -134,24 +131,18 @@ int main(int argc, char *argv[])
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
bool alpha = false;
bool normal = false;
bool color2normal = false;
bool wrapRepeat = false;
bool noMipmaps = false;
bool fast = false;
bool nocuda = false;
bool silent = false;
bool bc1n = false;
nvtt::Format format = nvtt::Format_BC1;
bool premultiplyAlpha = false;
nvtt::MipmapFilter mipmapFilter = nvtt::MipmapFilter_Box;
bool loadAsFloat = false;
const char * externalCompressor = NULL;
bool silent = false;
bool dds10 = false;
nv::Path input;
nv::Path output;
@ -163,10 +154,6 @@ int main(int argc, char *argv[])
if (strcmp("-color", argv[i]) == 0)
{
}
else if (strcmp("-alpha", argv[i]) == 0)
{
alpha = true;
}
else if (strcmp("-normal", argv[i]) == 0)
{
normal = true;
@ -186,23 +173,6 @@ int main(int argc, char *argv[])
{
noMipmaps = true;
}
else if (strcmp("-premula", argv[i]) == 0)
{
premultiplyAlpha = true;
}
else if (strcmp("-mipfilter", argv[i]) == 0)
{
if (i+1 == argc) break;
i++;
if (strcmp("box", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Box;
else if (strcmp("triangle", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Triangle;
else if (strcmp("kaiser", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Kaiser;
}
else if (strcmp("-float", argv[i]) == 0)
{
loadAsFloat = true;
}
// Compression options.
else if (strcmp("-fast", argv[i]) == 0)
@ -260,15 +230,11 @@ int main(int argc, char *argv[])
}
}
// Output options
// Misc options
else if (strcmp("-silent", argv[i]) == 0)
{
silent = true;
}
else if (strcmp("-dds10", argv[i]) == 0)
{
dds10 = true;
}
else if (argv[i][0] != '-')
{
@ -288,27 +254,19 @@ int main(int argc, char *argv[])
}
}
const uint version = nvtt::version();
const uint major = version / 100;
const uint minor = version % 100;
printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007\n\n", major, minor);
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
if (input.isNull())
{
printf("usage: nvcompress [options] infile [outfile]\n\n");
printf("Input options:\n");
printf(" -color \tThe input image is a color map (default).\n");
printf(" -alpha \tThe input image has an alpha channel used for transparency.\n");
printf(" -normal \tThe input image is a normal map.\n");
printf(" -tonormal \tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n");
printf(" -repeat \tRepeat wrapping mode.\n");
printf(" -nomips \tDisable mipmap generation.\n");
printf(" -premula \tPremultiply alpha into color channel.\n");
printf(" -mipfilter \tMipmap filter. One of the following: box, triangle, kaiser.\n\n");
printf(" -color \tThe input image is a color map (default).\n");
printf(" -normal \tThe input image is a normal map.\n");
printf(" -tonormal\tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n");
printf(" -repeat \tRepeat wrapping mode.\n");
printf(" -nomips \tDisable mipmap generation.\n\n");
printf("Compression options:\n");
printf(" -fast \tFast compression.\n");
@ -323,19 +281,10 @@ int main(int argc, char *argv[])
printf(" -bc4 \tBC4 format (ATI1)\n");
printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");
printf("Output options:\n");
printf(" -silent \tDo not output progress messages\n");
printf(" -dds10 \tUse DirectX 10 DDS format\n\n");
return 1;
}
// Make sure input file exists.
if (!nv::FileSystem::exists(input.str()))
{
fprintf(stderr, "The file '%s' does not exist.\n", input.str());
return 1;
}
// @@ Make sure input file exists.
// Set input options.
nvtt::InputOptions inputOptions;
@ -377,7 +326,7 @@ int main(int argc, char *argv[])
{
for (uint m = 0; m < mipmapCount; m++)
{
dds.mipmap(&mipmap, f, m); // @@ Load as float.
dds.mipmap(&mipmap, f, m);
inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), 1, f, m);
}
@ -385,42 +334,16 @@ int main(int argc, char *argv[])
}
else
{
if (nv::strCaseCmp(input.extension(), ".exr") == 0)
// Regular image.
nv::Image image;
if (!image.load(input))
{
loadAsFloat = true;
}
if (loadAsFloat)
{
nv::AutoPtr<nv::FloatImage> image(nv::ImageIO::loadFloat(input));
if (image == NULL)
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return 1;
}
inputOptions.setFormat(nvtt::InputFormat_RGBA_32F);
inputOptions.setTextureLayout(nvtt::TextureType_2D, image->width(), image->height());
for (uint i = 0; i < image->componentNum(); i++)
{
inputOptions.setMipmapChannelData(image->channel(i), i, image->width(), image->height());
}
}
else
{
// Regular image.
nv::Image image;
if (!image.load(input))
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return 1;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
inputOptions.setMipmapData(image.pixels(), image.width(), image.height());
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return 1;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
inputOptions.setMipmapData(image.pixels(), image.width(), image.height());
}
if (wrapRepeat)
@ -432,15 +355,6 @@ int main(int argc, char *argv[])
inputOptions.setWrapMode(nvtt::WrapMode_Clamp);
}
if (alpha)
{
inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency);
}
else
{
inputOptions.setAlphaMode(nvtt::AlphaMode_None);
}
if (normal)
{
setNormalMap(inputOptions);
@ -459,23 +373,8 @@ int main(int argc, char *argv[])
inputOptions.setMipmapGeneration(false);
}
if (premultiplyAlpha)
{
inputOptions.setPremultiplyAlpha(true);
inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied);
}
inputOptions.setMipmapFilter(mipmapFilter);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(format);
/*if (format == nvtt::Format_RGBA)
{
compressionOptions.setPixelType(nvtt::PixelType_Float);
compressionOptions.setPixelFormat(16, 16, 16, 16);
}*/
if (fast)
{
compressionOptions.setQuality(nvtt::Quality_Fastest);
@ -506,20 +405,20 @@ int main(int argc, char *argv[])
return 1;
}
nvtt::Context context;
context.enableCudaAcceleration(!nocuda);
nvtt::Compressor compressor;
compressor.enableCudaAcceleration(!nocuda);
printf("CUDA acceleration ");
if (context.isCudaAccelerationEnabled())
if (compressor.isCudaAccelerationEnabled())
{
printf("ENABLED\n\n");
}
else
{
printf("DISABLED\n\n");
}
}
outputHandler.setTotal(context.estimateSize(inputOptions, compressionOptions));
outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions));
outputHandler.setDisplayProgress(!silent);
nvtt::OutputOptions outputOptions;
@ -527,24 +426,30 @@ int main(int argc, char *argv[])
outputOptions.setOutputHandler(&outputHandler);
outputOptions.setErrorHandler(&errorHandler);
if (dds10)
{
outputOptions.setContainer(nvtt::Container_DDS10);
}
// printf("Press ENTER.\n");
// fflush(stdout);
// getchar();
Timer timer;
timer.start();
bool success = context.process(inputOptions, compressionOptions, outputOptions);
/* LARGE_INTEGER temp;
QueryPerformanceFrequency((LARGE_INTEGER*) &temp);
double freq = ((double) temp.QuadPart) / 1000.0;
if (success)
{
printf("\rtime taken: %.3f seconds\n", float(timer.elapsed()) / 1000.0f);
}
LARGE_INTEGER start_time;
QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
*/
clock_t start = clock();
compressor.process(inputOptions, compressionOptions, outputOptions);
/*
LARGE_INTEGER end_time;
QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
float diff_time = (float) (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
printf("\rtime taken: %.3f seconds\n", diff_time/1000);
*/
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return 0;
}

View File

@ -12,6 +12,9 @@
<property name="windowTitle" >
<string>NVIDIA Texture Tools</string>
</property>
<property name="windowIcon" >
<iconset/>
</property>
<property name="sizeGripEnabled" >
<bool>true</bool>
</property>

View File

@ -31,161 +31,41 @@
#include "cmdline.h"
#include <time.h> // clock
int main(int argc, char *argv[])
{
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
bool forcenormal = false;
bool mipmaps = false;
bool faces = false;
bool savePNG = false;
nv::Path input;
nv::Path output;
// Parse arguments.
for (int i = 1; i < argc; i++)
{
if (strcmp("-forcenormal", argv[i]) == 0)
{
forcenormal = true;
}
else if (strcmp("-mipmaps", argv[i]) == 0)
{
mipmaps = true;
}
else if (strcmp("-faces", argv[i]) == 0)
{
faces = true;
}
else if (strcmp("-format", argv[i]) == 0)
{
if (i+1 == argc) break;
i++;
#ifdef HAVE_PNG
if (strcmp("png", argv[i]) == 0) savePNG = true;
else
#endif
if (strcmp("tga", argv[i]) == 0) savePNG = false;
else
{
fprintf(stderr, "Unsupported output format '%s', defaulting to 'tga'.\n", argv[i]);
savePNG = false;
}
}
else if (argv[i][0] != '-')
{
input = argv[i];
if (i+1 < argc && argv[i+1][0] != '-')
{
output = argv[i+1];
}
else
{
output.copy(input.str());
}
break;
}
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
if (input.isNull())
if (argc != 2)
{
printf("usage: nvdecompress [options] infile [outfile]\n\n");
printf("Note: the .tga or .png extension is forced on outfile\n\n");
printf("Input options:\n");
printf(" -forcenormal \tThe input image is a normal map.\n");
printf(" -mipmaps \tDecompress all mipmaps.\n");
printf(" -faces \tDecompress all faces.\n");
printf(" -format <format>\tOutput format ('tga' or 'png').\n");
return 1;
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
printf("usage: nvdecompress 'ddsfile'\n\n");
return 1;
}
// Load surface.
nv::DirectDrawSurface dds(input);
nv::DirectDrawSurface dds(argv[1]);
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
return 1;
}
if (!dds.isSupported() || dds.isTexture3D())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
printf("The file '%s' is not a valid DDS file.\n", argv[1]);
return 1;
}
uint faceCount;
if (dds.isTexture2D())
{
faceCount = 1;
}
else
{
nvCheck(dds.isTextureCube());
faceCount = 6;
nv::Path name(argv[1]);
name.stripExtension();
name.append(".tga");
nv::StdOutputStream stream(name.str());
if (stream.isError()) {
printf("Error opening '%s' for writting\n", name.str());
return 1;
}
uint mipmapCount = dds.mipmapCount();
clock_t start = clock();
// apply arguments
if (forcenormal)
{
dds.setNormalFlag(true);
}
if (!faces)
{
faceCount = 1;
}
if (!mipmaps)
{
mipmapCount = 1;
}
// @@ TODO: Add command line options to output mipmaps, cubemap faces, etc.
nv::Image img;
dds.mipmap(&img, 0, 0); // get first image
nv::ImageIO::saveTGA(stream, &img);
nv::Image mipmap;
nv::Path name;
// strip extension, we force the tga extension
output.stripExtension();
// extract faces and mipmaps
for (uint f = 0; f < faceCount; f++)
{
for (uint m = 0; m < mipmapCount; m++)
{
dds.mipmap(&mipmap, f, m);
// set output filename, if we are doing faces and/or mipmaps
name.copy(output);
if (faces) name.appendFormat("_face%d", f);
if (mipmaps) name.appendFormat("_mipmap%d", m);
name.append(savePNG ? ".png" : ".tga");
nv::StdOutputStream stream(name.str());
if (stream.isError()) {
fprintf(stderr, "Error opening '%s' for writting\n", name.str());
return 1;
}
nv::ImageIO::save(name, stream, &mipmap);
}
}
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return 0;
}

View File

@ -1,26 +0,0 @@
<gconfschemafile>
<schemalist>
<schema>
<key>/schemas/desktop/gnome/thumbnailers/image@x-dds/enable</key>
<applyto>/desktop/gnome/thumbnailers/image@x-dds/enable</applyto>
<owner>nvtt-thumbnailer</owner>
<type>bool</type>
<default>true</default>
<locale name="C">
<short></short>
<long></long>
</locale>
</schema>
<schema>
<key>/schemas/desktop/gnome/thumbnailers/image@x-dds/command</key>
<applyto>/desktop/gnome/thumbnailers/image@x-dds/command</applyto>
<owner>nvtt-thumbnailer</owner>
<type>string</type>
<default>@CMAKE_INSTALL_PREFIX@/bin/nv-gnome-thumbnailer -s %s %i %o</default>
<locale name="C">
<short></short>
<long></long>
</locale>
</schema>
</schemalist>
</gconfschemafile>

View File

@ -176,7 +176,7 @@ int main(int argc, char *argv[])
result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output);
nv::ImageIO::save(output, stream, result.ptr());
nv::ImageIO::saveTGA(stream, result.ptr()); // @@ Add generic save function. Add support for png too.
return 0;
}

View File

@ -1,158 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Ptr.h>
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvcore/Containers.h>
#include <nvimage/Image.h>
#include <nvimage/ImageIO.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Filter.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvmath/Color.h>
#include <nvmath/Vector.h>
#include <math.h>
#include "cmdline.h"
// Loads the image stored in `fileName` into `image`.
//
// Files with a ".dds" extension (compared case-insensitively) are opened as a
// DirectDrawSurface and only their first image (face 0, mipmap 0) is
// extracted; every other file goes through nv::Image::load.
// Returns false, after printing a message to stderr, when the file cannot be
// loaded.
static bool loadImage(nv::Image & image, const char * fileName)
{
	const bool isDDS = (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0);

	if (!isDDS)
	{
		// Regular image.
		if (image.load(fileName))
		{
			return true;
		}

		fprintf(stderr, "The file '%s' is not a supported image type.\n", fileName);
		return false;
	}

	nv::DirectDrawSurface dds(fileName);
	if (dds.isValid())
	{
		dds.mipmap(&image, 0, 0); // get first image
		return true;
	}

	fprintf(stderr, "The file '%s' is not a valid DDS file.\n", fileName);
	return false;
}
// Entry point of the gnome thumbnailer.
//
// usage: nv-gnome-thumbnailer [-s size] input output
//
// Loads `input` and writes it to `output`. Images whose width or height
// exceeds the requested size are first scaled down, preserving the aspect
// ratio, so that the larger side equals that size. The dimensions of the
// original image are attached to the output as "Thumb::Image::Width" and
// "Thumb::Image::Height" tags.
// Returns 0 on success and 1 on invalid arguments or when the input cannot be
// loaded.
int main(int argc, char *argv[])
{
	//MyAssertHandler assertHandler;
	MyMessageHandler messageHandler;

	float gamma = 2.2f;
	nv::Path input;
	nv::Path output;
	int size = 128;

	// Parse arguments.
	for (int i = 1; i < argc; i++)
	{
		// Input options.
		if (strcmp("-s", argv[i]) == 0)
		{
			if (i+1 < argc && argv[i+1][0] != '-') {
				size = (int)atoi(argv[i+1]);
				i++;
			}
		}
		else if (argv[i][0] != '-')
		{
			// The first non-option argument is the input; the output must follow it.
			input = argv[i];

			if (i+1 < argc && argv[i+1][0] != '-') {
				output = argv[i+1];
			}
			else
			{
				fprintf(stderr, "No output filename.\n");
				return 1;
			}

			break;
		}
	}

	if (input.isNull() || output.isNull())
	{
		printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");

		printf("usage: nv-gnome-thumbnailer [options] input output\n\n");

		printf("Options:\n");
		printf(" -s size\tThumbnail size (default = 128)\n");

		return 1;
	}

	// atoi yields 0 for non-numeric input; a zero or negative size would request
	// an empty thumbnail or break the unsigned comparisons below.
	if (size <= 0)
	{
		fprintf(stderr, "Invalid thumbnail size '%d'.\n", size);
		return 1;
	}
	const uint maxSize = uint(size);

	nv::Image image;
	if (!loadImage(image, input)) return 1;

	nv::ImageIO::ImageMetaData metaData;
	metaData.tagMap.add("Thumb::Image::Width", nv::StringBuilder().number (image.width()));
	metaData.tagMap.add("Thumb::Image::Height", nv::StringBuilder().number (image.height()));

	if ((image.width() > maxSize) || (image.height() > maxSize))
	{
		// Resize in linear space and convert back with the same gamma.
		nv::FloatImage fimage(&image);
		fimage.toLinear(0, 3, gamma);

		uint thumbW, thumbH;
		if (image.width() > image.height())
		{
			thumbW = maxSize;
			thumbH = uint ((float (image.height()) / float (image.width())) * size);
		}
		else
		{
			thumbW = uint ((float (image.width()) / float (image.height())) * size);
			thumbH = maxSize;
		}

		// Very elongated images would otherwise truncate the short side to 0.
		if (thumbW == 0) thumbW = 1;
		if (thumbH == 0) thumbH = 1;

		nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp));

		nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
		result->setFormat(nv::Image::Format_ARGB);

		nv::StdOutputStream stream(output);
		nv::ImageIO::save(output, stream, result.ptr(), &metaData);
	}
	else
	{
		// Small enough already: save the image unchanged.
		nv::StdOutputStream stream(output);
		nv::ImageIO::save(output, stream, &image, &metaData);
	}

	return 0;
}