Tag 2.0.8 for release.

This commit is contained in:
castano
2010-05-14 18:01:41 +00:00
parent f6a39d6eab
commit eb01ca604f
375 changed files with 12760 additions and 28091 deletions

View File

@ -5,17 +5,14 @@ ADD_SUBDIRECTORY(squish)
SET(NVTT_SRCS
nvtt.h
nvtt.cpp
Context.h
Context.cpp
Compressor.h
Compressor.cpp
nvtt_wrapper.h
nvtt_wrapper.cpp
Compressor.h
CompressorDXT.h
CompressorDXT.cpp
CompressorRGB.h
CompressorRGB.cpp
CompressorRGBE.h
CompressorRGBE.cpp
CompressDXT.h
CompressDXT.cpp
CompressRGB.h
CompressRGB.cpp
QuickCompressDXT.h
QuickCompressDXT.cpp
OptimalCompressDXT.h
@ -27,27 +24,27 @@ SET(NVTT_SRCS
InputOptions.cpp
OutputOptions.h
OutputOptions.cpp
TexImage.h TexImage.cpp
cuda/CudaUtils.h
cuda/CudaUtils.cpp
cuda/CudaMath.h
cuda/BitmapTable.h
cuda/CudaCompressorDXT.h
cuda/CudaCompressorDXT.cpp)
cuda/Bitmaps.h
cuda/CudaCompressDXT.h
cuda/CudaCompressDXT.cpp)
IF (CUDA_FOUND)
IF(CUDA_FOUND)
ADD_DEFINITIONS(-DHAVE_CUDA)
CUDA_COMPILE(CUDA_SRCS cuda/CompressKernel.cu)
WRAP_CUDA(CUDA_SRCS cuda/CompressKernel.cu)
SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS})
SET(LIBS ${LIBS} ${CUDA_LIBRARIES})
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
ENDIF (CUDA_FOUND)
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_PATH})
ENDIF(CUDA_FOUND)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVTT_EXPORTS)
IF(NVTT_SHARED)
IF(NVTT_SHARED)
ADD_DEFINITIONS(-DNVTT_SHARED=1)
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
@ -63,5 +60,54 @@ INSTALL(TARGETS nvtt
INSTALL(FILES nvtt.h DESTINATION include/nvtt)
ADD_SUBDIRECTORY(tools)
ADD_SUBDIRECTORY(tests)
# test executables
ADD_EXECUTABLE(nvcompress tools/compress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvdecompress tools/decompress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)
ADD_EXECUTABLE(nvddsinfo tools/ddsinfo.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)
ADD_EXECUTABLE(nvimgdiff tools/imgdiff.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)
ADD_EXECUTABLE(nvassemble tools/assemble.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin)
# UI tools
IF(QT4_FOUND AND NOT MSVC)
SET(QT_USE_QTOPENGL TRUE)
INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
SET(SRCS
tools/main.cpp
tools/configdialog.h
tools/configdialog.cpp)
SET(LIBS
nvtt
${QT_QTCORE_LIBRARY}
${QT_QTGUI_LIBRARY}
${QT_QTOPENGL_LIBRARY})
QT4_WRAP_UI(UICS tools/configdialog.ui)
QT4_WRAP_CPP(MOCS tools/configdialog.h)
#QT4_ADD_RESOURCES(RCCS tools/configdialog.rc)
ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
ENDIF(QT4_FOUND AND NOT MSVC)

597
src/nvtt/CompressDXT.cpp Normal file
View File

@ -0,0 +1,597 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Memory.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "nvtt.h"
#include "CompressDXT.h"
#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
// squish
#include "squish/colourset.h"
//#include "squish/clusterfit.h"
#include "squish/fastclusterfit.h"
#include "squish/weightedclusterfit.h"
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
#include "atitc/ATI_Compress.h"
#endif
//#include <time.h>
using namespace nv;
using namespace nvtt;
// Create a compressor that has no image bound yet; the alpha mode starts
// out as AlphaMode_None. Call setImage() before any compress*() method.
nv::FastCompressor::FastCompressor() :
    m_image(NULL),
    m_alphaMode(AlphaMode_None)
{
}
// The destructor body is empty: m_image is only a const pointer handed in
// through setImage() and is not released here.
nv::FastCompressor::~FastCompressor()
{
}
// Remember which image the following compress*() calls should read, together
// with the alpha mode that accompanies it. Only the pointer is stored; no
// pixel data is copied.
void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
    m_alphaMode = alphaMode;
    m_image = image;
}
// Encode the bound image with QuickCompress::compressDXT1, stepping through
// it in increments of 4 pixels in x and y. Every encoded block is passed to
// outputOptions.outputHandler when one is set; without a handler the block is
// simply dropped. m_image is dereferenced without a null check.
void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT1 encoded;

    for (uint ty = 0; ty < height; ty += 4)
    {
        for (uint tx = 0; tx < width; tx += 4)
        {
            // Load the tile whose top-left corner is (tx, ty) and encode it.
            tile.init(m_image, tx, ty);
            QuickCompress::compressDXT1(tile, &encoded);

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
// Same traversal as compressDXT1, but each tile goes through
// QuickCompress::compressDXT1a. Blocks are written to the output handler
// if one is installed.
void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions)
{
    const uint imageWidth = m_image->width();
    const uint imageHeight = m_image->height();

    ColorBlock pixels;
    BlockDXT1 packed;

    uint row = 0;
    while (row < imageHeight)
    {
        uint col = 0;
        while (col < imageWidth)
        {
            pixels.init(m_image, col, row);
            QuickCompress::compressDXT1a(pixels, &packed);

            if (outputOptions.outputHandler != NULL)
            {
                outputOptions.outputHandler->writeData(&packed, sizeof(packed));
            }
            col += 4;
        }
        row += 4;
    }
}
// Walk the bound image in steps of 4 pixels and encode every tile with
// QuickCompress::compressDXT3, forwarding each BlockDXT3 to the output
// handler when there is one.
void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock source;
    BlockDXT3 result;

    for (uint top = 0; top < height; top += 4)
    {
        for (uint left = 0; left < width; left += 4)
        {
            source.init(m_image, left, top);
            QuickCompress::compressDXT3(source, &result);

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&result, sizeof(result));
        }
    }
}
// Encode the bound image tile by tile (4-pixel steps) with
// QuickCompress::compressDXT5; the third argument of that call is always 0.
// Each BlockDXT5 goes to the output handler when one is set.
void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions)
{
    const uint imageWidth = m_image->width();
    const uint imageHeight = m_image->height();

    ColorBlock tile;
    BlockDXT5 encoded;

    for (uint by = 0; by < imageHeight; by += 4)
    {
        for (uint bx = 0; bx < imageWidth; bx += 4)
        {
            tile.init(m_image, bx, by);
            QuickCompress::compressDXT5(tile, &encoded, 0);

            if (outputOptions.outputHandler != NULL)
            {
                outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
            }
        }
    }
}
// DXT5n variant of the fast path: every tile is first rearranged with
// ColorBlock::swizzleDXT5n() and then encoded exactly like compressDXT5
// (QuickCompress::compressDXT5 with a third argument of 0).
void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT5 encoded;

    for (uint ty = 0; ty < height; ty += 4)
    {
        for (uint tx = 0; tx < width; tx += 4)
        {
            tile.init(m_image, tx, ty);

            // Swizzle before encoding; the encoder itself is unchanged.
            tile.swizzleDXT5n();
            QuickCompress::compressDXT5(tile, &encoded, 0);

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
// Create a compressor with no image bound and the alpha mode set to
// AlphaMode_None. setImage() has to be called before compressing.
nv::SlowCompressor::SlowCompressor() :
    m_image(NULL),
    m_alphaMode(AlphaMode_None)
{
}
// The destructor body is empty: the image pointer stored by setImage() is
// not released here.
nv::SlowCompressor::~SlowCompressor()
{
}
// Bind the image (pointer only, no copy) and the alpha mode that the
// compress*() methods will use afterwards.
void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
    m_alphaMode = alphaMode;
    m_image = image;
}
// DXT1 encoding through squish's WeightedClusterFit.
//
// The fitter's metric is taken from compressionOptions.colorWeight. Tiles for
// which ColorBlock::isSingleColor() holds skip the fitter and use
// OptimalCompress::compressDXT1 on the first pixel instead. Every block is
// handed to the output handler when one is installed.
void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT1 encoded;

    squish::WeightedClusterFit fitter;
    fitter.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    for (uint ty = 0; ty < height; ty += 4)
    {
        for (uint tx = 0; tx < width; tx += 4)
        {
            tile.init(m_image, tx, ty);

            if (!tile.isSingleColor())
            {
                squish::ColourSet colours((uint8 *)tile.colors(), 0, true);
                fitter.SetColourSet(&colours, squish::kDxt1);
                fitter.Compress(&encoded);
            }
            else
            {
                OptimalCompress::compressDXT1(tile.color(0), &encoded);
            }

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
// DXT1a encoding (DXT1 with 1-bit alpha) through squish's WeightedClusterFit.
//
// A pixel counts as transparent when its alpha is below 128. Tiles that are
// either entirely transparent, or free of transparent pixels and single
// coloured, are encoded with OptimalCompress::compressDXT1a from the first
// pixel; every other tile goes through the cluster fitter.
void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT1 encoded;

    squish::WeightedClusterFit fitter;
    fitter.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    for (uint ty = 0; ty < height; ty += 4)
    {
        for (uint tx = 0; tx < width; tx += 4)
        {
            tile.init(m_image, tx, ty);

            // Count the transparent pixels among the 16 of this tile.
            uint transparentCount = 0;
            for (uint i = 0; i < 16; i++)
            {
                if (tile.color(i).a < 128) transparentCount++;
            }

            const bool useOptimal = (transparentCount == 0 && tile.isSingleColor()) || transparentCount == 16;

            if (useOptimal)
            {
                OptimalCompress::compressDXT1a(tile.color(0), &encoded);
            }
            else
            {
                squish::ColourSet colours((uint8 *)tile.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
                fitter.SetColourSet(&colours, squish::kDxt1);
                fitter.Compress(&encoded);
            }

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
// DXT3 encoding: the alpha half of every block comes from
// OptimalCompress::compressDXT3A, the colour half from
// OptimalCompress::compressDXT1 for single-colour tiles and from squish's
// WeightedClusterFit (alpha-weighted colour set) otherwise.
void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT3 encoded;

    squish::WeightedClusterFit fitter;
    fitter.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    for (uint ty = 0; ty < height; ty += 4)
    {
        for (uint tx = 0; tx < width; tx += 4)
        {
            tile.init(m_image, tx, ty);

            // Alpha part.
            OptimalCompress::compressDXT3A(tile, &encoded.alpha);

            // Colour part.
            if (!tile.isSingleColor())
            {
                squish::ColourSet colours((uint8 *)tile.colors(), squish::kWeightColourByAlpha);
                fitter.SetColourSet(&colours, 0);
                fitter.Compress(&encoded.color);
            }
            else
            {
                OptimalCompress::compressDXT1(tile.color(0), &encoded.color);
            }

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
// DXT5 encoding.
//
// Alpha: OptimalCompress::compressDXT5A when the requested quality is
// Quality_Highest, QuickCompress::compressDXT5A for every other quality.
// Colour: OptimalCompress::compressDXT1 for single-colour tiles, squish's
// WeightedClusterFit (alpha-weighted colour set) otherwise.
void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT5 encoded;

    squish::WeightedClusterFit fitter;
    fitter.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    // The quality setting does not change while the image is processed.
    const bool highestQuality = (compressionOptions.quality == Quality_Highest);

    for (uint ty = 0; ty < height; ty += 4)
    {
        for (uint tx = 0; tx < width; tx += 4)
        {
            tile.init(m_image, tx, ty);

            // Alpha part.
            if (highestQuality) OptimalCompress::compressDXT5A(tile, &encoded.alpha);
            else QuickCompress::compressDXT5A(tile, &encoded.alpha);

            // Colour part.
            if (!tile.isSingleColor())
            {
                squish::ColourSet colours((uint8 *)tile.colors(), squish::kWeightColourByAlpha);
                fitter.SetColourSet(&colours, 0);
                fitter.Compress(&encoded.color);
            }
            else
            {
                OptimalCompress::compressDXT1(tile.color(0), &encoded.color);
            }

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
// DXT5n encoding: each tile is swizzled with ColorBlock::swizzleDXT5n(), its
// alpha block is produced by the DXT5A encoder (optimal at Quality_Highest,
// quick otherwise) and its colour block by OptimalCompress::compressDXT1G.
void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    BlockDXT5 encoded;

    const bool highestQuality = (compressionOptions.quality == Quality_Highest);

    for (uint ty = 0; ty < height; ty += 4)
    {
        for (uint tx = 0; tx < width; tx += 4)
        {
            tile.init(m_image, tx, ty);
            tile.swizzleDXT5n();

            // X component, stored in the alpha block.
            if (highestQuality) OptimalCompress::compressDXT5A(tile, &encoded.alpha);
            else QuickCompress::compressDXT5A(tile, &encoded.alpha);

            // Y component, stored in the colour block.
            OptimalCompress::compressDXT1G(tile, &encoded.color);

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
// BC4 encoding: one AlphaBlockDXT5 per tile, produced by
// OptimalCompress::compressDXT5A at Quality_Highest and by
// QuickCompress::compressDXT5A for every other quality.
void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tile;
    AlphaBlockDXT5 encoded;

    const bool highestQuality = (compressionOptions.quality == Quality_Highest);

    for (uint ty = 0; ty < height; ty += 4)
    {
        for (uint tx = 0; tx < width; tx += 4)
        {
            tile.init(m_image, tx, ty);

            if (highestQuality) OptimalCompress::compressDXT5A(tile, &encoded);
            else QuickCompress::compressDXT5A(tile, &encoded);

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
// BC5 encoding: every tile is loaded twice, once followed by splatX() and
// once by splatY(), and each copy is encoded with the DXT5A encoder into
// block.x and block.y respectively (optimal encoder at Quality_Highest, quick
// encoder otherwise).
void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    const uint width = m_image->width();
    const uint height = m_image->height();

    ColorBlock tileX;
    ColorBlock tileY;
    BlockATI2 encoded;

    const bool highestQuality = (compressionOptions.quality == Quality_Highest);

    for (uint ty = 0; ty < height; ty += 4)
    {
        for (uint tx = 0; tx < width; tx += 4)
        {
            tileX.init(m_image, tx, ty);
            tileX.splatX();

            tileY.init(m_image, tx, ty);
            tileY.splatY();

            if (highestQuality)
            {
                OptimalCompress::compressDXT5A(tileX, &encoded.x);
                OptimalCompress::compressDXT5A(tileY, &encoded.y);
            }
            else
            {
                QuickCompress::compressDXT5A(tileX, &encoded.x);
                QuickCompress::compressDXT5A(tileY, &encoded.y);
            }

            if (outputOptions.outputHandler == NULL) continue;
            outputOptions.outputHandler->writeData(&encoded, sizeof(encoded));
        }
    }
}
#if defined(HAVE_S3QUANT)
// Compress `image` to DXT1 with the external S3 quantizer (CodeRGBBlock).
//
// Every tile is coded twice, once with inLevel = 4 and once with inLevel = 3.
// The candidate with the smaller blockError() is written to the output handler
// (when one is set). After the whole image has been processed the mean error
// per block is printed to stdout.
void nv::s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{
    const uint w = image->width();
    const uint h = image->height();

    float error = 0.0f;

    BlockDXT1 dxtBlock3;
    BlockDXT1 dxtBlock4;
    ColorBlock block;

    for (uint y = 0; y < h; y += 4) {
        for (uint x = 0; x < w; x += 4) {
            block.init(image, x, y);

            // Init rgb block: the 16 pixels as floats in [0, 1], equal channel weights.
            RGBBlock rgbBlock;
            rgbBlock.n = 16;
            for (uint i = 0; i < 16; i++) {
                rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f);
                rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f);
                rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f);
            }
            rgbBlock.weight[0] = 1.0f;
            rgbBlock.weight[1] = 1.0f;
            rgbBlock.weight[2] = 1.0f;

            // First candidate: inLevel = 4.
            rgbBlock.inLevel = 4;
            CodeRGBBlock(&rgbBlock);

            // Copy results to DXT block.
            dxtBlock4.col0.r = rgbBlock.endPoint[0][0];
            dxtBlock4.col0.g = rgbBlock.endPoint[0][1];
            dxtBlock4.col0.b = rgbBlock.endPoint[0][2];
            dxtBlock4.col1.r = rgbBlock.endPoint[1][0];
            dxtBlock4.col1.g = rgbBlock.endPoint[1][1];
            dxtBlock4.col1.b = rgbBlock.endPoint[1][2];
            dxtBlock4.setIndices(rgbBlock.index);

            // Keep col0 >= col1 for this candidate; swapping the endpoints is
            // compensated by flipping the low bit of every 2-bit index.
            if (dxtBlock4.col0.u < dxtBlock4.col1.u) {
                swap(dxtBlock4.col0.u, dxtBlock4.col1.u);
                dxtBlock4.indices ^= 0x55555555;
            }

            uint error4 = blockError(block, dxtBlock4);

            // Second candidate: inLevel = 3.
            rgbBlock.inLevel = 3;
            CodeRGBBlock(&rgbBlock);

            // Copy results to DXT block.
            dxtBlock3.col0.r = rgbBlock.endPoint[0][0];
            dxtBlock3.col0.g = rgbBlock.endPoint[0][1];
            dxtBlock3.col0.b = rgbBlock.endPoint[0][2];
            dxtBlock3.col1.r = rgbBlock.endPoint[1][0];
            dxtBlock3.col1.g = rgbBlock.endPoint[1][1];
            dxtBlock3.col1.b = rgbBlock.endPoint[1][2];
            dxtBlock3.setIndices(rgbBlock.index);

            // Keep col0 <= col1 for this candidate; only indices whose high bit
            // is clear have their low bit flipped when the endpoints are swapped.
            if (dxtBlock3.col0.u > dxtBlock3.col1.u) {
                swap(dxtBlock3.col0.u, dxtBlock3.col1.u);
                dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555;
            }

            uint error3 = blockError(block, dxtBlock3);

            // Emit whichever candidate has the smaller error (ties go to inLevel 4).
            if (error3 < error4) {
                error += error3;
                if (outputOptions.outputHandler != NULL) {
                    outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3));
                }
            }
            else {
                error += error4;
                if (outputOptions.outputHandler != NULL) {
                    outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4));
                }
            }
        }
    }

    // Average over the number of blocks actually coded. Each factor needs its
    // own parentheses: `(w+3)/4 * (h+3)/4` groups as `(((w+3)/4) * (h+3)) / 4`,
    // which is not the block count (a 16x6 image gives 9 instead of 8).
    const uint blockCount = ((w + 3) / 4) * ((h + 3) / 4);
    printf("error = %f\n", blockCount != 0 ? error / blockCount : 0.0f);
}
#endif // defined(HAVE_S3QUANT)
#if defined(HAVE_ATITC)
// Compress `image` to DXT1 with the external ATI_Compress library and pass the
// whole compressed surface to the output handler in a single writeData() call.
//
// The source texture descriptor points straight at the image's pixels; the
// destination buffer is allocated here with mem::malloc and released again
// before returning.
void nv::atiCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
{
    // Init source texture
    ATI_TC_Texture srcTexture;
    srcTexture.dwSize = sizeof(srcTexture);
    srcTexture.dwWidth = image->width();
    srcTexture.dwHeight = image->height();
    srcTexture.dwPitch = image->width() * 4;
    srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
    srcTexture.pData = (ATI_TC_BYTE*) image->pixels();

    // Init dest texture
    ATI_TC_Texture destTexture;
    destTexture.dwSize = sizeof(destTexture);
    destTexture.dwWidth = image->width();
    destTexture.dwHeight = image->height();
    destTexture.dwPitch = 0;
    destTexture.format = ATI_TC_FORMAT_DXT1;
    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);

    // Compress
    ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);

    if (outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
    }

    // The destination buffer is owned by this function; without this call it
    // leaked on every invocation.
    mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)

87
src/nvtt/CompressDXT.h Normal file
View File

@ -0,0 +1,87 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSDXT_H
#define NV_TT_COMPRESSDXT_H
#include <nvimage/nvimage.h>
#include "nvtt.h"
namespace nv
{
class Image;
class FloatImage;
// Block compressor built on the QuickCompress encoders.
//
// Usage: bind an image with setImage(), then call one of the compress*()
// methods; each of them walks the image in steps of 4 pixels and sends every
// encoded block to the output handler found in `outputOptions`.
class FastCompressor
{
public:
FastCompressor();
~FastCompressor();
// Store the image pointer and alpha mode used by the compress*() methods.
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
// Like compressDXT5, but every tile is swizzled with swizzleDXT5n() first.
void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
private:
// Image being compressed; NULL until setImage() is called.
const Image * m_image;
// Alpha mode supplied together with the image.
nvtt::AlphaMode m_alphaMode;
};
// Block compressor built on the OptimalCompress encoders and squish's
// WeightedClusterFit.
//
// Usage mirrors FastCompressor: bind an image with setImage(), then call a
// compress*() method. `compressionOptions` supplies the colour weights and the
// quality setting read by the individual encoders; `outputOptions` supplies
// the output handler that receives every encoded block.
class SlowCompressor
{
public:
SlowCompressor();
~SlowCompressor();
// Store the image pointer and alpha mode used by the compress*() methods.
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
// Image being compressed; NULL until setImage() is called.
const Image * m_image;
// Alpha mode supplied together with the image.
nvtt::AlphaMode m_alphaMode;
};
// External compressors.
#if defined(HAVE_S3QUANT)
void s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
#endif
#if defined(HAVE_ATITC)
void atiCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
#endif
} // nv namespace
#endif // NV_TT_COMPRESSDXT_H

140
src/nvtt/CompressRGB.cpp Normal file
View File

@ -0,0 +1,140 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvimage/Image.h>
#include <nvimage/PixelFormat.h>
#include <nvmath/Color.h>
#include "CompressRGB.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
using namespace nv;
using namespace nvtt;
namespace
{
    // Size in bytes of one output row of `w` pixels: every pixel takes
    // `bitsize` bits rounded up to whole bytes, and the row length is rounded
    // up to the next multiple of 4 bytes.
    inline uint computePitch(uint w, uint bitsize)
    {
        const uint bytesPerPixel = (bitsize + 7) >> 3;
        const uint rowBytes = w * bytesPerPixel;

        // Round up to a multiple of 4 bytes.
        return (rowBytes + 3) & ~3u;
    }

    // Copy `w` 4-byte pixels from `src` to `dst` unchanged.
    inline void convert_to_a8r8g8b8(const void * src, void * dst, uint w)
    {
        const uint rowBytes = 4 * w;
        memcpy(dst, src, rowBytes);
    }

    // Copy `w` 4-byte pixels from `src` to `dst` unchanged; this is the same
    // plain copy as convert_to_a8r8g8b8.
    inline void convert_to_x8r8g8b8(const void * src, void * dst, uint w)
    {
        const uint rowBytes = 4 * w;
        memcpy(dst, src, rowBytes);
    }
} // namespace
// Pixel format converter.
//
// Writes `image` row by row to the output handler in the uncompressed layout
// described by compressionOptions (bitcount plus one bit mask per channel).
// Each row is `pitch` bytes long, as returned by computePitch(). Two layouts
// are copied directly from the source scanline (32 bits with masks
// 0xFF0000 / 0xFF00 / 0xFF and an alpha mask of either 0xFF000000 or 0);
// everything else goes through the generic per-pixel conversion.
void nv::compressRGB(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
    nvCheck(image != NULL);

    const uint width = image->width();
    const uint height = image->height();

    const uint bitCount = compressionOptions.bitcount;
    nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);

    const uint bytesPerPixel = bitCount / 8;

    // Channel masks and the shift/size derived from each of them.
    const uint rmask = compressionOptions.rmask;
    const uint gmask = compressionOptions.gmask;
    const uint bmask = compressionOptions.bmask;
    const uint amask = compressionOptions.amask;

    uint rshift, rsize;
    uint gshift, gsize;
    uint bshift, bsize;
    uint ashift, asize;
    PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
    PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
    PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
    PixelFormat::maskShiftAndSize(amask, &ashift, &asize);

    // The layout is the same for every row, so classify it once.
    const bool colorMatchesSource = (bitCount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF);
    const bool isA8R8G8B8 = colorMatchesSource && amask == 0xFF000000;
    const bool isX8R8G8B8 = colorMatchesSource && amask == 0;

    // Determine pitch and allocate a single row buffer that is reused for every scanline.
    uint pitch = computePitch(width, compressionOptions.bitcount);
    uint8 * row = (uint8 *)mem::malloc(pitch + 4);

    for (uint y = 0; y < height; y++)
    {
        const Color32 * src = image->scanline(y);

        if (isA8R8G8B8)
        {
            convert_to_a8r8g8b8(src, row, width);
        }
        else if (isX8R8G8B8)
        {
            convert_to_x8r8g8b8(src, row, width);
        }
        else
        {
            // Generic pixel format conversion.
            for (uint x = 0; x < width; x++)
            {
                uint packed = 0;
                packed |= PixelFormat::convert(src[x].r, 8, rsize) << rshift;
                packed |= PixelFormat::convert(src[x].g, 8, gsize) << gshift;
                packed |= PixelFormat::convert(src[x].b, 8, bsize) << bshift;
                packed |= PixelFormat::convert(src[x].a, 8, asize) << ashift;

                // Emit the packed pixel one byte at a time, lowest byte first.
                for (uint i = 0; i < bytesPerPixel; i++)
                {
                    row[x * bytesPerPixel + i] = (packed >> (i * 8)) & 0xFF;
                }
            }

            // Zero the padding bytes between the last pixel and the pitch.
            for (uint pad = width * bytesPerPixel; pad < pitch; pad++)
            {
                row[pad] = 0;
            }
        }

        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(row, pitch);
        }
    }

    mem::free(row);
}

View File

@ -21,20 +21,19 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSORRGBE_H
#define NV_TT_COMPRESSORRGBE_H
#ifndef NV_TT_COMPRESSRGB_H
#define NV_TT_COMPRESSRGB_H
#include "nvtt.h"
#include "Compressor.h"
namespace nv
{
struct CompressorRGBE : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
class Image;
// Pixel format converter.
void compressRGB(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
} // nv namespace
#endif // NV_TT_COMPRESSORRGBE_H
#endif // NV_TT_COMPRESSRGB_H

View File

@ -55,12 +55,6 @@ void CompressionOptions::reset()
m.rmask = 0x00FF0000;
m.amask = 0xFF000000;
m.rsize = 8;
m.gsize = 8;
m.bsize = 8;
m.asize = 8;
m.pixelType = PixelType_UnsignedNorm;
m.enableColorDithering = false;
m.enableAlphaDithering = false;
m.binaryAlpha = false;
@ -123,36 +117,8 @@ void CompressionOptions::setPixelFormat(uint bitcount, uint rmask, uint gmask, u
m.gmask = gmask;
m.bmask = bmask;
m.amask = amask;
m.rsize = 0;
m.gsize = 0;
m.bsize = 0;
m.asize = 0;
}
/// Set the uncompressed pixel format from per-channel bit sizes.
/// The bit count and all four channel masks are reset to 0; only the sizes
/// given here are stored.
void CompressionOptions::setPixelFormat(uint8 rsize, uint8 gsize, uint8 bsize, uint8 asize)
{
    // Every channel has to fit in 32 bits. The conditions must be joined with
    // && -- with || the check passed as soon as any single channel was in range.
    nvCheck(rsize <= 32 && gsize <= 32 && bsize <= 32 && asize <= 32);

    m.bitcount = 0;
    m.rmask = 0;
    m.gmask = 0;
    m.bmask = 0;
    m.amask = 0;

    m.rsize = rsize;
    m.gsize = gsize;
    m.bsize = bsize;
    m.asize = asize;
}
/// Set pixel type.
/// Stores `pixelType` in the private options (m.pixelType); nothing else is changed.
void CompressionOptions::setPixelType(PixelType pixelType)
{
m.pixelType = pixelType;
}
/// Use external compressor.
void CompressionOptions::setExternalCompressor(const char * name)
{

View File

@ -45,12 +45,6 @@ namespace nvtt
uint gmask;
uint bmask;
uint amask;
uint8 rsize;
uint8 gsize;
uint8 bsize;
uint8 asize;
PixelType pixelType;
nv::String externalCompressor;
@ -59,15 +53,6 @@ namespace nvtt
bool enableAlphaDithering;
bool binaryAlpha;
int alphaThreshold; // reference value used for binary alpha quantization.
// Bits per pixel for Format_RGBA; 0 for every other format.
uint getBitCount() const
{
    if (format != Format_RGBA) return 0;

    // An explicit bit count takes precedence; when it is 0 the value is the
    // sum of the four per-channel sizes.
    if (bitcount == 0) return rsize + gsize + bsize + asize;
    return bitcount;
}
};
} // nvtt namespace

853
src/nvtt/Compressor.cpp Normal file
View File

@ -0,0 +1,853 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <nvcore/Memory.h>
#include <nvcore/Ptr.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Filter.h>
#include <nvimage/Quantize.h>
#include <nvimage/NormalMap.h>
#include <nvimage/PixelFormat.h>
#include "Compressor.h"
#include "InputOptions.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include "CompressDXT.h"
#include "CompressRGB.h"
#include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h"
using namespace nv;
using namespace nvtt;
namespace
{
// Size in bytes of one compressed block for `format`; 0 for any format that is
// not listed in the table below.
static int blockSize(Format format)
{
    struct Entry
    {
        Format id;
        int bytes;
    };

    // Checked in order, first match wins.
    static const Entry table[] = {
        { Format_DXT1,  8 },
        { Format_DXT1a, 8 },
        { Format_DXT3,  16 },
        { Format_DXT5,  16 },
        { Format_DXT5n, 16 },
        { Format_BC4,   8 },
        { Format_BC5,   16 },
    };

    for (uint i = 0; i < sizeof(table) / sizeof(table[0]); i++)
    {
        if (format == table[i].id) return table[i].bytes;
    }
    return 0;
}
// Size in bytes of one row of `w` pixels, where each pixel takes `bitsize`
// bits rounded up to whole bytes and the row is rounded up to a multiple of
// 4 bytes.
inline uint computePitch(uint w, uint bitsize)
{
    const uint bytesPerPixel = (bitsize + 7) >> 3;
    const uint rowBytes = w * bytesPerPixel;

    // Round up to a multiple of 4 bytes.
    return (rowBytes + 3) & ~3u;
}
// Number of bytes needed for one w x h x d image in `format`.
// Format_RGBA: depth * height * row pitch for `bitCount` bits per pixel.
// Any other format: number of 4x4 blocks (rounded up in x and y) times the
// block size of the format; the depth `d` is not taken into account.
static int computeImageSize(uint w, uint h, uint d, uint bitCount, Format format)
{
    if (format != Format_RGBA)
    {
        // @@ Handle 3D textures. DXT and VTC have different behaviors.
        const uint blocksWide = (w + 3) / 4;
        const uint blocksHigh = (h + 3) / 4;
        return blocksWide * blocksHigh * blockSize(format);
    }

    return d * h * computePitch(w, bitCount);
}
} // namespace
namespace nvtt
{
// Mipmap could be:
// - a pointer to an input image.
// - a fixed point image.
// - a floating point image.
// One mipmap level in whichever representation is currently available:
// a borrowed pointer to an input image, a fixed point Image held by an
// AutoPtr, and/or a FloatImage held by an AutoPtr.
struct Mipmap
{
Mipmap() : m_inputImage(NULL) {}
~Mipmap() {}
// Reference input image number `idx` of `inputOptions` and reset both
// AutoPtr members to NULL.
void setFromInput(const InputOptions::Private & inputOptions, uint idx)
{
m_inputImage = inputOptions.image(idx);
m_fixedImage = NULL;
m_floatImage = NULL;
}
// Assign and take ownership of given image. The input image reference and
// the fixed image are reset to NULL.
void setImage(FloatImage * image)
{
m_inputImage = NULL;
m_fixedImage = NULL;
m_floatImage = image;
}
// Convert linear float image to fixed image ready for compression.
// Does nothing when there is no float image. Normal maps and an output
// gamma of exactly 1.0 use createImage(); everything else uses
// createImageGammaCorrect() with the configured output gamma.
void toFixedImage(const InputOptions::Private & inputOptions)
{
if (m_floatImage != NULL) // apfaffe - We should check that we have a float image, if so convert it!
{
if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f)
{
m_fixedImage = m_floatImage->createImage();
}
else
{
m_fixedImage = m_floatImage->createImageGammaCorrect(inputOptions.outputGamma);
}
}
}
// Convert input image to linear float image.
// Does nothing when a float image already exists. Otherwise the float
// image is built from asFixedImage(); unless this is a normal map, an
// input gamma other than 1.0 triggers toLinear(0, 3, inputGamma).
void toFloatImage(const InputOptions::Private & inputOptions)
{
if (m_floatImage == NULL)
{
nvDebugCheck(this->asFixedImage() != NULL);
m_floatImage = new FloatImage(this->asFixedImage());
if (inputOptions.isNormalMap)
{
// Expand normals to [-1, 1] range.
// floatImage->expandNormals(0);
// NOTE: the call above is commented out, so this branch currently does nothing.
}
else if (inputOptions.inputGamma != 1.0f)
{
// Convert to linear space.
m_floatImage->toLinear(0, 3, inputOptions.inputGamma);
}
}
}
// Read-only access to the float image (NULL when none has been set or created).
const FloatImage * asFloatImage() const
{
return m_floatImage.ptr();
}
// Mutable access to the float image (NULL when none has been set or created).
FloatImage * asFloatImage()
{
return m_floatImage.ptr();
}
// Returns the fixed image when there is one and falls back to the
// referenced input image otherwise (which may itself be NULL).
const Image * asFixedImage() const
{
// - apfaffe - switched logic to return the 'processed image' rather than the input!
if (m_fixedImage != NULL && m_fixedImage.ptr() != NULL)
{
return m_fixedImage.ptr();
}
return m_inputImage;
}
// Returns a fixed image that may be modified. When the mipmap still
// references an input image, that image is copied into m_fixedImage first
// and the reference is dropped.
Image * asMutableFixedImage()
{
if (m_inputImage != NULL)
{
// Do not modify input image, create a copy.
m_fixedImage = new Image(*m_inputImage);
m_inputImage = NULL;
}
return m_fixedImage.ptr();
}
private:
// Borrowed input image; never deleted by this struct.
const Image * m_inputImage;
// Fixed point image held through AutoPtr.
AutoPtr<Image> m_fixedImage;
// Floating point image held through AutoPtr.
AutoPtr<FloatImage> m_floatImage;
};
} // nvtt namespace
/// Allocate the private state, record whether CUDA hardware is present and,
/// if it is, try to enable CUDA acceleration right away.
Compressor::Compressor() : m(*new Compressor::Private())
{
    // Start from "acceleration off, no device selected".
    m.cudaDevice = -1;
    m.cudaEnabled = false;

    // CUDA initialization.
    m.cudaSupported = cuda::isHardwarePresent();

    enableCudaAcceleration(m.cudaSupported);
}
/// Turn CUDA acceleration off (which runs the cleanup in
/// enableCudaAcceleration) and then delete the private state that the
/// constructor allocated with new.
Compressor::~Compressor()
{
enableCudaAcceleration(false);
delete &m;
}
/// Enable or disable CUDA acceleration.
/// Has no effect when no CUDA hardware was detected, or when the requested
/// state is already the current one.
void Compressor::enableCudaAcceleration(bool enable)
{
    if (!m.cudaSupported)
    {
        return;
    }

    const bool turningOff = m.cudaEnabled && !enable;
    const bool turningOn = !m.cudaEnabled && enable;

    if (turningOff)
    {
        m.cudaEnabled = false;
        m.cuda = NULL;

        // Exit device, but only if a device index was recorded at init time.
        if (m.cudaDevice != -1)
        {
            cuda::exitDevice();
        }
    }
    else if (turningOn)
    {
        // Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
        m.cudaEnabled = cuda::initDevice(&m.cudaDevice);

        if (m.cudaEnabled)
        {
            // Create compressor if initialization succeeds.
            m.cuda = new CudaCompressor();

            // But cleanup if failed.
            if (!m.cuda->isValid())
            {
                enableCudaAcceleration(false);
            }
        }
    }
}
/// Report whether CUDA acceleration is currently enabled.
bool Compressor::isCudaAccelerationEnabled() const
{
    const bool enabled = m.cudaEnabled;
    return enabled;
}
/// Compress the input texture with the given compression options.
/// Forwards the private parts of the three option objects to the implementation
/// and returns its result.
bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
{
    const bool succeeded = m.compress(inputOptions.m, compressionOptions.m, outputOptions.m);
    return succeeded;
}
/// Estimate the size of compressing the input with the given options.
/// Forwards to the private implementation and returns its result.
int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const
{
    const int estimatedSize = m.estimateSize(inputOptions.m, compressionOptions.m);
    return estimatedSize;
}
/// Run the whole compression: open the output, write the header, then compress
/// every mipmap of every face.
///
/// Returns false if the output cannot be opened (reporting Error_FileOpen to the
/// error handler, if any), if the header cannot be written, or if any face fails.
/// Once the output has been opened it is closed again on every exit path,
/// including failures.
bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
    // Make sure enums match.
    nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
    nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
    nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);

    // Get output handler.
    if (!outputOptions.openFile())
    {
        if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
        return false;
    }

    inputOptions.computeTargetExtents();

    // Output DDS header, then each face; stop at the first failure.
    bool succeeded = outputHeader(inputOptions, compressionOptions, outputOptions);

    for (uint f = 0; succeeded && f < inputOptions.faceCount; f++)
    {
        succeeded = compressMipmaps(f, inputOptions, compressionOptions, outputOptions);
    }

    // Always pair the successful openFile() with closeFile(); previously the
    // failure paths returned early and left the output open.
    outputOptions.closeFile();

    return succeeded;
}
// Build the DDS header from the input/compression options and hand it to the
// output handler. Returns true without writing anything when there is no output
// handler or header output is disabled; otherwise returns the result of the
// write, reporting Error_FileWrite to the error handler (if any) on failure.
bool Compressor::Private::outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
    // Nothing to do when no header was requested.
    const bool headerWanted = (outputOptions.outputHandler != NULL) && outputOptions.outputHeader;
    if (!headerWanted)
    {
        return true;
    }

    DDSHeader header;

    // Extents and mipmap chain length.
    header.setWidth(inputOptions.targetWidth);
    header.setHeight(inputOptions.targetHeight);

    int mipmapCount = inputOptions.realMipmapCount();
    nvDebugCheck(mipmapCount > 0);
    header.setMipmapCount(mipmapCount);

    // Texture kind.
    if (inputOptions.textureType == TextureType_2D) {
        header.setTexture2D();
    }
    else if (inputOptions.textureType == TextureType_Cube) {
        header.setTextureCube();
    }
    /*else if (inputOptions.textureType == TextureType_3D) {
        header.setTexture3D();
        header.setDepth(inputOptions.targetDepth);
    }*/

    const Format format = compressionOptions.format;

    if (format == Format_RGBA)
    {
        // Uncompressed: describe the row pitch and the channel masks.
        header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
        header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
    }
    else
    {
        // Block-compressed: describe the total size and the FourCC code.
        header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, format));

        // Formats for which the normal flag is set when the input is a normal map.
        bool normalFlagApplies = false;

        if (format == Format_DXT1 || format == Format_DXT1a) {
            header.setFourCC('D', 'X', 'T', '1');
            normalFlagApplies = true;
        }
        else if (format == Format_DXT3) {
            header.setFourCC('D', 'X', 'T', '3');
        }
        else if (format == Format_DXT5) {
            header.setFourCC('D', 'X', 'T', '5');
        }
        else if (format == Format_DXT5n) {
            header.setFourCC('D', 'X', 'T', '5');
            normalFlagApplies = true;
        }
        else if (format == Format_BC4) {
            header.setFourCC('A', 'T', 'I', '1');
        }
        else if (format == Format_BC5) {
            header.setFourCC('A', 'T', 'I', '2');
            normalFlagApplies = true;
        }

        if (normalFlagApplies && inputOptions.isNormalMap)
        {
            header.setNormalFlag(true);
        }
    }

    // Swap bytes if necessary.
    header.swapBytes();

    // The base header is 128 bytes; the DX10 extension adds 20 more.
    uint headerSize = 128;
    if (header.hasDX10Header())
    {
        nvStaticCheck(sizeof(DDSHeader) == 128 + 20);
        headerSize = 128 + 20;
    }

    const bool written = outputOptions.outputHandler->writeData(&header, headerSize);
    if (!written && outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_FileWrite);
    }

    return written;
}
/// Compress every mipmap level of face `f`, starting from the target extents
/// and halving each dimension (clamped to 1) per level.
///
/// For each level the output handler, if any, is told the image is starting;
/// the level is then initialized, quantized and compressed.
/// Returns false as soon as a level cannot be initialized (reporting
/// Error_InvalidInput to the error handler, if any); true otherwise.
bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
    uint w = inputOptions.targetWidth;
    uint h = inputOptions.targetHeight;
    uint d = inputOptions.targetDepth;

    Mipmap mipmap;

    const uint mipmapCount = inputOptions.realMipmapCount();
    nvDebugCheck(mipmapCount > 0);

    for (uint m = 0; m < mipmapCount; m++)
    {
        if (outputOptions.outputHandler)
        {
            int size = computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.format);
            outputOptions.outputHandler->beginImage(size, w, h, d, f, m);
        }

        // @@ Where to do the color transform?
        // - Color transform may not be linear, so we cannot do before computing mipmaps.
        // - Should be done in linear space, that is, after gamma correction.

        if (!initMipmap(mipmap, inputOptions, w, h, d, f, m))
        {
            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_InvalidInput);
            }
            // Fail regardless of whether an error handler is installed. The
            // return used to sit inside the check above, so without a handler
            // the code went on to quantize and compress an uninitialized mipmap.
            return false;
        }

        quantizeMipmap(mipmap, compressionOptions);

        compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);

        // Compute extents of next mipmap:
        w = max(1U, w / 2);
        h = max(1U, h / 2);
        d = max(1U, d / 2);
    }

    return true;
}
/// Prepare `mipmap` for level `m` of face `f` with extents w x h x d.
///
/// Levels past the first are downsampled from the previous contents of `mipmap`
/// when no exactly matching input exists or normal-map conversion is on.
/// Otherwise the level is taken from the input (exact match, or the closest
/// input rescaled) and run through input processing.
/// Returns false only when no usable input image can be found.
bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const
{
    // Find image from input.
    const int exactIdx = findExactMipmap(inputOptions, w, h, d, f);
    const bool haveExact = (exactIdx != -1);

    const bool generateFromPrevious = (m != 0) && (!haveExact || inputOptions.convertToNormalMap);

    if (generateFromPrevious)
    {
        // Generate from last, when mipmap not found, or normal map conversion enabled.
        downsampleMipmap(mipmap, inputOptions);
    }
    else
    {
        if (haveExact)
        {
            // If input mipmap found, then get from input.
            mipmap.setFromInput(inputOptions, exactIdx);
        }
        else
        {
            // If not found, resize closest mipmap.
            const int closestIdx = findClosestMipmap(inputOptions, w, h, d, f);
            if (closestIdx == -1)
            {
                return false;
            }

            mipmap.setFromInput(inputOptions, closestIdx);
            scaleMipmap(mipmap, inputOptions, w, h, d);
        }

        processInputImage(mipmap, inputOptions);
    }

    // Convert linear float image to fixed image ready for compression.
    mipmap.toFixedImage(inputOptions);

    return true;
}
/// Look through the input images of face `f` for one whose extents are exactly
/// w x h x d. Returns its index if it has data, otherwise -1. The scan stops
/// with -1 as soon as an image smaller than the request in any dimension is met.
int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
{
    const int wantedW = int(w);
    const int wantedH = int(h);
    const int wantedD = int(d);

    for (int level = 0; level < int(inputOptions.mipmapCount); level++)
    {
        int idx = f * inputOptions.mipmapCount + level;
        const InputOptions::Private::InputImage & candidate = inputOptions.images[idx];

        const bool sameExtents = candidate.width == wantedW && candidate.height == wantedH && candidate.depth == wantedD;
        if (sameExtents)
        {
            // Matching slot: only usable when it actually carries data.
            return (candidate.data != NULL) ? idx : -1;
        }

        const bool tooSmall = candidate.width < wantedW || candidate.height < wantedH || candidate.depth < wantedD;
        if (tooSmall)
        {
            return -1;
        }
    }

    return -1;
}
/// Pick the input image of face `f` to rescale when no exact match exists.
///
/// Images without data are ignored. Images are visited in order; each one whose
/// summed extent difference to (w, h, d) is non-negative becomes the current
/// best. At the first image whose summed difference is negative the search
/// ends, returning the current best, or that image itself if there is none yet.
/// Returns -1 when face `f` has no image with data.
int Compressor::Private::findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const
{
    int bestIdx = -1;

    for (int m = 0; m < int(inputOptions.mipmapCount); m++)
    {
        int idx = f * inputOptions.mipmapCount + m;
        const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx];

        if (inputImage.data == NULL)
        {
            continue;
        }

        // Compute the difference in signed arithmetic, as findExactMipmap does,
        // rather than relying on unsigned wraparound followed by narrowing.
        const int difference = (inputImage.width - int(w)) + (inputImage.height - int(h)) + (inputImage.depth - int(d));

        if (difference < 0)
        {
            // Smaller than requested: prefer a previously seen candidate.
            return (bestIdx == -1) ? idx : bestIdx;
        }

        bestIdx = idx;
    }

    return bestIdx;
}
// Replace the contents of `mipmap` with a downsampled version of itself, using
// the mipmap filter selected in the input options (box, triangle or Kaiser).
// The result is renormalized for normal maps when mipmap normalization is on.
void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
    // Make sure that floating point linear representation is available.
    mipmap.toFloatImage(inputOptions);

    const FloatImage * source = mipmap.asFloatImage();

    if (inputOptions.mipmapFilter == MipmapFilter_Box)
    {
        // Use fast downsample.
        mipmap.setImage(source->fastDownSample());
    }
    else if (inputOptions.mipmapFilter == MipmapFilter_Triangle)
    {
        TriangleFilter triangle;
        mipmap.setImage(source->downSample(triangle, (FloatImage::WrapMode)inputOptions.wrapMode));
    }
    else
    {
        // Anything else is expected to be the Kaiser filter.
        nvDebugCheck(inputOptions.mipmapFilter == MipmapFilter_Kaiser);
        KaiserFilter kaiser(inputOptions.kaiserWidth);
        kaiser.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
        mipmap.setImage(source->downSample(kaiser, (FloatImage::WrapMode)inputOptions.wrapMode));
    }

    // Normalize mipmap.
    const bool holdsNormals = inputOptions.isNormalMap || inputOptions.convertToNormalMap;
    if (holdsNormals && inputOptions.normalizeMipmaps)
    {
        normalizeNormalMap(mipmap.asFloatImage());
    }
}
/// Resize `mipmap` to w x h with a box filter and the configured wrap mode.
/// The depth argument `d` is not used by the resize call.
void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const
{
    // Resizing works on the float representation.
    mipmap.toFloatImage(inputOptions);

    // @@ Add more filters.
    // @@ Select different filters for downscaling and reconstruction.

    // Resize image.
    BoxFilter resampleFilter;
    mipmap.setImage(mipmap.asFloatImage()->resize(resampleFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode));
}
// Process an input image. Exactly one of three things happens:
//  - conversion to a normal map, when requested;
//  - normalization of an existing normal map, when mipmap normalization is on;
//  - creation of the float image, when input and output gamma differ.
void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
    if (inputOptions.convertToNormalMap)
    {
        // Normal map generation reads the fixed-point image.
        mipmap.toFixedImage(inputOptions);

        Vector4 heightScale = inputOptions.heightFactors;
        mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale));
        return;
    }

    if (inputOptions.isNormalMap)
    {
        if (!inputOptions.normalizeMipmaps)
        {
            return;
        }

        if (mipmap.asFloatImage() != NULL)
        {
            // Floating point image available: normalize it in place.
            normalizeNormalMap(mipmap.asFloatImage());
            mipmap.setImage(mipmap.asFloatImage());
        }
        else
        {
            // No float image yet: build one from the fixed image and normalize that.
            FloatImage * converted = new FloatImage(mipmap.asFixedImage());
            normalizeNormalMap(converted);
            mipmap.setImage(converted);
        }
        return;
    }

    // Plain color image: a float image is only needed when the gamma changes.
    if (inputOptions.inputGamma != inputOptions.outputGamma)
    {
        mipmap.toFloatImage(inputOptions);
    }
}
// Quantize the given mipmap according to the compression options:
// first optional binary alpha (dithered or thresholded), then optional
// Floyd-Steinberg dithering to the per-channel bit sizes of the target format.
void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const
{
    nvDebugCheck(mipmap.asFixedImage() != NULL);

    const bool ditherColor = compressionOptions.enableColorDithering;
    const bool ditherAlpha = compressionOptions.enableAlphaDithering;

    if (compressionOptions.binaryAlpha)
    {
        Image * target = mipmap.asMutableFixedImage();
        if (ditherAlpha)
        {
            Quantize::FloydSteinberg_BinaryAlpha(target, compressionOptions.alphaThreshold);
        }
        else
        {
            Quantize::BinaryAlpha(target, compressionOptions.alphaThreshold);
        }
    }

    // No dithering requested: done.
    if (!ditherColor && !ditherAlpha)
    {
        return;
    }

    // Channels keep 8 bits unless the target format says otherwise.
    uint rsize = 8;
    uint gsize = 8;
    uint bsize = 8;
    uint asize = 8;

    if (ditherColor)
    {
        const bool dxtColor = compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5;
        if (dxtColor)
        {
            // 5:6:5 color endpoints.
            rsize = 5;
            gsize = 6;
            bsize = 5;
        }
        else if (compressionOptions.format == Format_RGB)
        {
            // Take the channel sizes from the masks; the shifts are not needed.
            uint rshift, gshift, bshift;
            PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize);
            PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize);
            PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize);
        }
    }

    if (ditherAlpha)
    {
        if (compressionOptions.format == Format_DXT3)
        {
            asize = 4;
        }
        else if (compressionOptions.format == Format_RGB)
        {
            uint ashift;
            PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize);
        }
    }

    if (compressionOptions.binaryAlpha)
    {
        asize = 8; // Already quantized.
    }

    Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize);
}
// Compress the given mipmap.
// Dispatches on compressionOptions.format to the RGB writer, the fast
// compressor (Quality_Fastest), the CUDA compressor, or the slow compressor.
// The individual compressors' results are not inspected: this function always
// returns true.
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
const Image * image = mipmap.asFixedImage();
nvDebugCheck(image != NULL);
// Both CPU compressors are bound to the image up front, whichever ends up being used.
FastCompressor fast;
fast.setImage(image, inputOptions.alphaMode);
SlowCompressor slow;
slow.setImage(image, inputOptions.alphaMode);
// CUDA is only used when enabled and the image has at least 512 pixels.
const bool useCuda = cudaEnabled && image->width() * image->height() >= 512;
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{
compressRGB(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_DXT1)
{
// Optional external compressors, selected by name when compiled in.
// Note that each guarded block ends in a dangling `else` that chains into
// the next `if` after its #endif.
#if defined(HAVE_S3QUANT)
if (compressionOptions.externalCompressor == "s3")
{
s3CompressDXT1(image, outputOptions);
}
else
#endif
#if defined(HAVE_ATITC)
if (compressionOptions.externalCompressor == "ati")
{
atiCompressDXT1(image, outputOptions);
}
else
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
}
else
{
slow.compressDXT1(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT1a)
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1a(outputOptions);
}
else
{
// Both branches below use the slow compressor: the CUDA call is commented out.
if (useCuda)
{
nvDebugCheck(cudaSupported);
/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
}
else
{
slow.compressDXT1a(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT3(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT3(compressionOptions, outputOptions);
}
else
{
slow.compressDXT3(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT5)
{
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT5(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT5(compressionOptions, outputOptions);
}
else
{
slow.compressDXT5(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT5n)
{
// No CUDA path for DXT5n: only the fast or slow CPU compressor is used.
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT5n(outputOptions);
}
else
{
slow.compressDXT5n(compressionOptions, outputOptions);
}
}
else if (compressionOptions.format == Format_BC4)
{
// BC4 always uses the slow compressor, whatever the quality setting.
slow.compressBC4(compressionOptions, outputOptions);
}
else if (compressionOptions.format == Format_BC5)
{
// BC5 always uses the slow compressor, whatever the quality setting.
slow.compressBC5(compressionOptions, outputOptions);
}
// Any other format falls through without producing output.
return true;
}
/// Sum the compressed image sizes of every mipmap level of every face, using
/// the target extents computed from the input options. Header size is not included.
int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const
{
    const Format format = compressionOptions.format;
    const uint bitCount = compressionOptions.bitcount;

    inputOptions.computeTargetExtents();

    const uint levelCount = inputOptions.realMipmapCount();

    int total = 0;

    for (uint face = 0; face < inputOptions.faceCount; face++)
    {
        // Every face starts again from the full target extents.
        uint w = inputOptions.targetWidth;
        uint h = inputOptions.targetHeight;
        uint d = inputOptions.targetDepth;

        for (uint level = 0; level < levelCount; level++)
        {
            total += computeImageSize(w, h, d, bitCount, format);

            // Compute extents of next mipmap:
            w = max(1U, w / 2);
            h = max(1U, h / 2);
            d = max(1U, d / 2);
        }
    }

    return total;
}

View File

@ -1,4 +1,4 @@
// Copyright Ignacio Castano <icastano@nvidia.com> 2009
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -24,17 +24,57 @@
#ifndef NV_TT_COMPRESSOR_H
#define NV_TT_COMPRESSOR_H
#include <nvcore/nvcore.h> // uint
#include <nvcore/Ptr.h>
#include <nvtt/cuda/CudaCompressDXT.h>
#include "nvtt.h"
namespace nv
{
struct CompressorInterface
class Image;
}
namespace nvtt
{
struct Mipmap;
// Private implementation of nvtt::Compressor: the compression pipeline entry
// points plus the CUDA state that Compressor manages.
struct Compressor::Private
{
// NOTE(review): the next two lines mention CompressorInterface and look like
// removed-side residue of the diff this text was taken from, not part of this
// struct -- confirm against the real header.
virtual ~CompressorInterface() {}
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0;
// Leaves the CUDA members below uninitialized here; Compressor's constructor sets them.
Private() {}
// Run the full compression; returns false on failure.
bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
// Total size of all compressed mipmaps of all faces.
int estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const;
private:
// Pipeline steps, in roughly the order compress() uses them.
bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
bool compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
bool initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const;
// Both lookups return an index into inputOptions.images, or -1 when nothing suitable exists.
int findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
int findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:
// CUDA state, written by Compressor's constructor and enableCudaAcceleration().
bool cudaSupported; // CUDA hardware was detected.
bool cudaEnabled; // CUDA acceleration is currently on.
int cudaDevice; // Device id from cuda::initDevice; -1 when none.
nv::AutoPtr<nv::CudaCompressor> cuda; // CUDA compressor; NULL while acceleration is off.
};
} // nv namespace
} // nvtt namespace
#endif // NV_TT_COMPRESSOR_H
#endif // NV_TT_COMPRESSOR_H

View File

@ -1,676 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "CompressorDXT.h"
#include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
// squish
#include "squish/colourset.h"
#include "squish/fastclusterfit.h"
#include "squish/weightedclusterfit.h"
#include "nvtt.h"
#include "nvcore/Memory.h"
#include "nvimage/Image.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
#endif
// squish
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
#endif
// d3dx
#if defined(HAVE_D3DX)
#include <d3dx9.h>
#endif
// stb
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
#endif
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
using namespace nv;
using namespace nvtt;
/// Compress a w x h image in 4x4 blocks and send the result to the output handler.
///
/// Small images (fewer than 16 blocks), and all images when OpenMP is not
/// available, are compressed serially and written block by block. Otherwise the
/// blocks are compressed in parallel into one buffer that is written in a
/// single call.
void FixedBlockCompressor::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    const uint bs = blockSize();
    const uint blocksAcross = (w + 3) / 4;
    const uint blocksDown = (h + 3) / 4;
    const uint totalBytes = bs * blocksAcross * blocksDown;

    // Use a single thread to compress small textures.
    bool serial = (blocksAcross * blocksDown < 16);
#if !defined(HAVE_OPENMP)
    serial = true;
#endif

    if (serial)
    {
        nvDebugCheck(bs <= 16);
        uint8 scratch[16]; // room for one compressed block

        for (int y = 0; y < int(h); y += 4) {
            for (uint x = 0; x < w; x += 4) {

                ColorBlock texels;
                if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
                    texels.init(w, h, (uint *)data, x, y);
                }
                else {
                    nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F);
                    texels.init(w, h, (float *)data, x, y);
                }

                compressBlock(texels, alphaMode, compressionOptions, scratch);

                if (outputOptions.outputHandler != NULL) {
                    outputOptions.outputHandler->writeData(scratch, bs);
                }
            }
        }
    }
#if defined(HAVE_OPENMP)
    else
    {
        // One buffer for the whole image; each iteration fills its own block slot.
        uint8 * buffer = new uint8[totalBytes];

        #pragma omp parallel
        {
            #pragma omp for
            for (int i = 0; i < int(blocksAcross*blocksDown); i++)
            {
                const uint col = i % blocksAcross;
                const uint row = i / blocksAcross;

                ColorBlock texels;
                if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
                    texels.init(w, h, (uint *)data, 4*col, 4*row);
                }
                else {
                    nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F);
                    texels.init(w, h, (float *)data, 4*col, 4*row);
                }

                uint8 * slot = buffer + (row * blocksAcross + col) * bs;
                compressBlock(texels, alphaMode, compressionOptions, slot);
            } // omp for
        } // omp parallel

        if (outputOptions.outputHandler != NULL) {
            outputOptions.outputHandler->writeData(buffer, totalBytes);
        }

        delete [] buffer;
    }
#endif
}
/// Quick DXT1 compression of one color block into `output`.
/// `alphaMode` and `compressionOptions` are not used.
void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Construct the block in place in the caller's buffer, then fill it.
    BlockDXT1 * const dst = new(output) BlockDXT1;

    QuickCompress::compressDXT1(rgba, dst);
}
/// Quick DXT1a compression of one color block into `output`.
/// `alphaMode` and `compressionOptions` are not used.
void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Construct the block in place in the caller's buffer, then fill it.
    BlockDXT1 * const dst = new(output) BlockDXT1;

    QuickCompress::compressDXT1a(rgba, dst);
}
/// Quick DXT3 compression of one color block into `output`.
/// `alphaMode` and `compressionOptions` are not used.
void FastCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Construct the block in place in the caller's buffer, then fill it.
    BlockDXT3 * const dst = new(output) BlockDXT3;

    QuickCompress::compressDXT3(rgba, dst);
}
/// Quick DXT5 compression of one color block into `output`.
/// `alphaMode` and `compressionOptions` are not used.
void FastCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Construct the block in place in the caller's buffer, then fill it.
    BlockDXT5 * const dst = new(output) BlockDXT5;

    QuickCompress::compressDXT5(rgba, dst);
}
/// Quick DXT5n compression of one color block into `output`.
/// The block is swizzled in place first, so `rgba` is modified.
/// `alphaMode` and `compressionOptions` are not used.
void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Rearrange channels to: 0xFF, G, 0, R
    rgba.swizzle(4, 1, 5, 0);

    // Construct the block in place in the caller's buffer, then fill it.
    BlockDXT5 * const dst = new(output) BlockDXT5;

    QuickCompress::compressDXT5(rgba, dst);
}
/// Quick BC4 compression of one color block into `output`: the red channel is
/// encoded as a DXT5 alpha block. `rgba` is swizzled in place.
/// `alphaMode` and `compressionOptions` are not used.
void FastCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Construct the block in place in the caller's buffer.
    BlockATI1 * const dst = new(output) BlockATI1;

    // Copy red to alpha, since the alpha compressor reads the alpha channel.
    rgba.swizzle(0, 1, 2, 0);

    QuickCompress::compressDXT5A(rgba, &dst->alpha);
}
/// Quick BC5 compression of one color block into `output`: red and green are
/// each encoded as a DXT5 alpha block (x and y). `rgba` is swizzled in place.
/// `alphaMode` and `compressionOptions` are not used.
void FastCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Construct the block in place in the caller's buffer.
    BlockATI2 * const dst = new(output) BlockATI2;

    // First component: copy red to alpha and encode it.
    rgba.swizzle(0, 1, 2, 0);
    QuickCompress::compressDXT5A(rgba, &dst->x);

    // Second component: copy green to alpha and encode it.
    rgba.swizzle(0, 1, 2, 1);
    QuickCompress::compressDXT5A(rgba, &dst->y);
}
/// DXT1 compression of one color block into `output`.
/// Single-color blocks use the optimal single-color encoder; all other blocks
/// use a weighted cluster fit with the configured color weights.
/// `alphaMode` is not used.
void NormalCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    nvsquish::WeightedClusterFit clusterFit;
    clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    if (!rgba.isSingleColor())
    {
        // General case: fit endpoints to the whole color set.
        nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), 0);
        clusterFit.SetColourSet(&colourSet, nvsquish::kDxt1);
        clusterFit.Compress(output);
    }
    else
    {
        // Uniform block: encode its one color directly.
        BlockDXT1 * const dst = new(output) BlockDXT1;
        OptimalCompress::compressDXT1(rgba.color(0), dst);
    }
}
/// DXT1a compression of one color block into `output`.
/// A texel counts as transparent when its alpha is below 128. Blocks that are
/// fully transparent, or single-colored with no transparent texel, use the
/// optimal single-color encoder on the first texel; all other blocks use a
/// weighted cluster fit.
void NormalCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Count the transparent texels among the 16 of the block.
    uint transparentTexels = 0;
    for (uint i = 0; i < 16; i++)
    {
        if (rgba.color(i).a < 128)
        {
            transparentTexels++;
        }
    }
    const bool anyAlpha = (transparentTexels != 0);
    const bool allAlpha = (transparentTexels == 16);

    const bool isSingleColor = rgba.isSingleColor();

    if ((!anyAlpha && isSingleColor) || allAlpha)
    {
        BlockDXT1 * const dst = new(output) BlockDXT1;
        OptimalCompress::compressDXT1a(rgba.color(0), dst);
        return;
    }

    nvsquish::WeightedClusterFit clusterFit;
    clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    // Weight colors by alpha only when alpha carries transparency.
    int colourSetFlags = nvsquish::kDxt1;
    if (alphaMode == nvtt::AlphaMode_Transparency)
    {
        colourSetFlags |= nvsquish::kWeightColourByAlpha;
    }

    nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), colourSetFlags);
    clusterFit.SetColourSet(&colourSet, nvsquish::kDxt1);
    clusterFit.Compress(output);
}
/// DXT3 compression of one color block into `output`: explicit alpha first,
/// then color (optimal single-color encoder, or a weighted cluster fit).
void NormalCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT3 * const dst = new(output) BlockDXT3;

    // Compress explicit alpha.
    OptimalCompress::compressDXT3A(rgba, &dst->alpha);

    // Compress color.
    if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &dst->color);
        return;
    }

    nvsquish::WeightedClusterFit clusterFit;
    clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    // Weight colors by alpha only when alpha carries transparency.
    const int colourSetFlags = (alphaMode == nvtt::AlphaMode_Transparency) ? nvsquish::kWeightColourByAlpha : 0;

    nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), colourSetFlags);
    clusterFit.SetColourSet(&colourSet, 0);
    clusterFit.Compress(&dst->color);
}
/// DXT5 compression of one color block into `output`: interpolated alpha first
/// (optimal encoder at Quality_Highest, quick encoder otherwise), then color
/// (optimal single-color encoder, or a weighted cluster fit).
void NormalCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT5 * const dst = new(output) BlockDXT5;

    // Compress alpha.
    const bool highestQuality = (compressionOptions.quality == Quality_Highest);
    if (highestQuality)
    {
        OptimalCompress::compressDXT5A(rgba, &dst->alpha);
    }
    else
    {
        QuickCompress::compressDXT5A(rgba, &dst->alpha);
    }

    // Compress color.
    if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &dst->color);
        return;
    }

    nvsquish::WeightedClusterFit clusterFit;
    clusterFit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());

    // Weight colors by alpha only when alpha carries transparency.
    const int colourSetFlags = (alphaMode == nvtt::AlphaMode_Transparency) ? nvsquish::kWeightColourByAlpha : 0;

    nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), colourSetFlags);
    clusterFit.SetColourSet(&colourSet, 0);
    clusterFit.Compress(&dst->color);
}
/// DXT5n compression of one color block into `output`.
/// The block is swizzled in place so that X ends up in alpha and Y in green;
/// X is then encoded in the alpha block and Y in the color block. The encoders
/// used depend on whether the quality is Quality_Highest.
void NormalCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Rearrange channels to: 0xFF, G, 0, R
    rgba.swizzle(4, 1, 5, 0);

    BlockDXT5 * const dst = new(output) BlockDXT5;

    const bool highestQuality = (compressionOptions.quality == Quality_Highest);

    // Compress X.
    if (highestQuality)
    {
        OptimalCompress::compressDXT5A(rgba, &dst->alpha);
    }
    else
    {
        QuickCompress::compressDXT5A(rgba, &dst->alpha);
    }

    // Compress Y.
    if (highestQuality)
    {
        OptimalCompress::compressDXT1G(rgba, &dst->color);
        return;
    }

    if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1G(rgba.color(0), &dst->color);
        return;
    }

    // Cluster fit with only the green channel contributing to the metric.
    nvsquish::WeightedClusterFit clusterFit;
    clusterFit.SetMetric(0, 1, 0);

    const int colourSetFlags = (alphaMode == nvtt::AlphaMode_Transparency) ? nvsquish::kWeightColourByAlpha : 0;

    nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), colourSetFlags);
    clusterFit.SetColourSet(&colourSet, 0);
    clusterFit.Compress(&dst->color);
}
/// BC4 compression of one color block into `output`: the red channel is encoded
/// with the optimal DXT5 alpha encoder. `rgba` is swizzled in place.
/// `alphaMode` and `compressionOptions` are not used.
void ProductionCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Construct the block in place in the caller's buffer.
    BlockATI1 * const dst = new(output) BlockATI1;

    // Copy red to alpha, since the alpha compressor reads the alpha channel.
    rgba.swizzle(0, 1, 2, 0);

    OptimalCompress::compressDXT5A(rgba, &dst->alpha);
}
/// BC5 compression of one color block into `output`: red and green are each
/// encoded with the optimal DXT5 alpha encoder (x and y). `rgba` is swizzled in place.
/// `alphaMode` and `compressionOptions` are not used.
void ProductionCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Construct the block in place in the caller's buffer.
    BlockATI2 * const dst = new(output) BlockATI2;

    // First component: copy red to alpha and encode it.
    rgba.swizzle(0, 1, 2, 0);
    OptimalCompress::compressDXT5A(rgba, &dst->x);

    // Second component: copy green to alpha and encode it.
    rgba.swizzle(0, 1, 2, 1);
    OptimalCompress::compressDXT5A(rgba, &dst->y);
}
#if defined(HAVE_S3QUANT)
// DXT1 compression through the external S3 quantizer (only built with HAVE_S3QUANT).
// Every 4x4 block is coded twice, once with inLevel 4 and once with inLevel 3;
// whichever DXT1 block has the lower blockError() is written to the output handler.
// NOTE(review): `data` is `void *` here but `const void *` in
// FixedBlockCompressor::compress, and block.init() is called with a different
// argument list than elsewhere in this file -- confirm this still compiles
// when HAVE_S3QUANT is defined.
void S3CompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
// Accumulated error of the chosen blocks; summed below but not otherwise used in this function.
float error = 0.0f;
BlockDXT1 dxtBlock3;
BlockDXT1 dxtBlock4;
ColorBlock block;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
block.init(inputFormat, w, h, data, x, y);
// Init rgb block.
RGBBlock rgbBlock;
rgbBlock.n = 16;
// Convert the 8-bit colors to floats clamped to [0, 1].
for (uint i = 0; i < 16; i++) {
rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f);
rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f);
rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f);
}
// Equal weight for all three channels.
rgbBlock.weight[0] = 1.0f;
rgbBlock.weight[1] = 1.0f;
rgbBlock.weight[2] = 1.0f;
// First pass: inLevel 4.
rgbBlock.inLevel = 4;
CodeRGBBlock(&rgbBlock);
// Copy results to DXT block.
dxtBlock4.col0.r = rgbBlock.endPoint[0][0];
dxtBlock4.col0.g = rgbBlock.endPoint[0][1];
dxtBlock4.col0.b = rgbBlock.endPoint[0][2];
dxtBlock4.col1.r = rgbBlock.endPoint[1][0];
dxtBlock4.col1.g = rgbBlock.endPoint[1][1];
dxtBlock4.col1.b = rgbBlock.endPoint[1][2];
dxtBlock4.setIndices(rgbBlock.index);
// Ensure col0 >= col1 for this block, patching the indices after the endpoint swap.
if (dxtBlock4.col0.u < dxtBlock4.col1.u) {
swap(dxtBlock4.col0.u, dxtBlock4.col1.u);
dxtBlock4.indices ^= 0x55555555;
}
uint error4 = blockError(block, dxtBlock4);
// Second pass: inLevel 3.
rgbBlock.inLevel = 3;
CodeRGBBlock(&rgbBlock);
// Copy results to DXT block.
dxtBlock3.col0.r = rgbBlock.endPoint[0][0];
dxtBlock3.col0.g = rgbBlock.endPoint[0][1];
dxtBlock3.col0.b = rgbBlock.endPoint[0][2];
dxtBlock3.col1.r = rgbBlock.endPoint[1][0];
dxtBlock3.col1.g = rgbBlock.endPoint[1][1];
dxtBlock3.col1.b = rgbBlock.endPoint[1][2];
dxtBlock3.setIndices(rgbBlock.index);
// Ensure col0 <= col1 for this block, patching the indices after the endpoint swap.
if (dxtBlock3.col0.u > dxtBlock3.col1.u) {
swap(dxtBlock3.col0.u, dxtBlock3.col1.u);
dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555;
}
uint error3 = blockError(block, dxtBlock3);
// Keep the better of the two encodings; ties go to the inLevel-4 block.
if (error3 < error4) {
error += error3;
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3));
}
}
else {
error += error4;
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4));
}
}
}
}
}
#endif // defined(HAVE_S3QUANT)
#if defined(HAVE_ATITC)
/// DXT1 compression of a whole w x h image through the ATI compression library.
/// The source is described as 8-bit ARGB or 32-bit float ARGB depending on
/// `inputFormat`; the compressed result is written to the output handler, if
/// any, in a single call. `alphaMode` and `compressionOptions` are not used.
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    // Compression settings: normal speed, no weighting, no DXT1 alpha,
    // multi-threading left enabled.
    ATI_TC_CompressOptions options;
    options.dwSize = sizeof(options);
    options.bUseChannelWeighting = false;
    options.bUseAdaptiveWeighting = false;
    options.bDXT1UseAlpha = false;
    options.nCompressionSpeed = ATI_TC_Speed_Normal;
    options.bDisableMultiThreading = false;
    //options.bDisableMultiThreading = true;

    // Init source texture
    ATI_TC_Texture srcTexture;
    srcTexture.dwSize = sizeof(srcTexture);
    srcTexture.dwWidth = w;
    srcTexture.dwHeight = h;
    if (inputFormat != nvtt::InputFormat_BGRA_8UB)
    {
        // Float input: 16 bytes per pixel.
        srcTexture.dwPitch = w * 16;
        srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
    }
    else
    {
        // 8-bit input: 4 bytes per pixel.
        srcTexture.dwPitch = w * 4;
        srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
    }
    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
    srcTexture.pData = (ATI_TC_BYTE*) data;

    // Init dest texture
    ATI_TC_Texture destTexture;
    destTexture.dwSize = sizeof(destTexture);
    destTexture.dwWidth = w;
    destTexture.dwHeight = h;
    destTexture.dwPitch = 0;
    destTexture.format = ATI_TC_FORMAT_DXT1;
    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);

    // Compress
    ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);

    if (outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
    }

    // The destination buffer was allocated above and is no longer needed.
    mem::free(destTexture.pData);
}
// Compresses the whole image to DXT5 with the ATI_Compress library and passes
// the encoded data to the output handler in a single writeData() call.
//
// BGRA_8UB input is described to the library as 8-bit ARGB (pitch w * 4); any
// other input format is described as 32-bit float ARGB (pitch w * 16). No
// ATI_TC_CompressOptions are supplied. alphaMode and compressionOptions are
// not used.
void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    // Input side: points straight at the caller's pixel data.
    ATI_TC_Texture input;
    input.dwSize = sizeof(input);
    input.dwWidth = w;
    input.dwHeight = h;
    switch (inputFormat)
    {
        case nvtt::InputFormat_BGRA_8UB:
            input.dwPitch = w * 4;
            input.format = ATI_TC_FORMAT_ARGB_8888;
            break;
        default:
            input.dwPitch = w * 16;
            input.format = ATI_TC_FORMAT_ARGB_32F;
            break;
    }
    input.dwDataSize = ATI_TC_CalculateBufferSize(&input);
    input.pData = (ATI_TC_BYTE*) data;

    // Output side: buffer sized by the library and owned by this function.
    ATI_TC_Texture encoded;
    encoded.dwSize = sizeof(encoded);
    encoded.dwWidth = w;
    encoded.dwHeight = h;
    encoded.dwPitch = 0;
    encoded.format = ATI_TC_FORMAT_DXT5;
    encoded.dwDataSize = ATI_TC_CalculateBufferSize(&encoded);
    encoded.pData = (ATI_TC_BYTE*) mem::malloc(encoded.dwDataSize);

    // Convert with no options structure (NULL).
    ATI_TC_ConvertTexture(&input, &encoded, NULL, NULL, NULL, NULL);

    if (outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(encoded.pData, encoded.dwDataSize);
    }

    mem::free(encoded.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
// Squish-based DXT1 compressor.
//
// NOTE: this function currently does nothing at run time. Its only active
// content is a compile-time TODO message; the implementation below is
// commented out. The disabled code refers to an `image` object that is not one
// of this function's parameters, so it cannot be re-enabled as-is: it would
// have to be rewritten to read from `data`, `w` and `h`.
void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
// Disabled implementation: swaps R and B of every pixel in a copy of the image,
// compresses the copy with squish (kDxt1 | kColourClusterFit) and writes the
// resulting blocks to the output handler.
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
// Compresses the image to DXT1 by loading it into a D3DFMT_DXT1 system-memory
// texture through D3DX and writing the locked surface contents to the output
// handler.
//
// BGRA_8UB input is loaded as D3DFMT_A8R8G8B8 with a pitch of w * 4; any other
// input format is loaded as D3DFMT_A32B32G32R32F with a pitch of w * 16.
// alphaMode and compressionOptions are not used.
//
// If any Direct3D object cannot be created, or the surface cannot be loaded or
// locked, nothing is written. Every COM object that was obtained is released
// before returning, including the texture.
void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
    if (d3d == NULL)
    {
        // No Direct3D runtime available; nothing to clean up yet.
        return;
    }

    D3DPRESENT_PARAMETERS presentParams;
    ZeroMemory(&presentParams, sizeof(presentParams));
    presentParams.Windowed = TRUE;
    presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
    presentParams.BackBufferWidth = 8;
    presentParams.BackBufferHeight = 8;
    presentParams.BackBufferFormat = D3DFMT_UNKNOWN;

    IDirect3DDevice9 * device = NULL;
    IDirect3DTexture9 * texture = NULL;
    IDirect3DSurface9 * surface = NULL;

    // Each step only runs if the previous one produced a usable object.
    HRESULT err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);

    if (SUCCEEDED(err) && device != NULL)
    {
        err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
    }

    if (SUCCEEDED(err) && texture != NULL)
    {
        err = texture->GetSurfaceLevel(0, &surface);
    }

    if (SUCCEEDED(err) && surface != NULL)
    {
        RECT srcRect;
        srcRect.left = 0;
        srcRect.top = 0;
        srcRect.bottom = h;
        srcRect.right = w;

        // D3DX converts the source pixels into the surface's DXT1 format.
        if (inputFormat == nvtt::InputFormat_BGRA_8UB)
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &srcRect, D3DX_DEFAULT, 0);
        }
        else
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &srcRect, D3DX_DEFAULT, 0);
        }

        if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
        {
            D3DLOCKED_RECT locked;
            ZeroMemory(&locked, sizeof(locked));

            err = surface->LockRect(&locked, NULL, D3DLOCK_READONLY);
            if (SUCCEEDED(err))
            {
                if (outputOptions.outputHandler != NULL) {
                    // One pitch-sized row per row of 4x4 blocks.
                    int size = locked.Pitch * ((h + 3) / 4);
                    outputOptions.outputHandler->writeData(locked.pBits, size);
                }

                surface->UnlockRect();
            }
        }
    }

    // Release in reverse order of acquisition. The texture was previously leaked.
    if (surface != NULL) surface->Release();
    if (texture != NULL) texture->Release();
    if (device != NULL) device->Release();
    d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
// Compresses one color block with stb_compress_dxt_block and stores the result
// in `output`. The block's R and B channels are swapped in place first, so
// `rgba` is modified by this call. alphaMode and compressionOptions are not used.
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Swap R and B
    rgba.swizzle(2, 1, 0, 3);

    unsigned char * const encoded = (unsigned char *)output;
    unsigned char * const pixels = (unsigned char *)rgba.colors();

    // The last two arguments are passed as 0, as before.
    stb_compress_dxt_block(encoded, pixels, 0, 0);
}
#endif // defined(HAVE_STB)

View File

@ -1,179 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSORDXT_H
#define NV_TT_COMPRESSORDXT_H
#include <nvcore/nvcore.h>
#include "nvtt.h"
#include "Compressor.h"
// Declarations of the CPU block compressors and the optional wrappers around
// external compression libraries.
namespace nv
{
struct ColorBlock;
// Base for compressors that work one ColorBlock at a time. It declares a
// whole-image compress() and leaves two hooks to the subclasses:
//  - compressBlock(): receives one ColorBlock and an `output` pointer
//    (presumably encodes the block into `output`; the definition is not in
//    this header).
//  - blockSize(): 8 or 16 in every subclass below (presumably the size in
//    bytes of one encoded block; confirm against compress()).
struct FixedBlockCompressor : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
virtual uint blockSize() const = 0;
};
// Fast CPU compressors.
// DXT1, DXT1a and BC4 report a block size of 8; DXT3, DXT5, DXT5n and BC5 report 16.
struct FastCompressorDXT1 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
struct FastCompressorDXT1a : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
struct FastCompressorDXT3 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
struct FastCompressorDXT5 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
struct FastCompressorDXT5n : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
struct FastCompressorBC4 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
struct FastCompressorBC5 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// Normal CPU compressors.
// Same block sizes as the fast variants; there are no BC4/BC5 entries in this group.
struct NormalCompressorDXT1 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
struct NormalCompressorDXT1a : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
struct NormalCompressorDXT3 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
struct NormalCompressorDXT5 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
struct NormalCompressorDXT5n : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// Production CPU compressors.
// Only BC4 (block size 8) and BC5 (block size 16) are declared in this group.
struct ProductionCompressorBC4 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
struct ProductionCompressorBC5 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// External compressors.
// Each one is compiled only when the matching HAVE_* macro is defined.
// NOTE(review): the S3, ATI, Squish and D3DX compress() declarations take a
// non-const `void * data`, whereas FixedBlockCompressor::compress() above takes
// `const void * data`. If CompressorInterface::compress() also takes a const
// pointer, these declarations do not override it - confirm against Compressor.h.
#if defined(HAVE_S3QUANT)
struct S3CompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_ATITC)
struct AtiCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct AtiCompressorDXT5 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
struct SquishCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
struct D3DXCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
// Unlike the other external wrappers, the stb compressor is block based and
// derives from FixedBlockCompressor (block size 8).
#if defined(HAVE_STB)
struct StbCompressorDXT1 : public FixedBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
} // nv namespace
#endif // NV_TT_COMPRESSORDXT_H

View File

@ -1,230 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "CompressorRGB.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvimage/PixelFormat.h>
#include <nvmath/Color.h>
#include <nvmath/Half.h>
#include <nvcore/Debug.h>
using namespace nv;
using namespace nvtt;
namespace
{
// Returns the size in bytes of one scanline of `w` pixels, where each pixel
// occupies `bitsize` bits rounded up to whole bytes, and the scanline is
// padded up to a multiple of 4 bytes.
inline uint computePitch(uint w, uint bitsize)
{
    const uint bytesPerPixel = (bitsize + 7) >> 3;
    const uint unpadded = w * bytesPerPixel;

    // Align to 32 bits.
    return (unpadded + 3) & ~3u;
}
// Copies `w` four-byte pixels from `src` to `dst` unchanged.
inline void convert_to_a8r8g8b8(const void * src, void * dst, uint w)
{
    const uint rowBytes = 4 * w;
    memcpy(dst, src, rowBytes);
}
// Copies `w` four-byte pixels from `src` to `dst` unchanged; the fourth byte
// of each pixel is carried over as-is rather than cleared.
inline void convert_to_x8r8g8b8(const void * src, void * dst, uint w)
{
    const uint byteLength = 4 * w;
    memcpy(dst, src, byteLength);
}
// Converts a 32-bit float to 16 bits by passing its raw bit pattern to
// half_from_float().
//
// The bits are extracted with memcpy rather than by writing one union member
// and reading another: reading the inactive member of a union is undefined
// behaviour in C++, while copying the object representation is well defined.
static uint16 to_half(float f)
{
    uint32 bits;
    memcpy(&bits, &f, sizeof(bits));
    return half_from_float(bits);
}
} // namespace
// Converts the input image to the uncompressed pixel layout described by
// compressionOptions and sends it to the output handler one scanline at a time.
//
// Input:
//  - InputFormat_BGRA_8UB: `data` is read as one 32-bit Color32 per pixel.
//  - otherwise (asserted to be InputFormat_RGBA_32F): `data` is read as four
//    consecutive float planes of w * h values each (R, G, B, A); the
//    components are not interleaved.
//
// Output layout:
//  - PixelType_Float: each channel is written as a 32-bit float, a 16-bit
//    half, or skipped, according to rsize/gsize/bsize/asize.
//  - otherwise: channels are packed into a single integer of `bitCount` bits,
//    using either the explicit masks (when compressionOptions.bitcount != 0)
//    or masks derived from the channel sizes with alpha in the lowest bits,
//    followed by blue, green and red.
//
// Every scanline is zero-padded to the pitch returned by computePitch().
// alphaMode is not used.
void PixelFormatConverter::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    uint bitCount;
    uint rmask, rshift, rsize;
    uint gmask, gshift, gsize;
    uint bmask, bshift, bsize;
    uint amask, ashift, asize;

    if (compressionOptions.pixelType == nvtt::PixelType_Float)
    {
        rsize = compressionOptions.rsize;
        gsize = compressionOptions.gsize;
        bsize = compressionOptions.bsize;
        asize = compressionOptions.asize;

        // Float channels are either absent, half or full precision.
        nvCheck(rsize == 0 || rsize == 16 || rsize == 32);
        nvCheck(gsize == 0 || gsize == 16 || gsize == 32);
        nvCheck(bsize == 0 || bsize == 16 || bsize == 32);
        nvCheck(asize == 0 || asize == 16 || asize == 32);

        bitCount = rsize + gsize + bsize + asize;
    }
    else
    {
        if (compressionOptions.bitcount != 0)
        {
            // Explicit masks: derive shift and size of every channel from its mask.
            bitCount = compressionOptions.bitcount;
            nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32);

            rmask = compressionOptions.rmask;
            gmask = compressionOptions.gmask;
            bmask = compressionOptions.bmask;
            amask = compressionOptions.amask;

            PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
            PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
            PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
            PixelFormat::maskShiftAndSize(amask, &ashift, &asize);
        }
        else
        {
            // Explicit sizes: stack the channels from the low bits up as A, B, G, R.
            rsize = compressionOptions.rsize;
            gsize = compressionOptions.gsize;
            bsize = compressionOptions.bsize;
            asize = compressionOptions.asize;

            bitCount = rsize + gsize + bsize + asize;
            nvCheck(bitCount <= 32);

            ashift = 0;
            bshift = ashift + asize;
            gshift = bshift + bsize;
            rshift = gshift + gsize;

            rmask = ((1 << rsize) - 1) << rshift;
            gmask = ((1 << gsize) - 1) << gshift;
            bmask = ((1 << bsize) - 1) << bshift;
            amask = ((1 << asize) - 1) << ashift;
        }
    }

    uint byteCount = (bitCount + 7) / 8;
    uint pitch = computePitch(w, bitCount);

    uint srcPitch = w;
    uint srcPlane = w * h;

    // Allocate output scanline.
    uint8 * dst = (uint8 *)mem::malloc(pitch + 4);

    for (uint y = 0; y < h; y++)
    {
        const uint * src = (const uint *)data + y * srcPitch;
        const float * fsrc = (const float *)data + y * srcPitch;

        uint8 * ptr = dst;

        for (uint x = 0; x < w; x++)
        {
            float r, g, b, a;

            if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
                Color32 c = Color32(src[x]);
                r = float(c.r) / 255.0f;
                g = float(c.g) / 255.0f;
                b = float(c.b) / 255.0f;
                a = float(c.a) / 255.0f;
            }
            else {
                nvDebugCheck (inputFormat == nvtt::InputFormat_RGBA_32F);

                // Color components not interleaved: one plane per channel.
                r = fsrc[x + 0 * srcPlane];
                g = fsrc[x + 1 * srcPlane];
                b = fsrc[x + 2 * srcPlane];
                a = fsrc[x + 3 * srcPlane];
            }

            if (compressionOptions.pixelType == nvtt::PixelType_Float)
            {
                // Channels are emitted back to back in R, G, B, A order.
                if (rsize == 32) *((float *)ptr) = r;
                else if (rsize == 16) *((uint16 *)ptr) = to_half(r);
                ptr += rsize / 8;

                if (gsize == 32) *((float *)ptr) = g;
                else if (gsize == 16) *((uint16 *)ptr) = to_half(g);
                ptr += gsize / 8;

                if (bsize == 32) *((float *)ptr) = b;
                else if (bsize == 16) *((uint16 *)ptr) = to_half(b);
                ptr += bsize / 8;

                if (asize == 32) *((float *)ptr) = a;
                else if (asize == 16) *((uint16 *)ptr) = to_half(a);
                ptr += asize / 8;
            }
            else
            {
                // Start from zero so that pixel types that are not handled below
                // produce a deterministic value instead of packing indeterminate bytes.
                Color32 c(0, 0, 0, 0);
                if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) {
                    c.r = uint8(clamp(r * 255, 0.0f, 255.0f));
                    c.g = uint8(clamp(g * 255, 0.0f, 255.0f));
                    c.b = uint8(clamp(b * 255, 0.0f, 255.0f));
                    c.a = uint8(clamp(a * 255, 0.0f, 255.0f));
                }
                // @@ Add support for nvtt::PixelType_SignedInt, nvtt::PixelType_SignedNorm, nvtt::PixelType_UnsignedInt

                uint p = 0;
                p |= PixelFormat::convert(c.r, 8, rsize) << rshift;
                p |= PixelFormat::convert(c.g, 8, gsize) << gshift;
                p |= PixelFormat::convert(c.b, 8, bsize) << bshift;
                p |= PixelFormat::convert(c.a, 8, asize) << ashift;

                // Output one byte at a time.
                for (uint i = 0; i < byteCount; i++)
                {
                    *(dst + x * byteCount + i) = (p >> (i * 8)) & 0xFF;
                }
            }
        }

        // Zero padding.
        for (uint x = w * byteCount; x < pitch; x++)
        {
            *(dst + x) = 0;
        }

        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(dst, pitch);
        }
    }

    mem::free(dst);
}

View File

@ -1,102 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "CompressorRGBE.h"
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include <nvmath/Color.h>
#include <nvcore/Debug.h>
using namespace nv;
using namespace nvtt;
// Packs an RGB triple into a Color32 holding three 8-bit mantissas (r, g, b)
// and a shared exponent stored in the alpha channel as exponent + 128.
// When the largest component is below 1e-32 all four channels are set to 0.
static Color32 toRgbe8(float r, float g, float b)
{
    Color32 packed;

    const float largest = max(max(r, g), b);
    if (largest < 1e-32) {
        packed.r = packed.g = packed.b = packed.a = 0;
        return packed;
    }

    // frexp() yields the exponent of the largest component; the same scale
    // factor is then applied to all three channels.
    int exponent;
    const float scale = frexp(largest, &exponent) * 256.0f / largest;

    packed.r = uint8(clamp(r * scale, 0.0f, 255.0f));
    packed.g = uint8(clamp(g * scale, 0.0f, 255.0f));
    packed.b = uint8(clamp(b * scale, 0.0f, 255.0f));
    packed.a = exponent + 128;

    return packed;
}
// Encodes the image as RGBE pixels (see toRgbe8) and sends it to the output
// handler one scanline of w * 4 bytes at a time.
//
// Input:
//  - InputFormat_BGRA_8UB: `data` is read as one 32-bit Color32 per pixel and
//    each channel is scaled to [0, 1].
//  - otherwise (asserted to be InputFormat_RGBA_32F): `data` is read as
//    consecutive float planes of w * h values each; only the first three
//    (R, G, B) are used.
//
// alphaMode is not used; compressionOptions is only checked in debug builds.
void CompressorRGBE::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    nvDebugCheck (compressionOptions.format == nvtt::Format_RGBE);

    uint srcPitch = w;
    uint srcPlane = w * h;

    // Allocate output scanline: one Color32 per pixel. The size must be given in
    // bytes; allocating only `w` bytes made the loop below write past the end
    // of the buffer.
    Color32 * dst = (Color32 *)mem::malloc(w * sizeof(Color32));

    for (uint y = 0; y < h; y++)
    {
        const uint * src = (const uint *)data + y * srcPitch;
        const float * fsrc = (const float *)data + y * srcPitch;

        for (uint x = 0; x < w; x++)
        {
            float r, g, b;

            if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
                Color32 c = Color32(src[x]);
                r = float(c.r) / 255.0f;
                g = float(c.g) / 255.0f;
                b = float(c.b) / 255.0f;
            }
            else {
                nvDebugCheck (inputFormat == nvtt::InputFormat_RGBA_32F);

                // Color components not interleaved.
                r = fsrc[x + 0 * srcPlane];
                g = fsrc[x + 1 * srcPlane];
                b = fsrc[x + 2 * srcPlane];
            }

            dst[x] = toRgbe8(r, g, b);
        }

        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(dst, w * 4);
        }
    }

    mem::free(dst);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,87 +0,0 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_CONTEXT_H
#define NV_TT_CONTEXT_H
#include "nvcore/Ptr.h"
#include "nvtt/Compressor.h"
#include "nvtt/cuda/CudaCompressorDXT.h"
#include "nvtt.h"
// Forward declaration of nv::Image, so this header can name the type without
// including its definition.
namespace nv
{
class Image;
}
namespace nvtt
{
struct Mipmap;
// Private implementation of nvtt::Compressor. The descriptions below are read
// off the declarations only; the definitions are not in this header.
struct Compressor::Private
{
Private() {}
// Entry points taking the private parts of the option objects.
// compress() and compress2D() return bool (presumably success - confirm in
// the definitions); estimateSize() returns an int.
bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
bool compress2D(InputFormat inputFormat, AlphaMode alphaMode, int w, int h, const void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
int estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const;
// NOTE(review): this TexImage overload is the only member function here that
// is not const - confirm whether that is intentional.
bool outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions);
private:
bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
// Compressor selection; both return a raw nv::CompressorInterface pointer.
// Ownership of the returned object is not visible from this header.
nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const;
nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const;
// Per-mipmap helpers. In these signatures w/h/d appear to be extents, f a
// face index and m a mipmap index (inferred from the names - verify).
bool compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
bool initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const;
int findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
int findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const;
void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
public:
// CUDA state: two flags plus the context object, which is held by nv::AutoPtr.
bool cudaSupported;
bool cudaEnabled;
nv::AutoPtr<nv::CudaContext> cuda;
};
} // nvtt namespace
#endif // NV_TT_CONTEXT_H

View File

@ -23,11 +23,8 @@
#include <string.h> // memcpy
#include <nvcore/Containers.h> // nextPowerOfTwo
#include <nvcore/Memory.h>
#include <nvmath/Color.h>
#include "nvtt.h"
#include "InputOptions.h"
@ -104,8 +101,6 @@ void InputOptions::reset()
m.colorTransform = ColorTransform_None;
m.linearTransform = Matrix(identity);
for (int i = 0; i < 4; i++) m.colorOffsets[i] = 0;
for (int i = 0; i < 4; i++) m.swizzleTransform[i] = i;
m.generateMipmaps = true;
m.maxLevel = -1;
@ -123,8 +118,6 @@ void InputOptions::reset()
m.maxExtent = 0;
m.roundMode = RoundMode_None;
m.premultiplyAlpha = false;
}
@ -168,8 +161,7 @@ void InputOptions::setTextureLayout(TextureType type, int width, int height, int
img.mipLevel = mipLevel;
img.face = f;
img.uint8data = NULL;
img.floatdata = NULL;
img.data = NULL;
w = max(1U, w / 2);
h = max(1U, h / 2);
@ -207,116 +199,14 @@ bool InputOptions::setMipmapData(const void * data, int width, int height, int d
return false;
}
switch(m.inputFormat)
{
case InputFormat_BGRA_8UB:
if (Image * image = new nv::Image())
{
image->allocate(width, height);
memcpy(image->pixels(), data, width * height * 4);
m.images[idx].uint8data = image;
}
else
{
// @@ Out of memory error.
return false;
}
break;
case InputFormat_RGBA_32F:
if (FloatImage * image = new nv::FloatImage())
{
const float * floatData = (const float *)data;
image->allocate(4, width, height);
for (int c = 0; c < 4; c++)
{
float * channel = image->channel(c);
for (int i = 0; i < width * height; i++)
{
channel[i] = floatData[i*4 + c];
}
}
m.images[idx].floatdata = image;
}
else
{
// @@ Out of memory error.
return false;
}
break;
default:
return false;
}
m.images[idx].data = new nv::Image();
m.images[idx].data->allocate(width, height);
memcpy(m.images[idx].data->pixels(), data, width * height * 4);
return true;
}
// Copies data
bool InputOptions::setMipmapChannelData(const void * data, int channel, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/)
{
nvCheck(depth == 1);
nvCheck(channel >= 0 && channel < 4);
const int idx = face * m.mipmapCount + mipLevel;
if (m.images[idx].width != width || m.images[idx].height != height || m.images[idx].depth != depth || m.images[idx].mipLevel != mipLevel || m.images[idx].face != face)
{
// Invalid dimension or index.
return false;
}
// Allocate image if not allocated already.
if (m.inputFormat == InputFormat_BGRA_8UB)
{
m.images[idx].floatdata = NULL;
if (m.images[idx].uint8data == NULL)
{
m.images[idx].uint8data = new Image();
m.images[idx].uint8data->allocate(width, height);
m.images[idx].uint8data->fill(Color32(0,0,0,0));
}
}
else if (m.inputFormat == InputFormat_RGBA_32F)
{
m.images[idx].uint8data = NULL;
if (m.images[idx].floatdata == NULL)
{
m.images[idx].floatdata = new FloatImage();
m.images[idx].floatdata->allocate(4, width, height);
m.images[idx].floatdata->clear();
}
}
else
{
m.images[idx].floatdata = NULL;
m.images[idx].uint8data = NULL;
return false;
}
// Copy channel data to image.
if (m.inputFormat == InputFormat_BGRA_8UB)
{
// @@ TODO
}
else if (m.inputFormat == InputFormat_RGBA_32F)
{
const float * floatData = (const float *)data;
float * channelPtr = m.images[idx].floatdata->channel(channel);
for (int i = 0; i < width * height; i++)
{
channelPtr[i] = floatData[i];
}
}
return true;
}
/// Describe the format of the input.
void InputOptions::setFormat(InputFormat format)
{
@ -411,32 +301,8 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2,
{
nvCheck(channel >= 0 && channel < 4);
m.linearTransform(channel, 0) = w0;
m.linearTransform(channel, 1) = w1;
m.linearTransform(channel, 2) = w2;
m.linearTransform(channel, 3) = w3;
}
void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset)
{
nvCheck(channel >= 0 && channel < 4);
setLinearTransform(channel, w0, w1, w2, w3);
m.colorOffsets[channel] = offset;
}
void InputOptions::setSwizzleTransform(int x, int y, int z, int w)
{
nvCheck(x >= 0 && x <= 6);
nvCheck(y >= 0 && y <= 6);
nvCheck(z >= 0 && z <= 6);
nvCheck(w >= 0 && w <= 6);
m.swizzleTransform[0] = x;
m.swizzleTransform[1] = y;
m.swizzleTransform[2] = z;
m.swizzleTransform[3] = w;
Vector4 w(w0, w1, w2, w3);
//m.linearTransform.setRow(channel, w);
}
void InputOptions::setMaxExtents(int e)
@ -450,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode)
m.roundMode = mode;
}
void InputOptions::setPremultiplyAlpha(bool b)
{
m.premultiplyAlpha = b;
}
void InputOptions::Private::computeTargetExtents() const
{
@ -533,7 +395,7 @@ const Image * InputOptions::Private::image(uint face, uint mipmap) const
nvDebugCheck(image.face == face);
nvDebugCheck(image.mipLevel == mipmap);
return image.uint8data.ptr();
return image.data.ptr();
}
const Image * InputOptions::Private::image(uint idx) const
@ -542,14 +404,5 @@ const Image * InputOptions::Private::image(uint idx) const
const InputImage & image = this->images[idx];
return image.uint8data.ptr();
}
const FloatImage * InputOptions::Private::floatImage(uint idx) const
{
nvDebugCheck(idx < faceCount * mipmapCount);
const InputImage & image = this->images[idx];
return image.floatdata.ptr();
return image.data.ptr();
}

View File

@ -28,7 +28,6 @@
#include <nvmath/Vector.h>
#include <nvmath/Matrix.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
#include "nvtt.h"
namespace nvtt
@ -57,8 +56,6 @@ namespace nvtt
// Color transform.
ColorTransform colorTransform;
nv::Matrix linearTransform;
float colorOffsets[4];
uint swizzleTransform[4];
// Mipmap generation options.
bool generateMipmaps;
@ -81,8 +78,6 @@ namespace nvtt
uint maxExtent;
RoundMode roundMode;
bool premultiplyAlpha;
// @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere...
mutable uint targetWidth;
mutable uint targetHeight;
@ -94,9 +89,7 @@ namespace nvtt
int realMipmapCount() const;
const nv::Image * image(uint face, uint mipmap) const;
const nv::Image * image(uint idx) const;
const nv::FloatImage * floatImage(uint idx) const;
const nv::Image * image(uint idx) const;
};
@ -105,8 +98,6 @@ namespace nvtt
{
InputImage() {}
bool hasValidData() const { return uint8data != NULL || floatdata != NULL; }
int mipLevel;
int face;
@ -114,8 +105,7 @@ namespace nvtt
int height;
int depth;
nv::AutoPtr<nv::Image> uint8data;
nv::AutoPtr<nv::FloatImage> floatdata;
nv::AutoPtr<nv::Image> data;
};
} // nvtt namespace

View File

@ -21,17 +21,16 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvmath/Color.h>
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
#include <nvcore/Containers.h> // swap
#include <limits.h>
using namespace nv;
using namespace OptimalCompress;
@ -40,37 +39,10 @@ using namespace OptimalCompress;
namespace
{
// Error metric for the green channel: the squared difference of the two
// values. The result is symmetric in its arguments and never negative.
static int greenDistance(int g0, int g1)
{
    const int delta = g0 - g1;
    return delta * delta;
}
// Error metric for the alpha channel: squared difference between a0 and a1.
static int alphaDistance(int a0, int a1)
{
    return (a0 - a1) * (a0 - a1);
}
// Returns the entry of the 4-value palette {max, 2/3 max + 1/3 min,
// 1/3 max + 2/3 min, min} that 'green' quantizes to.
//
// Expects maxGreen >= minGreen; the range is computed in unsigned arithmetic.
// When both endpoints are equal the single palette value is returned.
static uint nearestGreen4(uint green, uint maxGreen, uint minGreen)
{
    const uint range = maxGreen - minGreen;
    if (range == 0) return maxGreen;

    // Half a palette step above the maximum, so that truncating division
    // rounds to the nearest entry.
    const uint bias = maxGreen + range / 6;

    // Values at or above the bias map to entry 0 (the maximum). Testing this
    // explicitly avoids evaluating 'bias - green' when it would wrap around in
    // unsigned arithmetic and select an arbitrary entry.
    uint index = 0;
    if (green < bias)
    {
        index = 3 * (bias - green) / range;
        if (index > 3) index = 3;
    }

    return (index * minGreen + (3 - index) * maxGreen) / 3;
}
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block, int bestError = INT_MAX)
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
// uint g0 = (block->col0.g << 2) | (block->col0.g >> 4);
// uint g1 = (block->col1.g << 2) | (block->col1.g >> 4);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
@ -78,24 +50,17 @@ namespace
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = greenDistance(green, palette[0]);
error = min(error, greenDistance(green, palette[1]));
error = min(error, greenDistance(green, palette[2]));
error = min(error, greenDistance(green, palette[3]));
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
// totalError += nearestGreen4(green, g0, g1);
if (totalError > bestError)
{
// early out
return totalError;
}
}
return totalError;
@ -113,10 +78,10 @@ namespace
{
const int color = rgba.color(i).g;
uint d0 = greenDistance(color0, color);
uint d1 = greenDistance(color1, color);
uint d2 = greenDistance(color2, color);
uint d3 = greenDistance(color3, color);
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
@ -137,78 +102,49 @@ namespace
// Choose quantized color that produces less error. Used by DXT3 compressor.
inline static uint quantize4(uint8 a)
{
int q0 = max(int(a >> 4) - 1, 0);
int q0 = (a >> 4) - 1;
int q1 = (a >> 4);
int q2 = min(int(a >> 4) + 1, 0xF);
int q2 = (a >> 4) + 1;
q0 = (q0 << 4) | q0;
q1 = (q1 << 4) | q1;
q2 = (q2 << 4) | q2;
int d0 = alphaDistance(q0, a);
int d1 = alphaDistance(q1, a);
int d2 = alphaDistance(q2, a);
int d0 = abs(q0 - a);
int d1 = abs(q1 - a);
int d2 = abs(q2 - a);
if (d0 < d1 && d0 < d2) return q0 >> 4;
if (d1 < d2) return q1 >> 4;
return q2 >> 4;
}
// Returns the entry of the 8-value palette, linearly interpolated from
// maxAlpha (entry 0) to minAlpha (entry 7), that 'alpha' quantizes to.
//
// Expects maxAlpha >= minAlpha; the range is computed in unsigned arithmetic.
static uint nearestAlpha8(uint alpha, uint maxAlpha, uint minAlpha)
{
    // Degenerate palette with a single value. Without this guard the scale
    // below is 7/0 = inf, and for alpha == maxAlpha the product 0 * inf is
    // NaN, which is then converted to an unsigned index.
    if (maxAlpha == minAlpha) return maxAlpha;

    const float range = float(maxAlpha - minAlpha);

    // Half a palette step above the maximum, so truncation rounds to nearest.
    const float bias = float(maxAlpha) + range / (2.0f * 7.0f);
    const float scale = 7.0f / range;

    const uint index = (uint)clamp((bias - float(alpha)) * scale, 0.0f, 7.0f);

    return (index * minAlpha + (7 - index) * maxAlpha) / 7;
}
// Accumulates, over the 16 texels of 'rgba', the squared distance between each
// alpha value and the palette entry nearestAlpha8() selects for the block's
// alpha0/alpha1 endpoints. Stops as soon as the running total exceeds
// 'bestError' and returns the total reached at that point.
static uint computeAlphaError8(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
{
    int accumulated = 0;

    uint texel = 0;
    while (texel < 16)
    {
        const uint8 a = rgba.color(texel).a;
        const uint quantized = nearestAlpha8(a, block->alpha0, block->alpha1);

        accumulated += alphaDistance(a, quantized);

        // Early out: this candidate is already worse than the best one.
        if (accumulated > bestError) break;

        texel++;
    }

    return accumulated;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
int totalError = 0;
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int minDist = INT_MAX;
uint besterror = 256*256;
uint best;
for (uint p = 0; p < 8; p++)
{
int dist = alphaDistance(alpha, alphas[p]);
minDist = min(dist, minDist);
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += minDist;
if (totalError > bestError)
{
// early out
return totalError;
}
totalError += besterror;
}
return totalError;
@ -223,21 +159,22 @@ namespace
{
uint8 alpha = rgba.color(i).a;
int minDist = INT_MAX;
int bestIndex = 8;
for (uint p = 0; p < 8; p++)
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int dist = alphaDistance(alpha, alphas[p]);
int d = alphas[p] - alpha;
uint error = d * d;
if (dist < minDist)
if (error < besterror)
{
minDist = dist;
bestIndex = p;
besterror = error;
best = p;
}
}
nvDebugCheck(bestIndex < 8);
nvDebugCheck(best < 8);
block->setIndex(i, bestIndex);
block->setIndex(i, best);
}
}
@ -280,23 +217,6 @@ void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
}
}
// Single-value green compressor: fills 'dxtBlock' from the OMatch6 lookup
// table entry for 'g'. Both endpoints get red = 31 and blue = 0, and every
// 2-bit index is set to 2 (0xaaaaaaaa).
void OptimalCompress::compressDXT1G(uint8 g, BlockDXT1 * dxtBlock)
{
    // Green endpoints come from the lookup table.
    dxtBlock->col0.g = OMatch6[g][0];
    dxtBlock->col1.g = OMatch6[g][1];

    // Red and blue are the same constants on both endpoints.
    dxtBlock->col0.r = 31;
    dxtBlock->col1.r = 31;
    dxtBlock->col0.b = 0;
    dxtBlock->col1.b = 0;

    dxtBlock->indices = 0xaaaaaaaa;

    // Keep col0 >= col1. When the endpoints trade places, flip the low bit of
    // every 2-bit index so each texel refers to the other interpolated entry.
    const bool endpointsReversed = dxtBlock->col0.u < dxtBlock->col1.u;
    if (endpointsReversed)
    {
        swap(dxtBlock->col0.u, dxtBlock->col1.u);
        dxtBlock->indices ^= 0x55555555;
    }
}
// Brute force green channel compressor
void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
@ -306,23 +226,12 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
uint8 ming = 63;
uint8 maxg = 0;
bool isSingleColor = true;
uint8 singleColor = rgba.color(0).g;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = (rgba.color(i).g + 1) >> 2;
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
if (rgba.color(i).g != singleColor) isSingleColor = false;
}
if (isSingleColor)
{
compressDXT1G(singleColor, block);
return;
}
block->col0.r = 31;
@ -332,38 +241,36 @@ void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
block->col0.b = 0;
block->col1.b = 0;
int bestError = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
// Expand search space a bit.
const int greenExpand = 4;
ming = (ming <= greenExpand) ? 0 : ming - greenExpand;
maxg = (maxg >= 63-greenExpand) ? 63 : maxg + greenExpand;
for (int g0 = ming+1; g0 <= maxg; g0++)
if (maxg - ming > 4)
{
for (int g1 = ming; g1 < g0; g1++)
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block, bestError);
if (error < bestError)
for (int g1 = ming; g1 < g0-4; g1++)
{
bestError = error;
bestg0 = g0;
bestg1 = g1;
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
block->col0.g = bestg0;
block->col1.g = bestg1;
nvDebugCheck(bestg0 == bestg1 || block->isFourColorMode());
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
@ -406,26 +313,42 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
int besta0 = maxa;
int besta1 = mina;
// Expand search space a bit.
const int alphaExpand = 8;
mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand;
maxa = (maxa >= 255-alphaExpand) ? 255 : maxa + alphaExpand;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
nvDebugCheck(a0 - a1 > 8);
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock, besterror);
int error = computeAlphaError(rgba, dxtBlock);
if (error < besterror)
{

View File

@ -26,8 +26,6 @@
#include <nvimage/nvimage.h>
#include <nvmath/Color.h>
namespace nv
{
struct ColorBlock;
@ -41,7 +39,6 @@ namespace nv
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);

View File

@ -33,9 +33,6 @@ OutputOptions::OutputOptions() : m(*new OutputOptions::Private())
OutputOptions::~OutputOptions()
{
// Cleanup output handler.
setOutputHandler(NULL);
delete &m;
}
@ -46,31 +43,20 @@ void OutputOptions::reset()
m.outputHandler = NULL;
m.errorHandler = NULL;
m.outputHeader = true;
m.container = Container_DDS;
}
/// Set output file name.
void OutputOptions::setFileName(const char * fileName)
{
m.fileName = fileName; // @@ Do we need to record filename?
m.fileName = fileName;
m.outputHandler = NULL;
DefaultOutputHandler * oh = new DefaultOutputHandler(fileName);
if (!oh->stream.isError())
{
m.outputHandler = oh;
}
}
/// Set output handler.
void OutputOptions::setOutputHandler(OutputHandler * outputHandler)
{
if (!m.fileName.isNull())
{
delete m.outputHandler;
m.fileName.reset();
}
m.fileName.reset();
m.outputHandler = outputHandler;
}
@ -86,20 +72,31 @@ void OutputOptions::setOutputHeader(bool outputHeader)
m.outputHeader = outputHeader;
}
/// Set container.
void OutputOptions::setContainer(Container container)
{
m.container = container;
}
bool OutputOptions::Private::hasValidOutputHandler() const
bool OutputOptions::Private::openFile() const
{
if (!fileName.isNull())
{
return outputHandler != NULL;
nvCheck(outputHandler == NULL);
DefaultOutputHandler * oh = new DefaultOutputHandler(fileName.str());
if (oh->stream.isError())
{
return false;
}
outputHandler = oh;
}
return true;
}
// Deletes the output handler and resets the pointer, but only when a file
// name is set. With no file name, the handler is left untouched.
void OutputOptions::Private::closeFile() const
{
    if (fileName.isNull()) return;

    delete outputHandler;
    outputHandler = NULL;
}

View File

@ -52,7 +52,7 @@ namespace nvtt
//return !stream.isError();
return true;
}
nv::StdOutputStream stream;
};
@ -61,12 +61,12 @@ namespace nvtt
{
nv::Path fileName;
OutputHandler * outputHandler;
mutable OutputHandler * outputHandler;
ErrorHandler * errorHandler;
bool outputHeader;
Container container;
bool hasValidOutputHandler() const;
bool openFile() const;
void closeFile() const;
};

View File

@ -21,10 +21,7 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvmath/Fitting.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
@ -133,7 +130,7 @@ inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
return dot(c0-c1, c0-c1);
}
inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
inline static uint computeIndices4(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
Vector3 palette[4];
palette[0] = maxColor;
@ -165,28 +162,6 @@ inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColo
return indices;
}
inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
{
Vector3 palette[4];
palette[0] = maxColor;
palette[1] = minColor;
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
float total = 0.0f;
for (int i = 0; i < 16; i++)
{
float d0 = colorDistance(palette[0], block[i]);
float d1 = colorDistance(palette[1], block[i]);
float d2 = colorDistance(palette[2], block[i]);
float d3 = colorDistance(palette[3], block[i]);
total += min(min(d0, d1), min(d2, d3));
}
return total;
}
inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColor, Vector3::Arg minColor)
{
Vector3 palette[4];
@ -475,8 +450,7 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
// read block
Vector3 block[16];
extractColorBlockRGB(rgba, block);
#if 1
// find min and max colors
Vector3 maxColor, minColor;
findMinMaxColorsBox(block, 16, &maxColor, &minColor);
@ -484,31 +458,7 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
selectDiagonal(block, 16, &maxColor, &minColor);
insetBBox(&maxColor, &minColor);
#else
float weights[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
Vector3 cluster[4];
int count = Compute4Means(16, block, weights, Vector3(1, 1, 1), cluster);
Vector3 maxColor, minColor;
float bestError = FLT_MAX;
for (int i = 1; i < 4; i++)
{
for (int j = 0; j < i; j++)
{
uint16 color0 = roundAndExpand(&cluster[i]);
uint16 color1 = roundAndExpand(&cluster[j]);
float error = evaluatePaletteError4(block, cluster[i], cluster[j]);
if (error < bestError) {
bestError = error;
maxColor = cluster[i];
minColor = cluster[j];
}
}
}
#endif
uint16 color0 = roundAndExpand(&maxColor);
uint16 color1 = roundAndExpand(&minColor);

File diff suppressed because it is too large Load Diff

View File

@ -1,79 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_TEXIMAGE_H
#define NV_TT_TEXIMAGE_H
#include "nvtt.h"
#include <nvcore/Containers.h>
#include <nvcore/RefCounted.h>
#include <nvcore/Ptr.h>
#include <nvimage/Image.h>
#include <nvimage/FloatImage.h>
namespace nvtt
{
// Reference-counted implementation data of nvtt::TexImage: the texture
// attributes plus an array of FloatImage pointers. The destructor deletes
// every image in the array.
struct TexImage::Private : public nv::RefCounted
{
// Default state: 2D texture, mirror wrapping, no alpha, not a normal map, and
// an image array resized to one NULL entry.
Private()
{
type = TextureType_2D;
wrapMode = WrapMode_Mirror;
alphaMode = AlphaMode_None;
isNormalMap = false;
imageArray.resize(1, NULL);
}
// NOTE(review): imageArray is assigned as-is, so the copy appears to hold the
// same FloatImage pointers as 'p', while ~Private() deletes every element.
// If both objects are destroyed each image would be deleted twice. Confirm
// whether nv::Array assignment copies the raw pointers and whether a deep
// copy of the images is intended here.
Private(const Private & p) // Copy ctor. inits refcount to 0.
{
type = p.type;
wrapMode = p.wrapMode;
alphaMode = p.alphaMode;
isNormalMap = p.isNormalMap;
imageArray = p.imageArray;
}
// Deletes every image referenced by imageArray.
~Private()
{
const uint count = imageArray.count();
for (uint i = 0; i < count; i++) {
delete imageArray[i];
}
}
TextureType type;
WrapMode wrapMode;
AlphaMode alphaMode;
bool isNormalMap;
// Images deleted by the destructor; entries may be NULL (see default ctor).
nv::Array<nv::FloatImage *> imageArray;
};
} // nvtt namespace
#endif // NV_TT_TEXIMAGE_H

File diff suppressed because it is too large Load Diff

1119
src/nvtt/cuda/Bitmaps.h Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,380 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Containers.h>
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvtt/CompressionOptions.h>
#include <nvtt/OutputOptions.h>
#include <nvtt/QuickCompressDXT.h>
#include <nvtt/OptimalCompressDXT.h>
#include "CudaCompressDXT.h"
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda_runtime_api.h>
#endif
#include <time.h>
#include <stdio.h>
using namespace nv;
using namespace nvtt;
#if defined HAVE_CUDA
#define MAX_BLOCKS 8192U // 32768, 65535
extern "C" void setupCompressKernel(const float weights[3]);
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
#include "Bitmaps.h" // @@ Rename to BitmapTable.h
// Convert linear image to block linear.
static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
{
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
for(uint by = 0; by < h; by++) {
for(uint bx = 0; bx < w; bx++) {
const uint bw = min(image->width() - bx * 4, 4U);
const uint bh = min(image->height() - by * 4, 4U);
for (uint i = 0; i < 16; i++) {
const int x = (i % 4) % bw;
const int y = (i / 4) % bh;
blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
}
}
}
}
#endif
/// Allocates the device resources used by the compress*() methods.
///
/// With CUDA enabled this uploads the 992-entry bitmap table and allocates two
/// scratch buffers sized for MAX_BLOCKS blocks: 64 bytes of input and 8 bytes
/// of output per block. Allocation failures are not reported here; call
/// isValid() afterwards. Without CUDA all device pointers stay NULL.
CudaCompressor::CudaCompressor() :
    m_bitmapTable(NULL),
    m_data(NULL),
    m_result(NULL),
    m_image(NULL),               // No image until setImage() is called.
    m_alphaMode(AlphaMode_None)  // Previously left uninitialized.
{
#if defined HAVE_CUDA
    // Allocate and upload bitmaps.
    cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint));
    if (m_bitmapTable != NULL)
    {
        cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
    }

    // Allocate scratch buffers.
    cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U);
    cudaMalloc((void**) &m_result, MAX_BLOCKS * 8U);
#endif
}
// Releases the three device allocations made by the constructor. Does nothing
// when built without HAVE_CUDA. The image passed to setImage() is not freed.
CudaCompressor::~CudaCompressor()
{
#if defined HAVE_CUDA
// Free device mem allocations.
cudaFree(m_data);
cudaFree(m_result);
cudaFree(m_bitmapTable);
#endif
}
// Reports whether the compressor can be used: with CUDA enabled the last CUDA
// error must be cudaSuccess, and in every build all three device pointers
// must be non-NULL.
bool CudaCompressor::isValid() const
{
#if defined HAVE_CUDA
    const bool cudaFailed = (cudaGetLastError() != cudaSuccess);
    if (cudaFailed)
    {
        return false;
    }
#endif

    if (m_data == NULL || m_result == NULL)
    {
        return false;
    }
    return m_bitmapTable != NULL;
}
// @@ This code is very repetitive and needs to be cleaned up.
// Selects the image and alpha mode used by the following compress*() calls.
// Only the pointer is stored, so the image must remain alive until those
// calls have returned.
void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
/// Compress the image selected with setImage() to DXT1 using CUDA.
///
/// The image is converted to block-linear order on the host and compressed in
/// batches of at most MAX_BLOCKS blocks. Each batch produces 8 bytes per block,
/// which are passed to outputOptions.outputHandler when one is set. CUDA
/// failures are reported through outputOptions.errorHandler as Error_CudaError.
/// A failed host allocation is reported the same way, since that is the only
/// error code visible in this file. Without HAVE_CUDA the call only reports
/// Error_CudaError.
void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(cuda::isHardwarePresent());

#if defined HAVE_CUDA
    nvDebugCheck(m_image != NULL);

    // Image size in blocks.
    const uint w = (m_image->width() + 3) / 4;
    const uint h = (m_image->height() + 3) / 4;
    const uint blockNum = w * h;

    const uint imageSize = blockNum * 16 * sizeof(Color32);
    uint * blockLinearImage = (uint *) malloc(imageSize);

    // malloc may legitimately return NULL for a zero-sized request, so only a
    // non-empty image counts as an allocation failure.
    if (blockLinearImage == NULL && blockNum != 0)
    {
        if (outputOptions.errorHandler != NULL)
        {
            outputOptions.errorHandler->error(Error_CudaError);
        }
        return;
    }

    convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!

    setupCompressKernel(compressionOptions.colorWeight.ptr());

    // TODO: Add support for multiple GPUs.
    uint bn = 0;
    while (bn != blockNum)
    {
        const uint count = min(blockNum - bn, MAX_BLOCKS);

        // 16 texels (64 bytes) of input per block.
        cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);

        // Launch kernel.
        compressKernelDXT1(count, m_data, m_result, m_bitmapTable);

        // Check for errors.
        const cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));

            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // Copy result to host, overwrite swizzled image. The start of the
        // buffer is reused; those blocks have already been uploaded.
        cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);

        // Output result.
        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
        }

        bn += count;
    }

    free(blockLinearImage);

#else
    if (outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_CudaError);
    }
#endif
}
/// Compress the image selected with setImage() to DXT3 using CUDA.
///
/// Color blocks are compressed on the GPU in batches of at most MAX_BLOCKS
/// blocks. The weighted kernel is used when the alpha mode is
/// AlphaMode_Transparency. The explicit alpha blocks are compressed on the host
/// with OptimalCompress::compressDXT3A. For each block, 8 bytes of alpha
/// followed by 8 bytes of color are passed to outputOptions.outputHandler when
/// one is set. CUDA failures and failed host allocations are reported through
/// outputOptions.errorHandler as Error_CudaError, the only error code visible
/// in this file. Without HAVE_CUDA the call only reports Error_CudaError.
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(cuda::isHardwarePresent());

#if defined HAVE_CUDA
    nvDebugCheck(m_image != NULL);

    // Image size in blocks.
    const uint w = (m_image->width() + 3) / 4;
    const uint h = (m_image->height() + 3) / 4;
    const uint blockNum = w * h;
    const uint compressedSize = blockNum * 8;

    const uint imageSize = blockNum * 16 * sizeof(Color32);
    uint * blockLinearImage = (uint *) malloc(imageSize);

    // Alpha blocks are only needed for one batch at a time.
    AlphaBlockDXT3 * alphaBlocks = (AlphaBlockDXT3 *) malloc(min(compressedSize, MAX_BLOCKS * 8U));

    // malloc may legitimately return NULL for a zero-sized request, so only a
    // non-empty image counts as an allocation failure.
    if (blockNum != 0 && (blockLinearImage == NULL || alphaBlocks == NULL))
    {
        free(alphaBlocks);
        free(blockLinearImage);

        if (outputOptions.errorHandler != NULL)
        {
            outputOptions.errorHandler->error(Error_CudaError);
        }
        return;
    }

    convertToBlockLinear(m_image, blockLinearImage);

    setupCompressKernel(compressionOptions.colorWeight.ptr());

    uint bn = 0;
    while (bn != blockNum)
    {
        const uint count = min(blockNum - bn, MAX_BLOCKS);

        // 16 texels (64 bytes) of input per block.
        cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);

        // Launch kernel.
        if (m_alphaMode == AlphaMode_Transparency)
        {
            compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
        }
        else
        {
            compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
        }

        // Compress alpha in parallel with the GPU.
        for (uint i = 0; i < count; i++)
        {
            ColorBlock rgba(blockLinearImage + (bn + i) * 16);
            OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
        }

        // Check for errors.
        const cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));

            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // Copy result to host, overwrite swizzled image. The start of the
        // buffer is reused; those blocks have already been consumed.
        cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);

        // Output result: alpha block followed by color block (2 uints each).
        if (outputOptions.outputHandler != NULL)
        {
            for (uint i = 0; i < count; i++)
            {
                outputOptions.outputHandler->writeData(alphaBlocks + i, 8);
                outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8);
            }
        }

        bn += count;
    }

    free(alphaBlocks);
    free(blockLinearImage);

#else
    if (outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_CudaError);
    }
#endif
}
/// Compress the image selected with setImage() to DXT5 using CUDA.
///
/// Color blocks are compressed on the GPU in batches of at most MAX_BLOCKS
/// blocks. The weighted kernel is used when the alpha mode is
/// AlphaMode_Transparency. The interpolated alpha blocks are compressed on the
/// host with QuickCompress::compressDXT5A. For each block, 8 bytes of alpha
/// followed by 8 bytes of color are passed to outputOptions.outputHandler when
/// one is set. CUDA failures and failed host allocations are reported through
/// outputOptions.errorHandler as Error_CudaError, the only error code visible
/// in this file. Without HAVE_CUDA the call only reports Error_CudaError.
void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(cuda::isHardwarePresent());

#if defined HAVE_CUDA
    nvDebugCheck(m_image != NULL);

    // Image size in blocks.
    const uint w = (m_image->width() + 3) / 4;
    const uint h = (m_image->height() + 3) / 4;
    const uint blockNum = w * h;
    const uint compressedSize = blockNum * 8;

    const uint imageSize = blockNum * 16 * sizeof(Color32);
    uint * blockLinearImage = (uint *) malloc(imageSize);

    // Alpha blocks are only needed for one batch at a time.
    AlphaBlockDXT5 * alphaBlocks = (AlphaBlockDXT5 *) malloc(min(compressedSize, MAX_BLOCKS * 8U));

    // malloc may legitimately return NULL for a zero-sized request, so only a
    // non-empty image counts as an allocation failure.
    if (blockNum != 0 && (blockLinearImage == NULL || alphaBlocks == NULL))
    {
        free(alphaBlocks);
        free(blockLinearImage);

        if (outputOptions.errorHandler != NULL)
        {
            outputOptions.errorHandler->error(Error_CudaError);
        }
        return;
    }

    convertToBlockLinear(m_image, blockLinearImage);

    setupCompressKernel(compressionOptions.colorWeight.ptr());

    uint bn = 0;
    while (bn != blockNum)
    {
        const uint count = min(blockNum - bn, MAX_BLOCKS);

        // 16 texels (64 bytes) of input per block.
        cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);

        // Launch kernel.
        if (m_alphaMode == AlphaMode_Transparency)
        {
            compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
        }
        else
        {
            compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
        }

        // Compress alpha in parallel with the GPU.
        for (uint i = 0; i < count; i++)
        {
            ColorBlock rgba(blockLinearImage + (bn + i) * 16);
            QuickCompress::compressDXT5A(rgba, alphaBlocks + i);
        }

        // Check for errors.
        const cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));

            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // Copy result to host, overwrite swizzled image. The start of the
        // buffer is reused; those blocks have already been consumed.
        cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);

        // Output result: alpha block followed by color block (2 uints each).
        if (outputOptions.outputHandler != NULL)
        {
            for (uint i = 0; i < count; i++)
            {
                outputOptions.outputHandler->writeData(alphaBlocks + i, 8);
                outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8);
            }
        }

        bn += count;
    }

    free(alphaBlocks);
    free(blockLinearImage);

#else
    if (outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_CudaError);
    }
#endif
}

View File

@ -1,40 +1,61 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_COMPRESSORRGB_H
#define NV_TT_COMPRESSORRGB_H
#include "nvtt.h"
#include "Compressor.h"
namespace nv
{
// Compressor declared in the RGB compressor header. It exposes a single
// virtual compress() that receives the input format, alpha mode, image size,
// raw pixel data and the compression/output options.
// NOTE(review): presumably an override of CompressorInterface::compress;
// confirm against Compressor.h.
struct PixelFormatConverter : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
} // nv namespace
#endif // NV_TT_COMPRESSORRGB_H
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_CUDACOMPRESSDXT_H
#define NV_TT_CUDACOMPRESSDXT_H
#include <nvimage/nvimage.h>
#include <nvtt/nvtt.h>
namespace nv
{
class Image;
// DXT compressor that runs the color compression on a CUDA device.
// Usage: construct, check isValid(), select an image with setImage(), then
// call one of the compressDXT*() methods.
// NOTE(review): the class holds raw device pointers and declares no copy
// constructor or assignment operator. Copying an instance looks unsafe if the
// destructor frees them; confirm against the implementation.
class CudaCompressor
{
public:
CudaCompressor();
~CudaCompressor();
// True when the device resources are usable.
bool isValid() const;
// Selects the image and alpha mode for subsequent compressDXT*() calls.
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
// Compress the current image, sending data and errors to the handlers in
// outputOptions.
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
// Bitmap table, input scratch buffer and result scratch buffer.
uint * m_bitmapTable;
uint * m_data;
uint * m_result;
// Image and alpha mode given to setImage().
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
} // nv namespace
#endif // NV_TT_CUDACOMPRESSDXT_H

View File

@ -1,649 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Containers.h>
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvtt/CompressionOptions.h>
#include <nvtt/OutputOptions.h>
#include <nvtt/QuickCompressDXT.h>
#include <nvtt/OptimalCompressDXT.h>
#include "CudaCompressorDXT.h"
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda_runtime_api.h>
#endif
#include <time.h>
#include <stdio.h>
using namespace nv;
using namespace nvtt;
#if defined HAVE_CUDA
#define MAX_BLOCKS 8192U // 32768, 65535
extern "C" void setupCompressKernel(const float weights[3]);
extern "C" void bindTextureToArray(cudaArray * d_data);
extern "C" void compressKernelDXT1(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps);
//extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
//extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
#include "BitmapTable.h"
/*
// Convert linear image to block linear.
static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
{
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
for(uint by = 0; by < h; by++) {
for(uint bx = 0; bx < w; bx++) {
const uint bw = min(image->width() - bx * 4, 4U);
const uint bh = min(image->height() - by * 4, 4U);
for (uint i = 0; i < 16; i++) {
const int x = (i % 4) % bw;
const int y = (i / 4) % bh;
blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
}
}
}
}
*/
#endif
/// Allocate the device-side lookup tables and scratch buffers used by the CUDA compressors.
///
/// Every pointer starts out NULL so that isValid() can detect a failed allocation.
/// Nothing is allocated when the library is built without HAVE_CUDA.
CudaContext::CudaContext() :
    bitmapTable(NULL),
    bitmapTableCTX(NULL),
    data(NULL),
    result(NULL)
{
#if defined HAVE_CUDA
    // Allocate and upload bitmaps.
    cudaMalloc((void**) &bitmapTable, 992 * sizeof(uint));
    if (bitmapTable != NULL)
    {
        cudaMemcpy(bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
    }

    cudaMalloc((void**) &bitmapTableCTX, 704 * sizeof(uint));
    if (bitmapTableCTX != NULL)
    {
        cudaMemcpy(bitmapTableCTX, s_bitmapTableCTX, 704 * sizeof(uint), cudaMemcpyHostToDevice);
    }

    // Allocate scratch buffers.
    cudaMalloc((void**) &data, MAX_BLOCKS * 64U);

    // The result buffer has to hold the largest compressed block size for a full chunk:
    // the DXT3 path reads back count * 16 bytes with count up to MAX_BLOCKS, so
    // 8 bytes per block is not enough.
    cudaMalloc((void**) &result, MAX_BLOCKS * 16U);
#endif
}
/// Release every device allocation made by the constructor.
CudaContext::~CudaContext()
{
#if defined HAVE_CUDA
    // Scratch buffers first, then the lookup tables; the frees are independent of each other.
    cudaFree(result);
    cudaFree(data);
    cudaFree(bitmapTable);
    cudaFree(bitmapTableCTX);
#endif
}
bool CudaContext::isValid() const
{
#if defined HAVE_CUDA
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(err));
return false;
}
#endif
return bitmapTable != NULL && bitmapTableCTX != NULL && data != NULL && result != NULL;
}
#if defined HAVE_CUDA
/// Bind the compressor to the context whose device buffers it will use.
/// Only a reference is stored.
CudaCompressor::CudaCompressor(CudaContext & ctx)
    : m_ctx(ctx)
{
}
/// Compress a w x h image on the GPU and stream the compressed blocks to the output handler.
///
/// The image is uploaded to a CUDA array, then compressed in chunks of at most
/// MAX_BLOCKS 4x4 blocks. Each chunk is produced by compressBlocks() and handed
/// to outputOptions.outputHandler (when set). CUDA failures are reported through
/// outputOptions.errorHandler (when set) as Error_CudaError.
///
/// @param inputFormat        InputFormat_BGRA_8UB selects an 8-bit-per-channel upload;
///                           any other value is uploaded as four 32-bit floats per pixel.
/// @param alphaMode          Forwarded to compressBlocks().
/// @param w, h               Image size in pixels.
/// @param data               Host pixels; assumed tightly packed, w * h texels -- TODO confirm against callers.
/// @param compressionOptions Forwarded to setup() and compressBlocks().
/// @param outputOptions      Supplies the optional output and error handlers.
void CudaCompressor::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, const void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    nvDebugCheck(cuda::isHardwarePresent());

#if defined HAVE_CUDA

    // Allocate image as a cuda array.
    cudaArray * d_image = NULL;
    if (inputFormat == nvtt::InputFormat_BGRA_8UB)
    {
        cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
        cudaMallocArray(&d_image, &channelDesc, w, h);

        // One 32-bit texel per pixel. Computed in size_t so large images cannot overflow int.
        const size_t imageSize = size_t(w) * h * sizeof(uint);
        cudaMemcpyToArray(d_image, 0, 0, data, imageSize, cudaMemcpyHostToDevice);
    }
    else
    {
#pragma message(NV_FILE_LINE "FIXME: Floating point textures not really supported by CUDA compressors.")
        cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
        cudaMallocArray(&d_image, &channelDesc, w, h);

        // Four 32-bit floats per texel, matching the channel descriptor above.
        // Sizing this with sizeof(uint) uploaded only a quarter of the image.
        const size_t imageSize = size_t(w) * h * 4 * sizeof(float);
        cudaMemcpyToArray(d_image, 0, 0, data, imageSize, cudaMemcpyHostToDevice);
    }

    // Image size in blocks.
    const uint bw = (w + 3) / 4;
    const uint bh = (h + 3) / 4;
    const uint bs = blockSize();
    const uint blockNum = bw * bh;

    // Host staging buffer for one chunk of compressed blocks.
    void * h_result = malloc(min(blockNum, MAX_BLOCKS) * bs);
    if (h_result == NULL && blockNum != 0)
    {
        // Out of host memory: report it instead of handing a NULL destination to cudaMemcpy.
        if (outputOptions.errorHandler != NULL)
        {
            outputOptions.errorHandler->error(Error_CudaError);
        }
        cudaFreeArray(d_image);
        return;
    }

    setup(d_image, compressionOptions);

    // Timer timer;
    // timer.start();

    uint bn = 0;
    while (bn != blockNum)
    {
        const uint count = min(blockNum - bn, MAX_BLOCKS);

        compressBlocks(bn, count, w, h, alphaMode, compressionOptions, h_result);

        // Check for errors.
        const cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            //nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));

            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // Output result. The chunk is written even after an error, so the amount of
        // data delivered to the handler stays the same as before.
        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(h_result, count * bs);
        }

        bn += count;
    }

    //timer.stop();
    //printf("\rCUDA time taken: %.3f seconds\n", timer.elapsed() / CLOCKS_PER_SEC);

    free(h_result);
    cudaFreeArray(d_image);

#else
    if (outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_CudaError);
    }
#endif
}
/// Prepare the DXT1 kernel: pass the color weights to setupCompressKernel, then bind the source image.
void CudaCompressorDXT1::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions)
{
    const float * weights = compressionOptions.colorWeight.ptr();
    setupCompressKernel(weights);

    bindTextureToArray(image);
}
/// Compress `count` blocks starting at block index `first` and copy them into `output`.
/// Reads back 8 bytes per block from the shared result buffer.
void CudaCompressorDXT1::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    uint * const d_result = m_ctx.result;

    // Launch kernel.
    compressKernelDXT1(first, count, w, d_result, m_ctx.bitmapTable);

    // Copy result to host.
    const uint byteCount = count * 8;
    cudaMemcpy(output, d_result, byteCount, cudaMemcpyDeviceToHost);
}
/// Prepare the DXT3 kernel: pass the color weights to setupCompressKernel, then bind the source image.
void CudaCompressorDXT3::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions)
{
    const float * weights = compressionOptions.colorWeight.ptr();
    setupCompressKernel(weights);

    bindTextureToArray(image);
}
/// Compress `count` blocks starting at block index `first` and copy them into `output`.
/// Reads back 16 bytes per block from the shared result buffer.
void CudaCompressorDXT3::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    uint * const d_result = m_ctx.result;

    // Launch kernel.
    compressKernelDXT3(first, count, w, d_result, m_ctx.bitmapTable);

    // Copy result to host.
    const uint byteCount = count * 16;
    cudaMemcpy(output, d_result, byteCount, cudaMemcpyDeviceToHost);
}
/// Prepare the DXT5 path: pass the color weights to setupCompressKernel, then bind the source image.
void CudaCompressorDXT5::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions)
{
    const float * weights = compressionOptions.colorWeight.ptr();
    setupCompressKernel(weights);

    bindTextureToArray(image);
}
// Unfinished DXT5 path: every kernel launch and the CPU alpha compression below are
// commented out, so this function only copies count * 8 bytes of whatever is currently
// in m_ctx.result into `output`.
// NOTE(review): CudaCompressorDXT5::blockSize() returns 16 and CudaCompressor::compress()
// writes count * blockSize() bytes from `output`, so half of each chunk is never filled here.
// The parameters first, w, h, alphaMode (beyond the empty branch) and compressionOptions are unused.
void CudaCompressorDXT5::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
/*// Launch kernel.
compressKernelDXT5(first, count, w, m_ctx.result, m_ctx.bitmapTable);
// Copy result to host.
cudaMemcpy(output, m_ctx.result, count * 16, cudaMemcpyDeviceToHost);*/
// Launch kernel.
// Both branches are currently empty; the intended color kernels are left as comments.
if (alphaMode == AlphaMode_Transparency)
{
// compressWeightedKernelDXT1(first, count, w, m_ctx.result, m_ctx.bitmapTable);
}
else
{
// compressKernelDXT1_Level4(first, count, w, m_ctx.result, m_ctx.bitmapTable);
}
// Compress alpha in parallel with the GPU.
// Placeholder loop: the body is commented out, so no alpha blocks are produced.
for (uint i = 0; i < count; i++)
{
//ColorBlock rgba(blockLinearImage + (first + i) * 16);
//OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
}
// Copy result to host.
// Only the 8-byte color half of each block is copied.
cudaMemcpy(output, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
// @@ Interleave color and alpha blocks.
}
// @@ This code is very repetitive and needs to be cleaned up.
#if 0
/// Compress image using CUDA.
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage);
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
AlphaBlockDXT3 * alphaBlocks = NULL;
alphaBlocks = (AlphaBlockDXT3 *)malloc(min(compressedSize, MAX_BLOCKS * 8U));
setupCompressKernel(compressionOptions.colorWeight.ptr());
clock_t start = clock();
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
}
// Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++)
{
ColorBlock rgba(blockLinearImage + (bn + i) * 16);
OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
}
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
for (uint i = 0; i < count; i++)
{
outputOptions.outputHandler->writeData(alphaBlocks + i, 8);
outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8);
}
}
bn += count;
}
clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
/// Compress image using CUDA.
void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage);
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
AlphaBlockDXT5 * alphaBlocks = NULL;
alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U));
setupCompressKernel(compressionOptions.colorWeight.ptr());
clock_t start = clock();
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
}
// Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++)
{
ColorBlock rgba(blockLinearImage + (bn + i) * 16);
QuickCompress::compressDXT5A(rgba, alphaBlocks + i);
}
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
for (uint i = 0; i < count; i++)
{
outputOptions.outputHandler->writeData(alphaBlocks + i, 8);
outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8);
}
}
bn += count;
}
clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
void CudaCompressor::compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
setupCompressKernel(compressionOptions.colorWeight.ptr());
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressNormalKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
void CudaCompressor::compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (m_image->width() + 3) / 4;
const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
setupCompressKernel(compressionOptions.colorWeight.ptr());
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressKernelCTX1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTableCTX);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
void CudaCompressor::compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// @@ TODO
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
#endif // 0
#endif // defined HAVE_CUDA

View File

@ -1,112 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_CUDACOMPRESSORDXT_H
#define NV_TT_CUDACOMPRESSORDXT_H
#include "nvtt/nvtt.h"
#include "../Compressor.h" // CompressorInterface
struct cudaArray;
namespace nv
{
/// Owns the CUDA device allocations (lookup tables and scratch buffers) used by the CUDA compressors.
/// The constructor allocates them and the destructor frees them; isValid() reports whether
/// allocation succeeded.
class CudaContext
{
public:
    CudaContext();
    ~CudaContext();

    bool isValid() const;

public:
    // Device pointers.
    uint * bitmapTable;
    uint * bitmapTableCTX;
    uint * data;
    uint * result;

private:
    // Not copyable: the destructor frees the device pointers, so a copy would free them twice.
    // Declared but intentionally left undefined.
    CudaContext(const CudaContext &);
    CudaContext & operator=(const CudaContext &);
};
#if defined HAVE_CUDA
/// Common driver for the CUDA block compressors. compress() uploads the image and loops over
/// chunks of blocks; subclasses supply the kernel setup, the per-chunk compression and the
/// compressed block size.
struct CudaCompressor : public CompressorInterface
{
    CudaCompressor(CudaContext & ctx);

    /// Compress a whole image and stream the result through outputOptions.
    virtual void compress(nvtt::InputFormat inputFormat,
                          nvtt::AlphaMode alphaMode,
                          uint w,
                          uint h,
                          const void * data,
                          const nvtt::CompressionOptions::Private & compressionOptions,
                          const nvtt::OutputOptions::Private & outputOptions);

    /// Called once per image, before any block is compressed.
    virtual void setup(cudaArray * image,
                       const nvtt::CompressionOptions::Private & compressionOptions) = 0;

    /// Compress `count` blocks starting at block `first` into `output`.
    virtual void compressBlocks(uint first,
                                uint count,
                                uint w,
                                uint h,
                                nvtt::AlphaMode alphaMode,
                                const nvtt::CompressionOptions::Private & compressionOptions,
                                void * output) = 0;

    /// Size in bytes of one compressed block.
    virtual uint blockSize() const = 0;

protected:
    CudaContext & m_ctx;
};
/// DXT1 compressor: 8 bytes per compressed block.
struct CudaCompressorDXT1 : public CudaCompressor
{
    CudaCompressorDXT1(CudaContext & ctx) : CudaCompressor(ctx) {}

    virtual void setup(cudaArray * image,
                       const nvtt::CompressionOptions::Private & compressionOptions);

    virtual void compressBlocks(uint first,
                                uint count,
                                uint w,
                                uint h,
                                nvtt::AlphaMode alphaMode,
                                const nvtt::CompressionOptions::Private & compressionOptions,
                                void * output);

    virtual uint blockSize() const { return 8; }
};
/*struct CudaCompressorDXT1n : public CudaCompressor
{
virtual void setup(const CompressionOptions::Private & compressionOptions);
virtual void compressBlocks(uint blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
virtual uint blockSize() const { return 8; };
};*/
/// DXT3 compressor: 16 bytes per compressed block.
struct CudaCompressorDXT3 : public CudaCompressor
{
    CudaCompressorDXT3(CudaContext & ctx) : CudaCompressor(ctx) {}

    virtual void setup(cudaArray * image,
                       const nvtt::CompressionOptions::Private & compressionOptions);

    virtual void compressBlocks(uint first,
                                uint count,
                                uint w,
                                uint h,
                                nvtt::AlphaMode alphaMode,
                                const nvtt::CompressionOptions::Private & compressionOptions,
                                void * output);

    virtual uint blockSize() const { return 16; }
};
/// DXT5 compressor: 16 bytes per compressed block.
struct CudaCompressorDXT5 : public CudaCompressor
{
    CudaCompressorDXT5(CudaContext & ctx) : CudaCompressor(ctx) {}

    virtual void setup(cudaArray * image,
                       const nvtt::CompressionOptions::Private & compressionOptions);

    virtual void compressBlocks(uint first,
                                uint count,
                                uint w,
                                uint h,
                                nvtt::AlphaMode alphaMode,
                                const nvtt::CompressionOptions::Private & compressionOptions,
                                void * output);

    virtual uint blockSize() const { return 16; }
};
/*struct CudaCompressorCXT1 : public CudaCompressor
{
virtual void setup(const CompressionOptions::Private & compressionOptions);
virtual void compressBlocks(uint blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
virtual uint blockSize() const { return 8; };
};*/
#endif // defined HAVE_CUDA
} // nv namespace
#endif // NV_TT_CUDACOMPRESSORDXT_H

View File

@ -26,6 +26,7 @@
#ifndef CUDAMATH_H
#define CUDAMATH_H
#include <float.h>
inline __device__ __host__ float3 operator *(float3 a, float3 b)
@ -86,69 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
return a.x == b.x && a.y == b.y && a.z == b.z;
}
// float2 operators
// Component-wise product of two float2 values.
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
    const float px = a.x * b.x;
    const float py = a.y * b.y;
    return make_float2(px, py);
}
// Scale a float2 by a scalar (scalar on the left).
inline __device__ __host__ float2 operator *(float f, float2 v)
{
    const float sx = v.x * f;
    const float sy = v.y * f;
    return make_float2(sx, sy);
}
// Scale a float2 by a scalar (scalar on the right).
inline __device__ __host__ float2 operator *(float2 v, float f)
{
    const float sx = v.x * f;
    const float sy = v.y * f;
    return make_float2(sx, sy);
}
// Component-wise sum of two float2 values.
inline __device__ __host__ float2 operator +(float2 a, float2 b)
{
    const float sx = a.x + b.x;
    const float sy = a.y + b.y;
    return make_float2(sx, sy);
}
// Add `a` to `b` in place, component by component.
inline __device__ __host__ void operator +=(float2 & b, float2 a)
{
    b.x = b.x + a.x;
    b.y = b.y + a.y;
}
// Component-wise difference a - b.
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
    const float dx = a.x - b.x;
    const float dy = a.y - b.y;
    return make_float2(dx, dy);
}
// Subtract `a` from `b` in place, component by component.
inline __device__ __host__ void operator -=(float2 & b, float2 a)
{
    b.x = b.x - a.x;
    b.y = b.y - a.y;
}
// Divide a float2 by a scalar: one reciprocal, then a scalar multiply.
inline __device__ __host__ float2 operator /(float2 v, float f)
{
    return v * (1.0f / f);
}
// Divide `b` by a scalar in place, multiplying both components by the reciprocal.
inline __device__ __host__ void operator /=(float2 & b, float f)
{
    const float reciprocal = 1.0f / f;
    b.x = b.x * reciprocal;
    b.y = b.y * reciprocal;
}
// Exact component-wise equality (no tolerance).
inline __device__ __host__ bool operator ==(float2 a, float2 b)
{
    if (!(a.x == b.x))
    {
        return false;
    }
    return a.y == b.y;
}
// Dot product of two float2 values.
inline __device__ __host__ float dot(float2 a, float2 b)
{
    const float product = a.x * b.x + a.y * b.y;
    return product;
}
inline __device__ __host__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
@ -181,16 +119,6 @@ inline __device__ __host__ float3 normalize(float3 v)
return make_float3(v.x * len, v.y * len, v.z * len);
}
// Linear interpolation between a (t == 0) and b (t == 1), per component.
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
{
    const float oneMinusT = 1.0f - t;

    const float rx = oneMinusT * a.x + t * b.x;
    const float ry = oneMinusT * a.y + t * b.y;
    const float rz = oneMinusT * a.z + t * b.z;

    return make_float3(rx, ry, rz);
}
// Squared length of `a`, computed as the dot product of the vector with itself.
inline __device__ __host__ float lengthSquared(float3 a)
{
    const float squared = dot(a, a);
    return squared;
}
@ -199,8 +127,21 @@ inline __device__ __host__ float lengthSquared(float3 a)
inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{
// 8 iterations seems to be more than enough.
float3 row0 = make_float3(matrix[0], matrix[1], matrix[2]);
float3 row1 = make_float3(matrix[1], matrix[3], matrix[4]);
float3 row2 = make_float3(matrix[2], matrix[4], matrix[5]);
float3 v = make_float3(1.0f, 1.0f, 1.0f);
float r0 = dot(row0, row0);
float r1 = dot(row1, row1);
float r2 = dot(row2, row2);
float3 v;
if (r0 > r1 && r0 > r2) v = row0;
else if (r1 > r2) v = row1;
else v = row2;
//float3 v = make_float3(1.0f, 1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
@ -315,89 +256,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
return firstEigenVector(covariance);
}
// Estimate an eigenvector of the symmetric 2x2 matrix { matrix[0], matrix[1]; matrix[1], matrix[2] }
// by repeatedly multiplying a start vector by the matrix and rescaling by its larger component.
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
{
    // @@ 8 iterations is probably more than enough.
    float2 estimate = make_float2(1.0f, 1.0f);

    for (int step = 0; step < 8; ++step)
    {
        const float mx = estimate.x * matrix[0] + estimate.y * matrix[1];
        const float my = estimate.x * matrix[1] + estimate.y * matrix[2];

        // Rescale by the larger component; a zero maximum collapses the estimate to zero.
        const float largest = max(mx, my);
        const float scale = (largest == 0.0f) ? 0.0f : 1.0f / largest;

        estimate = make_float2(mx * scale, my * scale);
    }

    return estimate;
}
// Sum the 16 entries of `colors` and leave the total in the entries of `sums`.
// colors: 16 input values; sums: 16-entry output, also used as scratch in the device path.
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
// Emulation path: accumulate all 16 colors sequentially...
float2 color_sum = make_float2(0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
// ...and store the same total in every slot.
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
// Device path: the thread with index threadIdx.x handles slot idx and adds the
// partner slots idx^8, idx^4, idx^2, idx^1 in turn.
// NOTE(review): there is no barrier between the steps; presumably this relies on the
// 16 participating threads executing in lockstep -- confirm before changing the order.
const int idx = threadIdx.x;
sums[idx] = colors[idx];
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
// Build the 2x2 covariance matrix of 16 float2 values around their mean
// (color_sum / 16) and return firstEigenVector2D() of it.
// colors: 16 input values; color_sum: their sum.
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
// Emulation path: sequential accumulation. covariance = { xx, xy, yy }.
float covariance[3] = {0, 0, 0};
for (int i = 0; i < 16; i++)
{
float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[2] += a.y * a.y;
}
#else
// Device path: thread threadIdx.x writes the three products for its own color
// into a shared array, three floats per thread.
const int idx = threadIdx.x;
float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
__shared__ float covariance[16*3];
covariance[3 * idx + 0] = diff.x * diff.x;
covariance[3 * idx + 1] = diff.x * diff.y;
covariance[3 * idx + 2] = diff.y * diff.y;
// Reduction with strides 8, 4, 2, 1: the totals end up in covariance[0..2].
// NOTE(review): no barrier between strides; presumably relies on the 16 threads
// executing in lockstep -- confirm before restructuring.
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
}
}
#endif
// Compute first eigen vector.
// Only the first three floats of `covariance` are read by firstEigenVector2D.
return firstEigenVector2D(covariance);
}
#endif // CUDAMATH_H

View File

@ -1,239 +1,300 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
*/
// Check that the CUDA driver library can be loaded and is at least as new as `version`
// (same encoding as CUDART_VERSION: 2000 = 2.0, 2010 = 2.1, 2020 = 2.2).
// Below 2.2 the check relies on the presence of a driver entry point; from 2.2 on the
// driver is asked for its version. Returns true when built without HAVE_CUDA.
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
    Library nvcuda("nvcuda.dll");
#else
    Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif

    if (!nvcuda.isValid())
    {
        nvDebug("*** CUDA driver not found.\n");
        return false;
    }

    if (version >= 2000 && nvcuda.bindSymbol("cuStreamCreate") == NULL)
    {
        nvDebug("*** CUDA driver version < 2.0.\n");
        return false;
    }

    if (version >= 2010 && nvcuda.bindSymbol("cuModuleLoadDataEx") == NULL)
    {
        nvDebug("*** CUDA driver version < 2.1.\n");
        return false;
    }

    if (version < 2020)
    {
        // Nothing newer was requested; the symbol probes above are sufficient.
        return true;
    }

    typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
    PFCU_DRIVERGETVERSION queryVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
    if (queryVersion == NULL)
    {
        nvDebug("*** CUDA driver version < 2.2.\n");
        return false;
    }

    int driverVersion;
    const CUresult status = queryVersion(&driverVersion);
    if (status != CUDA_SUCCESS)
    {
        nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)status));
        return false;
    }

    return driverVersion >= version;
#else
    return true;
#endif // HAVE_CUDA
}
/// Determine if CUDA is available: the driver must match the runtime and at least one
/// device must be reported. A lone emulation device does not count.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
    // Make sure that CUDA driver matches CUDA runtime.
    if (!isCudaDriverAvailable(CUDART_VERSION))
    {
        nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
        return false;
    }

    const int count = deviceCount();
    if (count <= 0)
    {
        return false;
    }
    if (count > 1)
    {
        return true;
    }

    // Exactly one device: make sure it's not an emulation device.
    // deviceProp.name != Device Emulation (CPU)
    cudaDeviceProp props;
    cudaGetDeviceProperties(&props, 0);

    // @@ Make sure that warp size == 32

    return props.major != -1 && props.minor != -1;
#else
    return false;
#endif
}
/// Get number of CUDA enabled devices. Returns 0 when the query fails or CUDA support is compiled out.
int nv::cuda::deviceCount()
{
    int gpuCount = 0;

#if defined HAVE_CUDA
    if (cudaGetDeviceCount(&gpuCount) != cudaSuccess)
    {
        gpuCount = 0;
    }
#endif

    return gpuCount;
}
/// Return the index of the non-emulation device with the highest
/// multiProcessorCount * clockRate score; 0 when no device scores above zero.
int nv::cuda::getFastestDevice()
{
    int bestDevice = 0;

#if defined HAVE_CUDA
    int bestScore = 0;
    const int total = deviceCount();

    for (int device = 0; device < total; ++device)
    {
        cudaDeviceProp props;
        cudaGetDeviceProperties(&props, device);

        // Skip emulation devices.
        if (props.major == -1 || props.minor == -1)
        {
            continue;
        }

        const int score = props.multiProcessorCount * props.clockRate;
        if (score > bestScore)
        {
            bestScore = score;
            bestDevice = device;
        }
    }
#endif

    return bestDevice;
}
/// Activate the given device. Returns true on success; always false without HAVE_CUDA.
bool nv::cuda::setDevice(int i)
{
    nvCheck(i < deviceCount());

#if defined HAVE_CUDA
    const cudaError_t status = cudaSetDevice(i);
    if (status == cudaSuccess)
    {
        return true;
    }

    nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
    return false;
#else
    return false;
#endif
}
/// Call cudaThreadExit() and log any error it reports. Does nothing without HAVE_CUDA.
void nv::cuda::exit()
{
#if defined HAVE_CUDA
    const cudaError_t status = cudaThreadExit();
    if (status == cudaSuccess)
    {
        return;
    }

    nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
#endif
}
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
*/
// Check that the CUDA driver library can be loaded and is at least as new as `version`
// (same encoding as CUDART_VERSION: 2000 = 2.0, 2010 = 2.1, 2020 = 2.2).
// Below 2.2 the check relies on the presence of a driver entry point; from 2.2 on the
// driver is asked for its version. Returns true when built without HAVE_CUDA.
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
    Library nvcuda("nvcuda.dll");
#else
    Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif

    if (!nvcuda.isValid())
    {
        nvDebug("*** CUDA driver not found.\n");
        return false;
    }

    if (version >= 2000 && nvcuda.bindSymbol("cuStreamCreate") == NULL)
    {
        nvDebug("*** CUDA driver version < 2.0.\n");
        return false;
    }

    if (version >= 2010 && nvcuda.bindSymbol("cuModuleLoadDataEx") == NULL)
    {
        nvDebug("*** CUDA driver version < 2.1.\n");
        return false;
    }

    if (version < 2020)
    {
        // Nothing newer was requested; the symbol probes above are sufficient.
        return true;
    }

    typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
    PFCU_DRIVERGETVERSION queryVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
    if (queryVersion == NULL)
    {
        nvDebug("*** CUDA driver version < 2.2.\n");
        return false;
    }

    int driverVersion;
    const CUresult status = queryVersion(&driverVersion);
    if (status != CUDA_SUCCESS)
    {
        nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)status));
        return false;
    }

    return driverVersion >= version;
#else
    return true;
#endif // HAVE_CUDA
}
/// Determine if CUDA is available: the driver must match the runtime and at least one
/// device must be reported. A lone emulation device does not count.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
    // Make sure that CUDA driver matches CUDA runtime.
    if (!isCudaDriverAvailable(CUDART_VERSION))
    {
        nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
        return false;
    }

    const int count = deviceCount();
    if (count <= 0)
    {
        return false;
    }
    if (count > 1)
    {
        return true;
    }

    // Exactly one device: make sure it's not an emulation device.
    // deviceProp.name != Device Emulation (CPU)
    cudaDeviceProp props;
    cudaGetDeviceProperties(&props, 0);

    // @@ Make sure that warp size == 32
    // @@ Make sure available GPU is faster than the CPU.

    return props.major != -1 && props.minor != -1;
#else
    return false;
#endif
}
/// Get number of CUDA enabled devices.
/// Yields 0 when the runtime query fails or when built without HAVE_CUDA.
int nv::cuda::deviceCount()
{
#if defined HAVE_CUDA
    int found = 0;

    if (cudaGetDeviceCount(&found) != cudaSuccess)
    {
        return 0;
    }

    return found;
#else
    return 0;
#endif
}
// Make sure device meets requirements:
// - Not an emulation device.
// - Not an integrated device?
// - Faster than CPU.
//
// Only the emulation-device check is currently implemented; the integrated
// device check is disabled below and no speed comparison is performed.
// Returns false when the device properties cannot be queried, and always
// returns false when built without HAVE_CUDA.
bool nv::cuda::isValidDevice(int i)
{
#if defined HAVE_CUDA
    cudaDeviceProp device_properties;
    if (cudaGetDeviceProperties(&device_properties, i) != cudaSuccess)
    {
        // The struct is not filled in on failure; do not inspect it.
        return false;
    }

    if (device_properties.major == -1 || device_properties.minor == -1) {
        // Emulation device.
        return false;
    }

#if CUDART_VERSION >= 2030 // 2.3
    /*if (device_properties.integrated)
    {
        // Integrated devices.
        return false;
    }*/
#endif

    return true;
#else
    return false;
#endif
}
/// Pick the valid device with the highest throughput score.
///
/// The score is multiProcessorCount * clockRate; it is only a relative
/// measure, not an actual GFLOPS figure. Devices that fail isValidDevice(),
/// whose properties cannot be read, or whose score is not positive are
/// never selected.
///
/// @return Index of the selected device, or -1 when none qualifies or when
///         built without HAVE_CUDA.
int nv::cuda::getFastestDevice()
{
    int max_gflops_device = -1;

#if defined HAVE_CUDA
    int max_gflops = 0;

    const int device_count = deviceCount();
    for (int i = 0; i < device_count; i++)
    {
        if (!isValidDevice(i))
        {
            continue;
        }

        cudaDeviceProp device_properties;
        if (cudaGetDeviceProperties(&device_properties, i) != cudaSuccess)
        {
            // Properties are not filled in on failure; skip this device
            // instead of scoring uninitialized data.
            continue;
        }

        const int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
        if (gflops > max_gflops)
        {
            max_gflops = gflops;
            max_gflops_device = i;
        }
    }
#endif

    return max_gflops_device;
}
/// Activate the fastest valid device.
///
/// @param device_ptr  Must not be NULL. Receives the index of the device that
///                    this call selected, or -1 when this call selected
///                    nothing (failure, or a device was already active and
///                    there is therefore no device to clean up).
/// @return true when a valid device is active after the call; always false
///         when built without HAVE_CUDA.
bool nv::cuda::initDevice(int * device_ptr)
{
    nvDebugCheck(device_ptr != NULL);

#if defined HAVE_CUDA

#if CUDART_VERSION >= 2030 // 2.3
    // Set device flags to yield in order to play nice with other threads.
    // The return value is not inspected; an already-active device is
    // detected through the cudaSetDevice result below.
    (void)cudaSetDeviceFlags(cudaDeviceScheduleYield);
#endif

    const int device = getFastestDevice();

    if (device == -1)
    {
        // No device is fast enough.
        *device_ptr = -1;
        return false;
    }

    // Select CUDA device.
    cudaError_t result = cudaSetDevice(device);

    if (result == cudaErrorSetOnActiveProcess)
    {
        // A device is already active: report whether that one is usable.
        *device_ptr = -1; // No device to cleanup.

        int active_device = -1;
        result = cudaGetDevice(&active_device);
        if (result != cudaSuccess)
        {
            // Without a device index there is nothing to validate.
            nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
            return false;
        }

        return isValidDevice(active_device); // Return true if device is valid.
    }
    else if (result != cudaSuccess)
    {
        nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
        *device_ptr = -1;
        return false;
    }

    *device_ptr = device;
    return true;
#else
    // Give callers a defined value even when CUDA support is compiled out.
    if (device_ptr != NULL)
    {
        *device_ptr = -1;
    }
    return false;
#endif
}
/// Shut CUDA down through cudaThreadExit(), logging the error string if the
/// call fails. Does nothing when built without HAVE_CUDA.
void nv::cuda::exitDevice()
{
#if defined HAVE_CUDA
    const cudaError_t status = cudaThreadExit();

    if (status == cudaSuccess)
    {
        return;
    }

    nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
#endif
}

View File

@ -32,8 +32,10 @@ namespace nv
bool isHardwarePresent();
int deviceCount();
int getFastestDevice();
bool setDevice(int i);
void exit();
bool isValidDevice(int i);
bool initDevice(int * device_ptr);
void exitDevice();
};
} // nv namespace

View File

@ -1,60 +0,0 @@
#include "nvtt_experimental.h"
/// Texture object behind the experimental C API.
///
/// Holds an Image and a FloatImage, both deleted by the destructor. Because
/// the object deletes what it points to, copying is forbidden: a copy would
/// make two objects delete the same images.
struct NvttTexture
{
    NvttTexture() :
        m_constant(false),
        m_image(NULL),
        m_floatImage(NULL)
    {
    }

    ~NvttTexture()
    {
        // When m_constant is set the image is unwrapped before deletion.
        // NOTE(review): presumably the pixels are then owned by the caller
        // and must not be freed here - confirm against Image::unwrap().
        if (m_constant && m_image) m_image->unwrap();
        delete m_image;
        delete m_floatImage;
    }

    bool m_constant;
    Image * m_image;
    FloatImage * m_floatImage;

private:
    // Declared but intentionally not defined: forbids copy and assignment.
    NvttTexture(const NvttTexture &);
    void operator=(const NvttTexture &);
};
// Allocates a default-constructed NvttTexture with new and returns it.
// The caller owns the result; nvttDestroyTexture() deletes it.
NvttTexture * nvttCreateTexture()
{
return new NvttTexture();
}
// Deletes the given texture, which is expected to have been allocated with
// new (as nvttCreateTexture() does). Deleting a NULL pointer is a no-op in
// C++, so passing NULL is harmless.
void nvttDestroyTexture(NvttTexture * tex)
{
delete tex;
}
/// Copy caller-provided pixels into the image.
///
/// Only NVTT_InputFormat_BGRA_8UB is handled, copied as 4 bytes per pixel;
/// any other format trips nvCheck and leaves the image untouched.
///
/// @param img     Destination image; must not be NULL.
/// @param format  Layout of the pixels pointed to by data.
/// @param w       Width in pixels.
/// @param h       Height in pixels.
/// @param data    Source pixels; w * h * 4 bytes are read from it.
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data)
{
    nvCheck(img != NULL);

    if (format != NVTT_InputFormat_BGRA_8UB)
    {
        // Unsupported input format.
        nvCheck(false);
        return;
    }

    img->m_constant = false;

    // NOTE(review): NvttTexture's constructor leaves m_image NULL; if the
    // object passed here starts out the same way, an Image has to be
    // allocated before this call - confirm.
    img->m_image->allocate(w, h);

    // Compute the byte count in size_t: evaluated in 32-bit uint, w * h * 4
    // wraps around for large images wherever size_t is wider than uint.
    const size_t byteCount = size_t(w) * size_t(h) * 4;
    memcpy(img->m_image->pixels(), data, byteCount);
}
/// Compress the given image to the requested format.
///
/// Not implemented yet: the function only checks that img is not NULL.
/// The format argument is currently unused.
void nvttCompressImage(NvttImage * img, NvttFormat format)
{
    nvCheck(img != NULL);

    // @@ Invoke appropriate compressor.
}

View File

@ -1,103 +0,0 @@
#ifndef NVTT_EXPERIMENTAL_H
#define NVTT_EXPERIMENTAL_H
#include <nvtt/nvtt.h>
typedef struct NvttTexture NvttTexture;
typedef struct NvttOutputOptions NvttOutputOptions;
// Global functions
void nvttInitialize(...);
unsigned int nvttGetVersion();
const char * nvttGetErrorString(unsigned int error);
// Texture functions
NvttTexture * nvttCreateTexture();
void nvttDestroyTexture(NvttTexture * tex);
void nvttSetTexture2D(NvttTexture * tex, NvttInputFormat format, uint w, uint h, uint idx, void * data);
void nvttResize(NvttTexture * img, uint w, uint h);
unsigned int nvttDownsample(NvttTexture * img);
void nvttOutputCompressed(NvttTexture * img, NvttOutputFormat format);
void nvttOutputPixelFormat(NvttTexture * img, NvttOutputFormat format);
// How to control the compression parameters?
// Using many arguments:
// void nvttCompressImage(img, format, quality, r, g, b, a, ...);
// Using existing compression option class:
// compressionOptions = nvttCreateCompressionOptions();
// nvttSetCompressionOptionsFormat(compressionOptions, format);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a);
// ...
// nvttCompressImage(img, compressionOptions);
// Using thread local context state:
// void nvttSetCompressionFormat(format);
// void nvttSetCompressionQuality(quality);
// void nvttSetCompressionColorWeights(r, g, b, a);
// ...
// nvttCompressImage(img);
// Using thread local context state, but with GL style function arguments:
// nvttCompressorParameteri(NVTT_FORMAT, format);
// nvttCompressorParameteri(NVTT_QUALITY, quality);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a);
// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a);
// ...
// nvttCompressImage(img);
// How do we get the compressed output?
// - Using callbacks. (via new entrypoints, or through outputOptions)
// - Return it explicitly from nvttCompressImage.
// - Store it alongside the image and retrieve it later explicitly with 'nvttGetCompressedData(img, ...)'
/*
// Global functions
void nvttInitialize(...);
unsigned int nvttGetVersion();
const char * nvttGetErrorString(unsigned int error);
// Context object
void nvttCreateContext();
void nvttDestroyContext();
void nvttSetParameter1i(unsigned int name, int value);
void nvttSetParameter1f(unsigned int name, float value);
void nvttSetParameter2f(unsigned int name, float v0, float v1);
void nvttSetParameter3f(unsigned int name, float v0, float v1, float v2);
void nvttSetParameter4f(unsigned int name, float v0, float v1, float v2, float v3);
// Image object
NvttImage * nvttCreateImage();
void nvttDestroyImage(NvttImage * img);
void nvttSetImageData(NvttImage * image, NvttInputFormat format, unsigned int w, unsigned int h, void * data);
void nvttSetImageParameter1i(NvttImage * image, unsigned int name, int value);
void nvttSetImageParameter1f(NvttImage * image, unsigned int name, float value);
void nvttResizeImage(NvttImage * image, unsigned int w, unsigned int h);
void nvttQuantizeImage(NvttImage * image, bool dither, unsigned int rbits, unsigned int gbits, unsigned int bbits, unsigned int abits);
void nvttCompressImage(NvttImage * image, void * buffer, int size);
*/
#endif // NVTT_EXPERIMENTAL_H

View File

@ -1,61 +0,0 @@
#include "nvtt_experimental.h"
/*
Errors in the original API:
- Too many memory copies.
- Implementation too complicated.
- Error output should not be in output options.
- Data driven interface. Follows the dialog model. Provide all the data upfront.
*/
// Output texture with mipmaps
//
// API sketch: format, w, h and data stand for the caller's input image and
// are not declared here. The header and the top level are written first,
// then one compressed mipmap per successful downsample.
void example0()
{
    CompressionOptions compressionOptions;
    OutputOptions outputOptions;

    Texture img;
    img.setTexture2D(format, w, h, 0, data);

    Compressor context;
    context.outputHeader(outputOptions);
    context.outputCompressed(img, compressionOptions, outputOptions);

    img.toLinear(2.2);
    while (img.downsample(NVTT_FILTER_BOX))
    {
        // NOTE(review): if toGamma() modifies img in place, every level after
        // the first is downsampled in gamma space - confirm the intent.
        img.toGamma(2.2);

        // outputCompressed is a member of Compressor, so it has to be
        // invoked on the context like the top-level call above.
        context.outputCompressed(img, compressionOptions, outputOptions);
    }
}
// Output texture with colored mipmaps
//
// API sketch: format, w, h, data and the color table stand for caller
// inputs and are not declared here. Each mipmap is blended at 50% with the
// color table entry for its level before it is compressed.
void example1()
{
    CompressionOptions compressionOptions;
    OutputOptions outputOptions;

    Texture img;
    img.setTexture2D(format, w, h, 0, data);

    Compressor context;
    context.outputHeader(outputOptions);
    context.outputCompressed(img, compressionOptions, outputOptions);

    img.toLinear(2.2);

    // Index of the mipmap being produced; selects its tint color.
    // NOTE(review): assumes color[] has one entry per generated mipmap,
    // starting at 0 for the first downsampled level - confirm.
    int i = 0;
    while (img.downsample(NVTT_FILTER_BOX))
    {
        // NOTE(review): if toGamma() modifies img in place, every level after
        // the first is downsampled in gamma space - confirm the intent.
        img.toGamma(2.2);

        // Tint a copy so the blend does not leak into the next level.
        Texture mipmap = img;
        mipmap.blend(color[i].r, color[i].g, color[i].b, 0.5f);
        context.outputCompressed(mipmap, compressionOptions, outputOptions);

        i++;
    }
}

View File

@ -42,8 +42,6 @@ const char * nvtt::errorString(Error e)
return "Error opening file";
case Error_FileWrite:
return "Error writing through output handler";
case Error_UnsupportedOutputFormat:
return "The container file does not support the selected output format";
}
return "Invalid error";

View File

@ -21,7 +21,6 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#pragma once
#ifndef NV_TT_H
#define NV_TT_H
@ -48,15 +47,12 @@
# define NVTT_API
#endif
#define NVTT_VERSION 201
#define NVTT_VERSION 200
#define NVTT_FORBID_COPY(Class) \
#define NVTT_DECLARE_PIMPL(Class) \
private: \
Class(const Class &); \
void operator=(const Class &); \
public:
#define NVTT_DECLARE_PIMPL(Class) \
public: \
struct Private; \
Private & m
@ -65,9 +61,6 @@
// Public interface.
namespace nvtt
{
// Forward declarations.
struct TexImage;
/// Supported compression formats.
enum Format
{
@ -90,25 +83,6 @@ namespace nvtt
Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2
Format_DXT1n,// Not supported on CPU yet.
Format_CTX1, // Not supported on CPU yet.
Format_YCoCg_DXT5, // Not supported yet.
Format_BC6, // Not supported yet.
Format_BC7, // Not supported yet.
Format_RGBE,
};
/// Pixel types.
enum PixelType
{
PixelType_UnsignedNorm,
PixelType_SignedNorm, // Not supported yet.
PixelType_UnsignedInt, // Not supported yet.
PixelType_SignedInt, // Not supported yet.
PixelType_Float,
};
/// Quality modes.
@ -123,7 +97,6 @@ namespace nvtt
/// Compression options. This class describes the desired compression format and other compression settings.
struct CompressionOptions
{
NVTT_FORBID_COPY(CompressionOptions);
NVTT_DECLARE_PIMPL(CompressionOptions);
NVTT_API CompressionOptions();
@ -139,23 +112,10 @@ namespace nvtt
// Set color mask to describe the RGB/RGBA format.
NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
NVTT_API void setPixelFormat(unsigned char rsize, unsigned char gsize, unsigned char bsize, unsigned char asize);
NVTT_API void setPixelType(PixelType pixelType);
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
};
/*
// DXGI_FORMAT_R16G16_FLOAT
compressionOptions.setPixelType(PixelType_Float);
compressionOptions.setPixelFormat2(16, 16, 0, 0);
// DXGI_FORMAT_R32G32B32A32_FLOAT
compressionOptions.setPixelType(PixelType_Float);
compressionOptions.setPixelFormat2(32, 32, 32, 32);
*/
/// Wrap modes.
enum WrapMode
@ -177,7 +137,8 @@ namespace nvtt
enum InputFormat
{
InputFormat_BGRA_8UB,
InputFormat_RGBA_32F,
// InputFormat_RGBE_8UB,
// InputFormat_BGRA_32F,
};
/// Mipmap downsampling filters.
@ -188,23 +149,11 @@ namespace nvtt
MipmapFilter_Kaiser, ///< Kaiser-windowed Sinc filter is the best downsampling filter.
};
/// Texture resize filters.
enum ResizeFilter
{
ResizeFilter_Box,
ResizeFilter_Triangle,
ResizeFilter_Kaiser,
ResizeFilter_Mitchell,
};
/// Color transformation.
enum ColorTransform
{
ColorTransform_None,
ColorTransform_Linear, ///< Not implemented.
ColorTransform_Swizzle, ///< Not implemented.
ColorTransform_YCoCg, ///< Transform into r=Co, g=Cg, b=0, a=Y
ColorTransform_ScaledYCoCg, ///< Not implemented.
ColorTransform_Linear,
};
/// Extents rounding mode.
@ -227,7 +176,6 @@ namespace nvtt
/// Input options. Specify format and layout of the input texture.
struct InputOptions
{
NVTT_FORBID_COPY(InputOptions);
NVTT_DECLARE_PIMPL(InputOptions);
NVTT_API InputOptions();
@ -242,18 +190,17 @@ namespace nvtt
// Set mipmap data. Copies the data.
NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0);
NVTT_API bool setMipmapChannelData(const void * data, int channel, int w, int h, int d = 1, int face = 0, int mipmap = 0);
// Describe the format of the input.
NVTT_API void setFormat(InputFormat format);
// Set the way the input alpha channel is interpreted. @@ Not implemented!
// Set the way the input alpha channel is interpreted.
NVTT_API void setAlphaMode(AlphaMode alphaMode);
// Set gamma settings.
NVTT_API void setGamma(float inputGamma, float outputGamma);
// Set texture wrapping mode.
// Set texture wrapping mode.
NVTT_API void setWrapMode(WrapMode mode);
// Set mipmapping options.
@ -268,18 +215,13 @@ namespace nvtt
NVTT_API void setNormalFilter(float sm, float medium, float big, float large);
NVTT_API void setNormalizeMipmaps(bool b);
// Set color transforms.
// Set color transforms. @@ Not implemented!
NVTT_API void setColorTransform(ColorTransform t);
NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3);
NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3, float offset);
NVTT_API void setSwizzleTransform(int x, int y, int z, int w);
// Set resizing options.
NVTT_API void setMaxExtents(int d);
NVTT_API void setRoundMode(RoundMode mode);
// Set whether or not to premultiply color by alpha
NVTT_API void setPremultiplyAlpha(bool b);
};
@ -304,7 +246,6 @@ namespace nvtt
Error_CudaError,
Error_FileOpen,
Error_FileWrite,
Error_UnsupportedOutputFormat,
};
/// Error handler.
@ -316,19 +257,11 @@ namespace nvtt
virtual void error(Error e) = 0;
};
/// Container.
enum Container
{
Container_DDS,
Container_DDS10,
};
/// Output Options. This class holds pointers to the interfaces that are used to report the output of
/// the compressor to the user.
struct OutputOptions
{
NVTT_FORBID_COPY(OutputOptions);
NVTT_DECLARE_PIMPL(OutputOptions);
NVTT_API OutputOptions();
@ -342,132 +275,34 @@ namespace nvtt
NVTT_API void setOutputHandler(OutputHandler * outputHandler);
NVTT_API void setErrorHandler(ErrorHandler * errorHandler);
NVTT_API void setOutputHeader(bool outputHeader);
NVTT_API void setContainer(Container container);
};
/// Context.
/// Texture compressor.
struct Compressor
{
NVTT_FORBID_COPY(Compressor);
NVTT_DECLARE_PIMPL(Compressor);
NVTT_API Compressor();
NVTT_API ~Compressor();
// Context settings.
NVTT_API void enableCudaAcceleration(bool enable);
NVTT_API bool isCudaAccelerationEnabled() const;
// InputOptions api.
// Main entrypoint of the compression library.
NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
// Estimate the size of compressing the input with the given options.
NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const;
// RAW api.
NVTT_API bool compress2D(InputFormat format, int w, int h, void * data, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
//NVTT_API bool compress3D(InputFormat format, int w, int h, int d, void * data, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
NVTT_API int estimateSize(int w, int h, int d, const CompressionOptions & compressionOptions) const;
// TexImage api.
NVTT_API TexImage createTexImage() const;
NVTT_API bool outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
NVTT_API bool compress(const TexImage & tex, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
NVTT_API int estimateSize(const TexImage & tex, const CompressionOptions & compressionOptions) const;
};
// "Compressor" is deprecated. This should have been called "Context"
typedef Compressor Context;
/// DXT decoder.
enum Decoder
{
Decoder_Reference,
Decoder_NV5x,
};
/// A texture mipmap.
struct TexImage
{
NVTT_API TexImage();
NVTT_API TexImage(const TexImage & tex);
NVTT_API ~TexImage();
NVTT_API void operator=(const TexImage & tex);
// Texture parameters.
NVTT_API void setTextureType(TextureType type);
NVTT_API void setWrapMode(WrapMode mode);
NVTT_API void setAlphaMode(AlphaMode alphaMode);
NVTT_API void setNormalMap(bool isNormalMap);
// Accessors.
NVTT_API int width() const;
NVTT_API int height() const;
NVTT_API int depth() const;
NVTT_API int faceCount() const;
NVTT_API TextureType textureType() const;
NVTT_API WrapMode wrapMode() const;
NVTT_API AlphaMode alphaMode() const;
NVTT_API bool isNormalMap() const;
NVTT_API int countMipmaps() const;
// Texture data.
NVTT_API bool load(const char * fileName);
NVTT_API bool save(const char * fileName) const;
NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * data);
NVTT_API bool setImage2D(InputFormat format, int w, int h, int idx, const void * r, const void * g, const void * b, const void * a);
NVTT_API bool setImage2D(Format format, Decoder decoder, int w, int h, int idx, const void * data);
// Resizing methods.
NVTT_API void resize(int w, int h, ResizeFilter filter);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
NVTT_API bool buildNextMipmap(MipmapFilter filter);
// Color transforms.
NVTT_API void toLinear(float gamma);
NVTT_API void toGamma(float gamma);
NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]);
NVTT_API void swizzle(int r, int g, int b, int a);
NVTT_API void scaleBias(int channel, float scale, float bias);
NVTT_API void packNormal();
NVTT_API void expandNormal();
NVTT_API void blend(float r, float g, float b, float a, float t);
NVTT_API void premultiplyAlpha();
NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale);
NVTT_API void setBorder(float r, float g, float b, float a);
NVTT_API void fill(float r, float g, float b, float a);
// Set normal map options.
NVTT_API void toNormalMap(float sm, float medium, float big, float large);
NVTT_API void toHeightMap();
NVTT_API void normalizeNormalMap();
// Error compare.
NVTT_API float rootMeanSquaredError_rgb(const TexImage & reference) const;
NVTT_API float rootMeanSquaredError_alpha(const TexImage & reference) const;
// Geometric transforms.
NVTT_API void flipVertically();
private:
void detach();
struct Private;
Private * m;
};
// Return string for the given error code.
NVTT_API const char * errorString(Error e);
// Return NVTT version.
NVTT_API unsigned int version();
// Set callbacks.
//NVTT_API void setErrorCallback(ErrorCallback callback);
//NVTT_API void setMemoryCallbacks(...);
} // nvtt namespace
#endif // NV_TT_H

View File

@ -185,16 +185,6 @@ void nvttSetOutputOptionsOutputHandler(NvttOutputOptions * outputOptions, nvttOu
// Compressor class.
NvttCompressor * nvttCreateCompressor()
{
return new nvtt::Compressor();
}
void nvttDestroyCompressor(NvttCompressor * compressor)
{
delete compressor;
}
NvttBoolean nvttCompress(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions, const NvttOutputOptions * outputOptions)
{
return (NvttBoolean)compressor->process(*inputOptions, *compressionOptions, *outputOptions);

View File

@ -47,7 +47,7 @@
# define NVTT_API
#endif
#define NVTT_VERSION 201
#define NVTT_VERSION 200
#ifdef __cplusplus
typedef struct nvtt::InputOptions NvttInputOptions;
@ -156,7 +156,6 @@ typedef enum
NVTT_Error_Unknown,
NVTT_Error_FileOpen,
NVTT_Error_FileWrite,
NVTT_Error_UnsupportedOutputFormat,
} NvttError;
typedef enum

31
src/nvtt/squish/Makefile Normal file
View File

@ -0,0 +1,31 @@
# Builds the static squish library and installs or removes it.
# INSTALL_DIR is not set in this file; presumably it comes from the included
# 'config' file (confirm).
include config

SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp
OBJ = $(SRC:%.cpp=%.o)
LIB = libsquish.a

# These targets name actions, not files; declaring them phony keeps them
# working even if a file called 'all', 'install', 'uninstall' or 'clean'
# appears in this directory.
.PHONY : all install uninstall clean

all : $(LIB)

install : $(LIB)
	install squish.h $(INSTALL_DIR)/include
	install $(LIB) $(INSTALL_DIR)/lib

uninstall :
	$(RM) $(INSTALL_DIR)/include/squish.h
	$(RM) $(INSTALL_DIR)/lib/$(LIB)

# $? lists only the objects newer than the archive, so just those members
# are replaced before the index is rebuilt.
$(LIB) : $(OBJ)
	$(AR) cr $@ $?
	ranlib $@

%.o : %.cpp
	$(CXX) $(CPPFLAGS) -I. $(CXXFLAGS) -o$@ -c $<

clean :
	$(RM) $(OBJ) $(LIB)

View File

@ -28,7 +28,7 @@
#include "colourblock.h"
#include <cfloat>
namespace nvsquish {
namespace squish {
ClusterFit::ClusterFit()
{
@ -109,7 +109,7 @@ void ClusterFit::SetMetric(float r, float g, float b)
float ClusterFit::GetBestError() const
{
#if SQUISH_USE_SIMD
return m_besterror.GetX();
return m_besterror.GetVec3().X();
#else
return m_besterror;
#endif
@ -280,6 +280,15 @@ void ClusterFit::Compress4( void* block )
m_beta[k] = m_weights[k];
}
/*unsigned int permutation = 0;
for(int p = 0; p < 16; p++) {
permutation |= indices[p] << (p * 2);
}
if (debug) printf("%X:\t", permutation);
if (debug && permutation == 0x55FFFFAA) __debugbreak();
*/
// solve a least squares problem to place the endpoints
#if SQUISH_USE_SIMD
Vec4 start, end;

View File

@ -23,15 +23,15 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_CLUSTERFIT_H
#define NV_SQUISH_CLUSTERFIT_H
#ifndef SQUISH_CLUSTERFIT_H
#define SQUISH_CLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace nvsquish {
namespace squish {
class ClusterFit : public ColourFit
{

View File

@ -25,7 +25,7 @@
#include "colourblock.h"
namespace nvsquish {
namespace squish {
static int FloatToInt( float a, int limit )
{

View File

@ -23,13 +23,13 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_COLOURBLOCK_H
#define NV_SQUISH_COLOURBLOCK_H
#ifndef SQUISH_COLOURBLOCK_H
#define SQUISH_COLOURBLOCK_H
#include "squish.h"
#include "maths.h"
namespace nvsquish {
namespace squish {
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );

View File

@ -26,7 +26,7 @@
#include "colourfit.h"
#include "colourset.h"
namespace nvsquish {
namespace squish {
ColourFit::ColourFit()
{

View File

@ -23,13 +23,13 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_COLOURFIT_H
#define NV_SQUISH_COLOURFIT_H
#ifndef SQUISH_COLOURFIT_H
#define SQUISH_COLOURFIT_H
#include "squish.h"
#include "maths.h"
namespace nvsquish {
namespace squish {
class ColourSet;

View File

@ -25,7 +25,7 @@
#include "colourset.h"
namespace nvsquish {
namespace squish {
// @@ Add flags:
// - MatchTransparent

View File

@ -23,21 +23,21 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_COLOURSET_H
#define NV_SQUISH_COLOURSET_H
#ifndef SQUISH_COLOURSET_H
#define SQUISH_COLOURSET_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
namespace nvsquish {
namespace squish {
/*! @brief Represents a set of block colours
*/
class ColourSet
{
public:
ColourSet( u8 const* rgba, int flags, bool createMinimalSet = true );
ColourSet( u8 const* rgba, int flags, bool createMinimalSet = false );
int GetCount() const { return m_count; }
Vec3 const* GetPoints() const { return m_points; }

View File

@ -1,113 +0,0 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2008 Ignacio Castano castano@gmail.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include <stdio.h>
#include <float.h>
#include <math.h>
// One row of the generated lookup tables. main() fills a row for every way
// of splitting 16 into per-cluster counts and prints it as a C initializer.
struct Precomp {
// Sum of the squared alpha weights over the counts.
float alpha2_sum;
// Sum of the squared beta weights over the counts.
float beta2_sum;
// Sum of the alpha * beta products over the counts.
float alphabeta_sum;
// 1 / (alpha2_sum * beta2_sum - alphabeta_sum^2); printed as FLT_MAX when
// that value is not finite.
float factor;
};
int main()
{
int i = 0;
printf("struct Precomp {\n");
printf("\tfloat alpha2_sum;\n");
printf("\tfloat beta2_sum;\n");
printf("\tfloat alphabeta_sum;\n");
printf("\tfloat factor;\n");
printf("};\n\n");
printf("static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = {\n");
// Three element clusters:
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
int c2 = 16 - c0 - c1;
Precomp p;
p.alpha2_sum = c0 + c1 * 0.25f;
p.beta2_sum = c2 + c1 * 0.25f;
p.alphabeta_sum = c1 * 0.25f;
p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
if (isfinite(p.factor))
{
printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2);
}
else
{
printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2);
}
i++;
}
}
printf("}; // %d three cluster elements\n\n", i);
printf("static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = {\n");
// Four element clusters:
i = 0;
for( int c0 = 0; c0 <= 16; c0++)
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
int c3 = 16 - c0 - c1 - c2;
Precomp p;
p.alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
p.beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
p.alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
if (isfinite(p.factor))
{
printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2, c3);
}
else
{
printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2, c3);
}
i++;
}
}
}
printf("}; // %d four cluster elements\n\n", i);
return 0;
}

View File

@ -31,7 +31,7 @@
#include "fastclusterlookup.inl"
namespace nvsquish {
namespace squish {
FastClusterFit::FastClusterFit()
{
@ -129,8 +129,6 @@ void FastClusterFit::Compress3( void* block )
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half = VEC4_CONST(0.5f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -162,22 +160,25 @@ void FastClusterFit::Compress3( void* block )
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
{
@ -273,7 +274,7 @@ void FastClusterFit::Compress4( void* block )
Vec4 const factor = constants.SplatW();
i++;
Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0));
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
Vec4 const betax_sum = m_xsum - alphax_sum;
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
@ -285,19 +286,18 @@ void FastClusterFit::Compress4( void* block )
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -370,12 +370,6 @@ void FastClusterFit::Compress4( void* block )
void FastClusterFit::Compress3( void* block )
{
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -405,9 +399,16 @@ void FastClusterFit::Compress3( void* block )
Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -476,12 +477,6 @@ void FastClusterFit::Compress3( void* block )
void FastClusterFit::Compress4( void* block )
{
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -516,9 +511,16 @@ void FastClusterFit::Compress4( void* block )
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;

View File

@ -24,15 +24,15 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_FASTCLUSTERFIT_H
#define NV_SQUISH_FASTCLUSTERFIT_H
#ifndef SQUISH_FASTCLUSTERFIT_H
#define SQUISH_FASTCLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace nvsquish {
namespace squish {
class FastClusterFit : public ColourFit
{

View File

@ -27,7 +27,7 @@
#include "simd.h"
#include <cfloat>
namespace nvsquish {
namespace squish {
Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights, Vec3::Arg metric )
{
@ -134,4 +134,4 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
#endif
} // namespace nvsquish
} // namespace squish

View File

@ -23,14 +23,14 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_MATHS_H
#define NV_SQUISH_MATHS_H
#ifndef SQUISH_MATHS_H
#define SQUISH_MATHS_H
#include <cmath>
#include <algorithm>
#include "config.h"
namespace nvsquish {
namespace squish {
class Vec3
{

View File

@ -23,8 +23,8 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_SIMD_H
#define NV_SQUISH_SIMD_H
#ifndef SQUISH_SIMD_H
#define SQUISH_SIMD_H
#include "maths.h"

View File

@ -23,8 +23,8 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_SIMD_SSE_H
#define NV_SQUISH_SIMD_SSE_H
#ifndef SQUISH_SIMD_SSE_H
#define SQUISH_SIMD_SSE_H
#include <xmmintrin.h>
#if ( SQUISH_USE_SSE > 1 )
@ -35,7 +35,7 @@
#define SQUISH_SSE_SPLAT( a ) \
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
namespace nvsquish {
namespace squish {
#define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) )
@ -72,13 +72,6 @@ public:
_mm_store_ps( c, m_v );
return Vec3( c[0], c[1], c[2] );
}
float GetX() const
{
SQUISH_ALIGN_16 float f;
_mm_store_ss(&f, m_v);
return f;
}
Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }

View File

@ -26,12 +26,10 @@
#ifndef SQUISH_SIMD_VE_H
#define SQUISH_SIMD_VE_H
#ifndef __APPLE_ALTIVEC__
#include <altivec.h>
#undef bool
#endif
namespace nvsquish {
namespace squish {
#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )
@ -78,14 +76,7 @@ public:
u.v = m_v;
return Vec3( u.c[0], u.c[1], u.c[2] );
}
float GetX() const
{
union { vector float v; float c[4]; } u;
u.v = m_v;
return u.c[0];
}
Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }

View File

@ -23,11 +23,11 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_H
#define NV_SQUISH_H
#ifndef SQUISH_H
#define SQUISH_H
//! All squish API functions live in this namespace.
namespace nvsquish {
namespace squish {
// -----------------------------------------------------------------------------

View File

@ -0,0 +1,531 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 42;
objects = {
/* Begin PBXBuildFile section */
133FA0DC096A7B8E0050752E /* alpha.h in Headers */ = {isa = PBXBuildFile; fileRef = 133FA0DA096A7B8E0050752E /* alpha.h */; };
133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 133FA0DB096A7B8E0050752E /* alpha.cpp */; };
1342B4160999DF1900152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
1342B41A0999DF7000152915 /* squishpng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B4190999DF7000152915 /* squishpng.cpp */; };
1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B43E0999E0CC00152915 /* squishtest.cpp */; };
1342B4420999E0EC00152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; };
1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70B092AA857005EE038 /* clusterfit.cpp */; };
1350D71B092AA858005EE038 /* clusterfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D70C092AA858005EE038 /* clusterfit.h */; };
1350D71E092AA858005EE038 /* colourblock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70F092AA858005EE038 /* colourblock.cpp */; };
1350D71F092AA858005EE038 /* colourblock.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D710092AA858005EE038 /* colourblock.h */; };
1350D720092AA858005EE038 /* config.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D711092AA858005EE038 /* config.h */; };
1350D721092AA858005EE038 /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D712092AA858005EE038 /* maths.cpp */; };
1350D722092AA858005EE038 /* maths.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D713092AA858005EE038 /* maths.h */; };
1350D725092AA858005EE038 /* rangefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D716092AA858005EE038 /* rangefit.cpp */; };
1350D726092AA858005EE038 /* rangefit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D717092AA858005EE038 /* rangefit.h */; };
1350D727092AA858005EE038 /* squish.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D718092AA858005EE038 /* squish.cpp */; };
1350D728092AA858005EE038 /* squish.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D719092AA858005EE038 /* squish.h */; settings = {ATTRIBUTES = (Public, ); }; };
139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C21CE09ADAB0800A2500D /* squishgen.cpp */; };
139C234F09B0602700A2500D /* singlecolourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 139C234D09B0602700A2500D /* singlecolourfit.h */; };
139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C234E09B0602700A2500D /* singlecolourfit.cpp */; };
13A7CCA40952BE63001C963A /* colourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 13A7CCA20952BE63001C963A /* colourfit.h */; };
13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13A7CCA30952BE63001C963A /* colourfit.cpp */; };
13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13C4C7AB0941C18000AC5B89 /* colourset.cpp */; };
13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */ = {isa = PBXBuildFile; fileRef = 13C4C7AC0941C18000AC5B89 /* colourset.h */; };
13CD64C2092BCF8A00488C97 /* simd.h in Headers */ = {isa = PBXBuildFile; fileRef = 13CD64C0092BCF8A00488C97 /* simd.h */; };
13D0DC910931F93A00909807 /* simd_ve.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC900931F93A00909807 /* simd_ve.h */; };
13D0DC970931F9D600909807 /* simd_sse.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC960931F9D600909807 /* simd_sse.h */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
1342B52B099BF72F00152915 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = squish;
};
1342B58E099BF93D00152915 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = squish;
};
/* End PBXContainerItemProxy section */
/* Begin PBXFileReference section */
133FA0DA096A7B8E0050752E /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = "<group>"; };
133FA0DB096A7B8E0050752E /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = "<group>"; };
1342B4110999DE7F00152915 /* squishpng */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishpng; sourceTree = BUILT_PRODUCTS_DIR; };
1342B4190999DF7000152915 /* squishpng.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = squishpng.cpp; path = extra/squishpng.cpp; sourceTree = "<group>"; };
1342B4370999E07C00152915 /* squishtest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishtest; sourceTree = BUILT_PRODUCTS_DIR; };
1342B43E0999E0CC00152915 /* squishtest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishtest.cpp; path = extra/squishtest.cpp; sourceTree = "<group>"; };
1350D70B092AA857005EE038 /* clusterfit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfit.cpp; sourceTree = "<group>"; };
1350D70C092AA858005EE038 /* clusterfit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = clusterfit.h; sourceTree = "<group>"; };
1350D70F092AA858005EE038 /* colourblock.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourblock.cpp; sourceTree = "<group>"; };
1350D710092AA858005EE038 /* colourblock.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourblock.h; sourceTree = "<group>"; };
1350D711092AA858005EE038 /* config.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = "<group>"; };
1350D712092AA858005EE038 /* maths.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = maths.cpp; sourceTree = "<group>"; };
1350D713092AA858005EE038 /* maths.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = maths.h; sourceTree = "<group>"; };
1350D716092AA858005EE038 /* rangefit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = rangefit.cpp; sourceTree = "<group>"; };
1350D717092AA858005EE038 /* rangefit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = rangefit.h; sourceTree = "<group>"; };
1350D718092AA858005EE038 /* squish.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = squish.cpp; sourceTree = "<group>"; };
1350D719092AA858005EE038 /* squish.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = squish.h; sourceTree = "<group>"; };
13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = texture_compression_s3tc.txt; sourceTree = "<group>"; };
139C21C409ADAA7000A2500D /* squishgen */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishgen; sourceTree = BUILT_PRODUCTS_DIR; };
139C21CE09ADAB0800A2500D /* squishgen.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishgen.cpp; path = extra/squishgen.cpp; sourceTree = "<group>"; };
139C234D09B0602700A2500D /* singlecolourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = singlecolourfit.h; sourceTree = "<group>"; };
139C234E09B0602700A2500D /* singlecolourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = singlecolourfit.cpp; sourceTree = "<group>"; };
139C236D09B060A900A2500D /* singlecolourlookup.inl */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = singlecolourlookup.inl; sourceTree = "<group>"; };
13A7CCA20952BE63001C963A /* colourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colourfit.h; sourceTree = "<group>"; };
13A7CCA30952BE63001C963A /* colourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colourfit.cpp; sourceTree = "<group>"; };
13C4C7AB0941C18000AC5B89 /* colourset.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourset.cpp; sourceTree = "<group>"; };
13C4C7AC0941C18000AC5B89 /* colourset.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourset.h; sourceTree = "<group>"; };
13CD64C0092BCF8A00488C97 /* simd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd.h; sourceTree = "<group>"; };
13D0DC900931F93A00909807 /* simd_ve.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_ve.h; sourceTree = "<group>"; };
13D0DC960931F9D600909807 /* simd_sse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_sse.h; sourceTree = "<group>"; };
D2AAC046055464E500DB518D /* libsquish.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsquish.a; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
1342B40F0999DE7F00152915 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
1342B4160999DF1900152915 /* libsquish.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
1342B4350999E07C00152915 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
1342B4420999E0EC00152915 /* libsquish.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
139C21C209ADAA7000A2500D /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
D289987405E68DCB004EDB86 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
08FB7794FE84155DC02AAC07 /* squish */ = {
isa = PBXGroup;
children = (
08FB7795FE84155DC02AAC07 /* Source */,
C6A0FF2B0290797F04C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */,
);
name = squish;
sourceTree = "<group>";
};
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
133FA0DB096A7B8E0050752E /* alpha.cpp */,
133FA0DA096A7B8E0050752E /* alpha.h */,
1350D70B092AA857005EE038 /* clusterfit.cpp */,
1350D70C092AA858005EE038 /* clusterfit.h */,
13A7CCA30952BE63001C963A /* colourfit.cpp */,
13A7CCA20952BE63001C963A /* colourfit.h */,
13C4C7AB0941C18000AC5B89 /* colourset.cpp */,
13C4C7AC0941C18000AC5B89 /* colourset.h */,
1350D70F092AA858005EE038 /* colourblock.cpp */,
1350D710092AA858005EE038 /* colourblock.h */,
13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */,
1350D711092AA858005EE038 /* config.h */,
1350D712092AA858005EE038 /* maths.cpp */,
1350D713092AA858005EE038 /* maths.h */,
1350D716092AA858005EE038 /* rangefit.cpp */,
1350D717092AA858005EE038 /* rangefit.h */,
13CD64C0092BCF8A00488C97 /* simd.h */,
13D0DC960931F9D600909807 /* simd_sse.h */,
13D0DC900931F93A00909807 /* simd_ve.h */,
139C234E09B0602700A2500D /* singlecolourfit.cpp */,
139C234D09B0602700A2500D /* singlecolourfit.h */,
139C236D09B060A900A2500D /* singlecolourlookup.inl */,
1350D718092AA858005EE038 /* squish.cpp */,
1350D719092AA858005EE038 /* squish.h */,
139C21CE09ADAB0800A2500D /* squishgen.cpp */,
1342B4190999DF7000152915 /* squishpng.cpp */,
1342B43E0999E0CC00152915 /* squishtest.cpp */,
);
name = Source;
sourceTree = "<group>";
};
1AB674ADFE9D54B511CA2CBB /* Products */ = {
isa = PBXGroup;
children = (
D2AAC046055464E500DB518D /* libsquish.a */,
1342B4110999DE7F00152915 /* squishpng */,
1342B4370999E07C00152915 /* squishtest */,
139C21C409ADAA7000A2500D /* squishgen */,
);
name = Products;
sourceTree = "<group>";
};
C6A0FF2B0290797F04C91782 /* Documentation */ = {
isa = PBXGroup;
children = (
);
name = Documentation;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
D2AAC043055464E500DB518D /* Headers */ = {
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
1350D71B092AA858005EE038 /* clusterfit.h in Headers */,
1350D71F092AA858005EE038 /* colourblock.h in Headers */,
1350D720092AA858005EE038 /* config.h in Headers */,
1350D722092AA858005EE038 /* maths.h in Headers */,
1350D726092AA858005EE038 /* rangefit.h in Headers */,
1350D728092AA858005EE038 /* squish.h in Headers */,
13CD64C2092BCF8A00488C97 /* simd.h in Headers */,
13D0DC910931F93A00909807 /* simd_ve.h in Headers */,
13D0DC970931F9D600909807 /* simd_sse.h in Headers */,
13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */,
13A7CCA40952BE63001C963A /* colourfit.h in Headers */,
133FA0DC096A7B8E0050752E /* alpha.h in Headers */,
139C234F09B0602700A2500D /* singlecolourfit.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXHeadersBuildPhase section */
/* Begin PBXNativeTarget section */
1342B4100999DE7F00152915 /* squishpng */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */;
buildPhases = (
1342B40E0999DE7F00152915 /* Sources */,
1342B40F0999DE7F00152915 /* Frameworks */,
);
buildRules = (
);
dependencies = (
1342B58F099BF93D00152915 /* PBXTargetDependency */,
);
name = squishpng;
productName = squishpng;
productReference = 1342B4110999DE7F00152915 /* squishpng */;
productType = "com.apple.product-type.tool";
};
1342B4360999E07C00152915 /* squishtest */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */;
buildPhases = (
1342B4340999E07C00152915 /* Sources */,
1342B4350999E07C00152915 /* Frameworks */,
);
buildRules = (
);
dependencies = (
1342B52C099BF72F00152915 /* PBXTargetDependency */,
);
name = squishtest;
productName = squishtest;
productReference = 1342B4370999E07C00152915 /* squishtest */;
productType = "com.apple.product-type.tool";
};
139C21C309ADAA7000A2500D /* squishgen */ = {
isa = PBXNativeTarget;
buildConfigurationList = 139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */;
buildPhases = (
139C21C109ADAA7000A2500D /* Sources */,
139C21C209ADAA7000A2500D /* Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = squishgen;
productName = squishgen;
productReference = 139C21C409ADAA7000A2500D /* squishgen */;
productType = "com.apple.product-type.tool";
};
D2AAC045055464E500DB518D /* squish */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */;
buildPhases = (
D2AAC043055464E500DB518D /* Headers */,
D2AAC044055464E500DB518D /* Sources */,
D289987405E68DCB004EDB86 /* Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = squish;
productName = squish;
productReference = D2AAC046055464E500DB518D /* libsquish.a */;
productType = "com.apple.product-type.library.static";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
08FB7793FE84155DC02AAC07 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */;
hasScannedForEncodings = 1;
mainGroup = 08FB7794FE84155DC02AAC07 /* squish */;
projectDirPath = "";
targets = (
D2AAC045055464E500DB518D /* squish */,
1342B4100999DE7F00152915 /* squishpng */,
1342B4360999E07C00152915 /* squishtest */,
139C21C309ADAA7000A2500D /* squishgen */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
1342B40E0999DE7F00152915 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1342B41A0999DF7000152915 /* squishpng.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
1342B4340999E07C00152915 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
139C21C109ADAA7000A2500D /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
D2AAC044055464E500DB518D /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */,
1350D71E092AA858005EE038 /* colourblock.cpp in Sources */,
1350D721092AA858005EE038 /* maths.cpp in Sources */,
1350D725092AA858005EE038 /* rangefit.cpp in Sources */,
1350D727092AA858005EE038 /* squish.cpp in Sources */,
13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */,
13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */,
133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */,
139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin PBXTargetDependency section */
1342B52C099BF72F00152915 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = D2AAC045055464E500DB518D /* squish */;
targetProxy = 1342B52B099BF72F00152915 /* PBXContainerItemProxy */;
};
1342B58F099BF93D00152915 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = D2AAC045055464E500DB518D /* squish */;
targetProxy = 1342B58E099BF93D00152915 /* PBXContainerItemProxy */;
};
/* End PBXTargetDependency section */
/* Begin XCBuildConfiguration section */
1342B4140999DE9F00152915 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = (
..,
/sw/include,
);
INSTALL_PATH = "$(HOME)/bin";
LIBRARY_SEARCH_PATHS = /sw/lib;
OTHER_LDFLAGS = "-lpng";
PRODUCT_NAME = squishpng;
};
name = Debug;
};
1342B4150999DE9F00152915 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = (
..,
/sw/include,
);
INSTALL_PATH = "$(HOME)/bin";
LIBRARY_SEARCH_PATHS = /sw/lib;
OTHER_LDFLAGS = "-lpng";
PRODUCT_NAME = squishpng;
};
name = Release;
};
1342B43C0999E0C000152915 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishtest;
};
name = Debug;
};
1342B43D0999E0C000152915 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishtest;
};
name = Release;
};
139C21CC09ADAB0300A2500D /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishgen;
};
name = Debug;
};
139C21CD09ADAB0300A2500D /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
HEADER_SEARCH_PATHS = ..;
INSTALL_PATH = "$(HOME)/bin";
PRODUCT_NAME = squishgen;
};
name = Release;
};
1DEB91EC08733DB70010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
COPY_PHASE_STRIP = NO;
GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
INSTALL_PATH = /usr/local/lib;
OTHER_CFLAGS = "-maltivec";
PRODUCT_NAME = squish;
STRIP_INSTALLED_PRODUCT = NO;
};
name = Debug;
};
1DEB91ED08733DB70010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1";
INSTALL_PATH = /usr/local/lib;
OTHER_CFLAGS = "-maltivec";
PRODUCT_NAME = squish;
STRIP_INSTALLED_PRODUCT = YES;
};
name = Release;
};
1DEB91F008733DB70010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_DYNAMIC_NO_PIC = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_TREAT_WARNINGS_AS_ERRORS = YES;
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_PEDANTIC = YES;
GCC_WARN_SHADOW = YES;
GCC_WARN_SIGN_COMPARE = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VALUE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
};
name = Debug;
};
1DEB91F108733DB70010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_DYNAMIC_NO_PIC = YES;
GCC_OPTIMIZATION_LEVEL = 3;
GCC_TREAT_WARNINGS_AS_ERRORS = YES;
GCC_UNROLL_LOOPS = YES;
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_PEDANTIC = YES;
GCC_WARN_SHADOW = YES;
GCC_WARN_SIGN_COMPARE = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VALUE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1342B4140999DE9F00152915 /* Debug */,
1342B4150999DE9F00152915 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1342B43C0999E0C000152915 /* Debug */,
1342B43D0999E0C000152915 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */ = {
isa = XCConfigurationList;
buildConfigurations = (
139C21CC09ADAB0300A2500D /* Debug */,
139C21CD09ADAB0300A2500D /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB91EC08733DB70010E9CD /* Debug */,
1DEB91ED08733DB70010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB91F008733DB70010E9CD /* Debug */,
1DEB91F108733DB70010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
}

View File

@ -0,0 +1,508 @@
Name
EXT_texture_compression_s3tc
Name Strings
GL_EXT_texture_compression_s3tc
Contact
Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com)
Status
FINAL
Version
1.1, 16 November 2001 (containing only clarifications relative to
version 1.0, dated 7 July 2000)
Number
198
Dependencies
OpenGL 1.1 is required.
GL_ARB_texture_compression is required.
This extension is written against the OpenGL 1.2.1 Specification.
Overview
This extension provides additional texture compression functionality
specific to S3's S3TC format (called DXTC in Microsoft's DirectX API),
subject to all the requirements and limitations described by the extension
GL_ARB_texture_compression.
This extension supports DXT1, DXT3, and DXT5 texture compression formats.
For the DXT1 image format, this specification supports an RGB-only mode
and a special RGBA mode with single-bit "transparent" alpha.
IP Status
Contact S3 Incorporated (http://www.s3.com) regarding any intellectual
property issues associated with implementing this extension.
WARNING: Vendors able to support S3TC texture compression in Direct3D
drivers do not necessarily have the right to use the same functionality in
OpenGL.
Issues
(1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported?
RESOLVED: No -- insufficient interest. Supporting DXT2 and DXT4
would require some rework to the TexEnv definition (maybe add a new
base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats.
Note that the EXT_texture_env_combine extension (which extends normal
TexEnv modes) can be used to support textures with premultiplied alpha.
(2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported
or should we use only the DXT<n> enums?
RESOLVED: No. A generic RGBA_S3TC_EXT is problematic because DXT3
and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is
also) yet one format must be chosen up front.
(3) Should TexSubImage support all block-aligned edits or just the minimal
functionality required by the ARB_texture_compression extension?
RESOLVED: Allow all valid block-aligned edits.
(4) A pre-compressed image with a DXT1 format can be used as either an
RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image. If the image has
transparent texels, how are they treated in each format?
RESOLVED: The renderer has to make sure that an RGB_S3TC_DXT1 format
is decoded as RGB (where alpha is effectively one for all texels),
while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all
texels with "transparent" encodings). Otherwise, the formats are
identical.
(5) Is the encoding of the RGB components for DXT1 formats correct in this
spec? MSDN documentation does not specify an RGB color for the
"transparent" encoding. Is it really black?
RESOLVED: Yes. The specification for the DXT1 format initially
required black, but later changed that requirement to a
recommendation. All vendors involved in the definition of this
specification support black. In addition, specifying black has a
useful behavior.
When blending multiple texels (GL_LINEAR filtering), mixing opaque and
transparent samples is problematic. Defining a black color on
transparent texels achieves a sensible result that works like a
texture with premultiplied alpha. For example, if three opaque white
and one transparent sample are being averaged, the result would be a
75% intensity gray (with an alpha of 75%). This is the same result on
the color channels as would be obtained using a white color, 75%
alpha, and a SRC_ALPHA blend factor.
(6) Is the encoding of the RGB components for DXT3 and DXT5 formats
correct in this spec? MSDN documentation suggests that the RGB blocks
for DXT3 and DXT5 are decoded as described by the DXT1 format.
RESOLVED: Yes -- this appears to be a bug in the MSDN documentation.
The specification for the DXT2-DXT5 formats requires decoding using the
opaque block encoding, regardless of the relative values of "color0"
and "color1".
New Procedures and Functions
None.
New Tokens
Accepted by the <internalformat> parameter of TexImage2D, CopyTexImage2D,
and CompressedTexImage2DARB and the <format> parameter of
CompressedTexSubImage2DARB:
COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0
COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1
COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2
COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3
Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation)
None.
Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization)
Add to Table 3.16.1: Specific Compressed Internal Formats
Compressed Internal Format Base Internal Format
========================== ====================
COMPRESSED_RGB_S3TC_DXT1_EXT RGB
COMPRESSED_RGBA_S3TC_DXT1_EXT RGBA
COMPRESSED_RGBA_S3TC_DXT3_EXT RGBA
COMPRESSED_RGBA_S3TC_DXT5_EXT RGBA
Modify Section 3.8.2, Alternate Image Specification
(add to end of TexSubImage discussion, p.123 -- after edit from the
ARB_texture_compression spec)
If the internal format of the texture image being modified is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
texture is stored using one of the several S3TC compressed texture image
formats. Such images are easily edited along 4x4 texel boundaries, so the
limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed.
TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION
error only if one of the following conditions occurs:
* <width> is not a multiple of four or equal to TEXTURE_WIDTH,
unless <xoffset> and <yoffset> are both zero.
* <height> is not a multiple of four or equal to TEXTURE_HEIGHT,
unless <xoffset> and <yoffset> are both zero.
* <xoffset> or <yoffset> is not a multiple of four.
The contents of any 4x4 block of texels of an S3TC compressed texture
image that does not intersect the area being modified are preserved during
valid TexSubImage2D and CopyTexSubImage2D calls.
Add to Section 3.8.2, Alternate Image Specification (adding to the end of
the CompressedTexImage section introduced by the ARB_texture_compression
spec)
If <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one
of several S3TC compressed texture image formats. The S3TC texture
compression algorithm supports only 2D images without borders.
CompressedTexImage1DARB and CompressedTexImage3DARB produce an
INVALID_ENUM error if <internalformat> is an S3TC format.
CompressedTexImage2DARB will produce an INVALID_OPERATION error if
<border> is non-zero.
Add to Section 3.8.2, Alternate Image Specification (adding to the end of
the CompressedTexSubImage section introduced by the
ARB_texture_compression spec)
If the internal format of the texture image being modified is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
texture is stored using one of the several S3TC compressed texture image
formats. Since the S3TC texture compression algorithm supports only 2D
images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce
an INVALID_ENUM error if <format> is an S3TC format. Since S3TC images
are easily edited along 4x4 texel boundaries, the limitations on
CompressedTexSubImage2D are relaxed. CompressedTexSubImage2D will result
in an INVALID_OPERATION error only if one of the following conditions
occurs:
* <width> is not a multiple of four or equal to TEXTURE_WIDTH.
* <height> is not a multiple of four or equal to TEXTURE_HEIGHT.
* <xoffset> or <yoffset> is not a multiple of four.
The contents of any 4x4 block of texels of an S3TC compressed texture
image that does not intersect the area being modified are preserved during
valid TexSubImage2D and CopyTexSubImage2D calls.
Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment
Operations and the Frame Buffer)
None.
Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions)
None.
Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and
State Requests)
None.
Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance)
None.
Additions to the AGL/GLX/WGL Specifications
None.
GLX Protocol
None.
Errors
INVALID_ENUM is generated by CompressedTexImage1DARB or
CompressedTexImage3DARB if <internalformat> is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT.
INVALID_OPERATION is generated by CompressedTexImage2DARB if
<internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
COMPRESSED_RGBA_S3TC_DXT5_EXT and <border> is not equal to zero.
INVALID_ENUM is generated by CompressedTexSubImage1DARB or
CompressedTexSubImage3DARB if <format> is COMPRESSED_RGB_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
COMPRESSED_RGBA_S3TC_DXT5_EXT.
INVALID_OPERATION is generated by TexSubImage2D, CopyTexSubImage2D, or
CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of
the following apply: <width> is not a multiple of four or equal to
TEXTURE_WIDTH; <height> is not a multiple of four or equal to
TEXTURE_HEIGHT; <xoffset> or <yoffset> is not a multiple of four.
The following restrictions from the ARB_texture_compression specification
do not apply to S3TC texture formats, since subimage modification is
straightforward as long as the subimage is properly aligned.
DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D,
DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or
DELETE: CopyTexSubImage3D if the internal format of the texture image is
DELETE: compressed and <xoffset>, <yoffset>, or <zoffset> does not equal
DELETE: -b, where b is value of TEXTURE_BORDER.
DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB,
DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the
DELETE: entire texture image is not being edited: if <xoffset>,
DELETE: <yoffset>, or <zoffset> is greater than -b, <xoffset> + <width> is
DELETE: less than w+b, <yoffset> + <height> is less than h+b, or <zoffset>
DELETE: + <depth> is less than d+b, where b is the value of
DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of
DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH.
See also errors in the GL_ARB_texture_compression specification.
New State
In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT
subscript for Z by 4 in the "Type" row.
New Implementation Dependent State
None
Appendix
S3TC Compressed Texture Image Formats
Compressed texture images stored using the S3TC compressed image formats
are represented as a collection of 4x4 texel blocks, where each block
contains 64 or 128 bits of texel data. The image is encoded as a normal
2D raster image in which each 4x4 block is treated as a single pixel. If
an S3TC image has a width or height less than four, the data corresponding
to texels outside the image are irrelevant and undefined.
When an S3TC image with a width of <w>, height of <h>, and block size of
<blocksize> (8 or 16 bytes) is decoded, the corresponding image size (in
bytes) is:
ceil(<w>/4) * ceil(<h>/4) * blocksize.
When decoding an S3TC image, the block containing the texel at offset
(<x>, <y>) begins at an offset (in bytes) relative to the base of the
image of:
blocksize * (ceil(<w>/4) * floor(<y>/4) + floor(<x>/4)).
The data corresponding to a specific texel (<x>, <y>) are extracted from a
4x4 texel block using a relative (x,y) value of
(<x> modulo 4, <y> modulo 4).
There are four distinct S3TC image formats:
COMPRESSED_RGB_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64
bits of RGB image data.
Each RGB image data block is encoded as a sequence of 8 bytes, called (in
order of increasing address):
c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3
The 8 bytes of the block are decoded into three quantities:
color0 = c0_lo + c0_hi * 256
color1 = c1_lo + c1_hi * 256
bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3))
color0 and color1 are 16-bit unsigned integers that are unpacked to
RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with
a <format> of RGB and a type of UNSIGNED_SHORT_5_6_5.
bits is a 32-bit unsigned integer, from which a two-bit control code
is extracted for a texel at location (x,y) in the block using:
code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0]
where bit 31 is the most significant and bit 0 is the least
significant bit.
The RGB color for a texel at location (x,y) in the block is given by:
RGB0, if color0 > color1 and code(x,y) == 0
RGB1, if color0 > color1 and code(x,y) == 1
(2*RGB0+RGB1)/3, if color0 > color1 and code(x,y) == 2
(RGB0+2*RGB1)/3, if color0 > color1 and code(x,y) == 3
RGB0, if color0 <= color1 and code(x,y) == 0
RGB1, if color0 <= color1 and code(x,y) == 1
(RGB0+RGB1)/2, if color0 <= color1 and code(x,y) == 2
BLACK, if color0 <= color1 and code(x,y) == 3
Arithmetic operations are done per component, and BLACK refers to an
RGB color where red, green, and blue are all zero.
Since this image has an RGB format, there is no alpha component and the
image is considered fully opaque.
COMPRESSED_RGBA_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64
bits of RGB image data and minimal alpha information. The RGB components
of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT.
The alpha component for a texel at location (x,y) in the block is
given by:
0.0, if color0 <= color1 and code(x,y) == 3
1.0, otherwise
IMPORTANT: When encoding an RGBA image into a format using 1-bit
alpha, any texels with an alpha component less than 0.5 end up with an
alpha of 0.0 and any texels with an alpha component greater than or
equal to 0.5 end up with an alpha of 1.0. When encoding an RGBA image
into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red,
green, and blue components of any texels with a final alpha of 0.0
will automatically be zero (black). If this behavior is not desired
by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT.
This format will never be used when a generic compressed internal
format (Table 3.16.2) is specified, although the nearly identical
format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be.
COMPRESSED_RGBA_S3TC_DXT3_EXT: Each 4x4 block of texels consists of 64
bits of uncompressed alpha image data followed by 64 bits of RGB image
data.
Each RGB image data block is encoded according to the
COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
bits always use the non-transparent encodings. In other words, they are
treated as though color0 > color1, regardless of the actual values of
color0 and color1.
Each alpha image data block is encoded as a sequence of 8 bytes, called
(in order of increasing address):
a0, a1, a2, a3, a4, a5, a6, a7
The 8 bytes of the block are decoded into one 64-bit integer:
alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 +
256 * (a5 + 256 * (a6 + 256 * a7))))))
alpha is a 64-bit unsigned integer, from which a four-bit alpha value
is extracted for a texel at location (x,y) in the block using:
alpha(x,y) = alpha[4*(4*y+x)+3..4*(4*y+x)+0]
where bit 63 is the most significant and bit 0 is the least
significant bit.
The alpha component for a texel at location (x,y) in the block is
given by alpha(x,y) / 15.
COMPRESSED_RGBA_S3TC_DXT5_EXT: Each 4x4 block of texels consists of 64
bits of compressed alpha image data followed by 64 bits of RGB image data.
Each RGB image data block is encoded according to the
COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
bits always use the non-transparent encodings. In other words, they are
treated as though color0 > color1, regardless of the actual values of
color0 and color1.
Each alpha image data block is encoded as a sequence of 8 bytes, called
(in order of increasing address):
alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5
The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha
components by multiplying by 1/255.
The 6 "bits" bytes of the block are decoded into one 48-bit integer:
bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 +
256 * (bits_4 + 256 * bits_5))))
bits is a 48-bit unsigned integer, from which a three-bit control code
is extracted for a texel at location (x,y) in the block using:
code(x,y) = bits[3*(4*y+x)+2..3*(4*y+x)+0]
where bit 47 is the most significant and bit 0 is the least
significant bit.
The alpha component for a texel at location (x,y) in the block is
given by:
alpha0, code(x,y) == 0
alpha1, code(x,y) == 1
(6*alpha0 + 1*alpha1)/7, alpha0 > alpha1 and code(x,y) == 2
(5*alpha0 + 2*alpha1)/7, alpha0 > alpha1 and code(x,y) == 3
(4*alpha0 + 3*alpha1)/7, alpha0 > alpha1 and code(x,y) == 4
(3*alpha0 + 4*alpha1)/7, alpha0 > alpha1 and code(x,y) == 5
(2*alpha0 + 5*alpha1)/7, alpha0 > alpha1 and code(x,y) == 6
(1*alpha0 + 6*alpha1)/7, alpha0 > alpha1 and code(x,y) == 7
(4*alpha0 + 1*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 2
(3*alpha0 + 2*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 3
(2*alpha0 + 3*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 4
(1*alpha0 + 4*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 5
0.0, alpha0 <= alpha1 and code(x,y) == 6
1.0, alpha0 <= alpha1 and code(x,y) == 7
Revision History
1.1, 11/16/01 pbrown: Updated contact info, clarified where texels
fall within a single block.
1.0, 07/07/00 prbrown1: Published final version agreed to by working
group members.
0.9, 06/24/00 prbrown1: Documented that block-aligned TexSubImage calls
do not modify existing texels outside the
modified blocks. Added caveat to allow for a
(0,0)-anchored TexSubImage operation of
arbitrary size.
0.7, 04/11/00 prbrown1: Added issues on DXT1, DXT3, and DXT5 encodings
where the MSDN documentation doesn't match what
is really done. Added enum values from the
extension registry.
0.4, 03/28/00 prbrown1: Updated to reflect final version of the
ARB_texture_compression extension. Allowed
block-aligned TexSubImage calls.
0.3, 03/07/00 prbrown1: Resolved issues pertaining to the format of RGB
blocks in the DXT3 and DXT5 formats (they don't
ever use the "transparent" encoding). Fixed
decoding of DXT1 blocks. Pointed out issue of
"transparent" texels in DXT1 encodings having
different behaviors for RGB and RGBA internal
formats.
0.2, 02/23/00 prbrown1: Minor revisions; added several issues.
0.11, 02/17/00 prbrown1: Slight modification to error semantics
(INVALID_ENUM instead of INVALID_OPERATION).
0.1, 02/15/00 prbrown1: Initial revision.

View File

@ -0,0 +1,39 @@
Microsoft Visual Studio Solution File, Format Version 8.00
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcproj", "{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
ProjectSection(ProjectDependencies) = postProject
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishpng", "squishpng\squishpng.vcproj", "{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
ProjectSection(ProjectDependencies) = postProject
{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishtest", "squishtest\squishtest.vcproj", "{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
ProjectSection(ProjectDependencies) = postProject
{6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfiguration) = preSolution
Debug = Debug
Release = Release
EndGlobalSection
GlobalSection(ProjectConfiguration) = postSolution
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.ActiveCfg = Debug|Win32
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.Build.0 = Debug|Win32
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.ActiveCfg = Release|Win32
{6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.Build.0 = Release|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.ActiveCfg = Debug|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.Build.0 = Debug|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.ActiveCfg = Release|Win32
{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.Build.0 = Release|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.ActiveCfg = Debug|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.Build.0 = Debug|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.ActiveCfg = Release|Win32
{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
EndGlobalSection
GlobalSection(ExtensibilityAddIns) = postSolution
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,198 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="squish"
ProjectGUID="{6A8518C3-D81A-4428-BD7F-C37933088AC1}"
Keyword="Win32Proj">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="4"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;SQUISH_USE_SSE=1"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
EnableEnhancedInstructionSet="1"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLibrarianTool"
OutputFile="$(OutDir)/squish.lib"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="4"
CharacterSet="2"
WholeProgramOptimization="TRUE">
<Tool
Name="VCCLCompilerTool"
GlobalOptimizations="TRUE"
InlineFunctionExpansion="2"
FavorSizeOrSpeed="1"
OmitFramePointers="TRUE"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;SQUISH_USE_SSE=1"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLibrarianTool"
OutputFile="$(OutDir)/squish.lib"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
RelativePath="..\..\alpha.cpp">
</File>
<File
RelativePath="..\..\clusterfit.cpp">
</File>
<File
RelativePath="..\..\colourblock.cpp">
</File>
<File
RelativePath="..\..\colourfit.cpp">
</File>
<File
RelativePath="..\..\colourset.cpp">
</File>
<File
RelativePath="..\..\maths.cpp">
</File>
<File
RelativePath="..\..\rangefit.cpp">
</File>
<File
RelativePath="..\..\singlecolourfit.cpp">
</File>
<File
RelativePath="..\..\squish.cpp">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
<File
RelativePath="..\..\alpha.h">
</File>
<File
RelativePath="..\..\clusterfit.h">
</File>
<File
RelativePath="..\..\colourblock.h">
</File>
<File
RelativePath="..\..\colourfit.h">
</File>
<File
RelativePath="..\..\colourset.h">
</File>
<File
RelativePath="..\..\config.h">
</File>
<File
RelativePath="..\..\maths.h">
</File>
<File
RelativePath="..\..\rangefit.h">
</File>
<File
RelativePath="..\..\simd.h">
</File>
<File
RelativePath="..\..\simd_sse.h">
</File>
<File
RelativePath="..\..\simd_ve.h">
</File>
<File
RelativePath="..\..\singlecolourfit.h">
</File>
<File
RelativePath="..\..\singlecolourlookup.inl">
</File>
<File
RelativePath="..\..\squish.h">
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
<File
RelativePath="..\..\texture_compression_s3tc.txt">
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,140 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="squishpng"
ProjectGUID="{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}"
Keyword="Win32Proj">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="libpng13d.lib"
OutputFile="$(OutDir)/squishpng.exe"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
ProgramDatabaseFile="$(OutDir)/squishpng.pdb"
SubSystem="1"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="libpng13.lib"
OutputFile="$(OutDir)/squishpng.exe"
LinkIncremental="1"
GenerateDebugInformation="TRUE"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
RelativePath="..\..\extra\squishpng.cpp">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,138 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="squishtest"
ProjectGUID="{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}"
Keyword="Win32Proj">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/squishtest.exe"
LinkIncremental="2"
GenerateDebugInformation="TRUE"
ProgramDatabaseFile="$(OutDir)/squishtest.pdb"
SubSystem="1"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="1"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\.."
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/squishtest.exe"
LinkIncremental="1"
GenerateDebugInformation="TRUE"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
RelativePath="..\..\extra\squishtest.cpp">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}">
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}">
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

File diff suppressed because it is too large Load Diff

View File

@ -24,15 +24,15 @@
-------------------------------------------------------------------------- */
#ifndef NV_SQUISH_WEIGHTEDCLUSTERFIT_H
#define NV_SQUISH_WEIGHTEDCLUSTERFIT_H
#ifndef SQUISH_WEIGHTEDCLUSTERFIT_H
#define SQUISH_WEIGHTEDCLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace nvsquish {
namespace squish {
class WeightedClusterFit : public ColourFit
{

View File

@ -1,28 +0,0 @@
# Filter test tool; it reuses the command-line helpers from ../tools/cmdline.h.
ADD_EXECUTABLE(filtertest filtertest.cpp ../tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
# Compression test suite driver.
ADD_EXECUTABLE(nvtestsuite testsuite.cpp)
TARGET_LINK_LIBRARIES(nvtestsuite nvcore nvmath nvimage nvtt)
# One CTest entry per image set (-set 0, 1, 2), each with its own -out directory.
# The ".cuda" entries pass no extra flag and are registered regardless of CUDA_FOUND.
ADD_TEST(NVTT.TestSuite.Kodak.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -out output-cuda-kodak)
ADD_TEST(NVTT.TestSuite.Waterloo.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -out output-cuda-waterloo)
ADD_TEST(NVTT.TestSuite.Epic.cuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -out output-cuda-epic)
# Same image sets again with the -nocuda flag.
ADD_TEST(NVTT.TestSuite.Kodak.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 0 -nocuda -out output-nocuda-kodak)
ADD_TEST(NVTT.TestSuite.Waterloo.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 1 -nocuda -out output-nocuda-waterloo)
ADD_TEST(NVTT.TestSuite.Epic.nocuda nvtestsuite -path ${NV_SOURCE_DIR}/data/testsuite -set 2 -nocuda -out output-nocuda-epic)
# The driver API test is only built when CUDA was found.
IF (CUDA_FOUND)
ADD_EXECUTABLE(driverapitest driverapi.cpp)
TARGET_LINK_LIBRARIES(driverapitest nvcore nvmath nvimage)
ENDIF (CUDA_FOUND)
# Example program for the imperative TexImage API.
ADD_EXECUTABLE(imperativeapi imperativeapi.cpp)
TARGET_LINK_LIBRARIES(imperativeapi nvcore nvmath nvimage nvtt)
# Only the test suite binary is installed.
INSTALL(TARGETS nvtestsuite DESTINATION bin)
# Disabled: nvmpegenc target (needs the ffmpeg headers and links against avcodec and z).
#include_directories("/usr/include/ffmpeg/")
#ADD_EXECUTABLE(nvmpegenc tools/mpegenc.cpp tools/cmdline.h)
#TARGET_LINK_LIBRARIES(nvmpegenc nvcore nvmath nvimage avcodec z)
#INSTALL(TARGETS nvmpegenc DESTINATION bin)

View File

@ -1,144 +0,0 @@
#include <nvcore/Library.h>
#include <cuda.h>
#include <stdio.h>
// Typedefs
// Function-pointer types for the CUDA driver API entry points. CudaContext
// binds one pointer of each type by name through Library::bindSymbol, so each
// typedef is named after the symbol it is bound to, with a "Ptr" suffix.

// Initialization.
typedef CUresult (CUDAAPI * cuInitPtr)( unsigned int Flags );
// Device enumeration and queries.
typedef CUresult (CUDAAPI * cuDeviceGetPtr)(CUdevice *device, int ordinal);
typedef CUresult (CUDAAPI * cuDeviceGetCountPtr)(int *count);
typedef CUresult (CUDAAPI * cuDeviceGetNamePtr)(char *name, int len, CUdevice dev);
typedef CUresult (CUDAAPI * cuDeviceComputeCapabilityPtr)(int *major, int *minor, CUdevice dev);
typedef CUresult (CUDAAPI * cuDeviceTotalMemPtr)(unsigned int *bytes, CUdevice dev);
typedef CUresult (CUDAAPI * cuDeviceGetPropertiesPtr)(CUdevprop *prop, CUdevice dev);
typedef CUresult (CUDAAPI * cuDeviceGetAttributePtr)(int *pi, CUdevice_attribute attrib, CUdevice dev);
// Context management.
typedef CUresult (CUDAAPI * cuCtxCreatePtr)(CUcontext *pctx, unsigned int flags, CUdevice dev );
typedef CUresult (CUDAAPI * cuCtxDestroyPtr)( CUcontext ctx );
typedef CUresult (CUDAAPI * cuCtxAttachPtr)(CUcontext *pctx, unsigned int flags);
typedef CUresult (CUDAAPI * cuCtxDetachPtr)(CUcontext ctx);
typedef CUresult (CUDAAPI * cuCtxPushCurrentPtr)( CUcontext ctx );
typedef CUresult (CUDAAPI * cuCtxPopCurrentPtr)( CUcontext *pctx );
typedef CUresult (CUDAAPI * cuCtxGetDevicePtr)(CUdevice *device);
typedef CUresult (CUDAAPI * cuCtxSynchronizePtr)(void);
// A compressor inits CUDA and creates a context for each device.
//
// One entry per enumerated device; CudaContext keeps an array of these.
struct CudaDevice
{
// Device handle, filled in by cuDeviceGet for the device's ordinal.
CUdevice device;
// Context handle, filled in by cuCtxCreate for `device`.
CUcontext context;
};
struct CudaContext
{
CudaContext()
{
printf("CudaContext()\n");
#if NV_OS_WIN32
Library nvcuda("nvcuda.dll");
#else
Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif
cuInit = (cuInitPtr)nvcuda.bindSymbol("cuInit");
cuDeviceGet = (cuDeviceGetPtr)nvcuda.bindSymbol("cuDeviceGet");
cuDeviceGetCount = (cuDeviceGetCountPtr)nvcuda.bindSymbol("cuDeviceGetCount");
cuDeviceGetName = (cuDeviceGetNamePtr)nvcuda.bindSymbol("cuDeviceGetName");
cuDeviceComputeCapability = (cuDeviceComputeCapabilityPtr)nvcuda.bindSymbol("cuDeviceComputeCapability");
cuDeviceTotalMem = (cuDeviceTotalMemPtr)nvcuda.bindSymbol("cuDeviceTotalMem");
cuDeviceGetProperties = (cuDeviceGetPropertiesPtr)nvcuda.bindSymbol("cuDeviceGetProperties");
cuDeviceGetAttribute = (cuDeviceGetAttributePtr)nvcuda.bindSymbol("cuDeviceGetAttribute");
cuCtxCreate = (cuCtxCreatePtr)nvcuda.bindSymbol("cuCtxCreate");
cuCtxDestroy = (cuCtxDestroyPtr)nvcuda.bindSymbol("cuCtxDestroy");
cuCtxAttach = (cuCtxAttachPtr)nvcuda.bindSymbol("cuCtxAttach");
cuCtxDetach = (cuCtxDetachPtr)nvcuda.bindSymbol("cuCtxDetach");
cuCtxPushCurrent = (cuCtxPushCurrentPtr)nvcuda.bindSymbol("cuCtxPushCurrent");
cuCtxPopCurrent = (cuCtxPopCurrentPtr)nvcuda.bindSymbol("cuCtxPopCurrent");
cuCtxGetDevice = (cuCtxGetDevicePtr)nvcuda.bindSymbol("cuCtxGetDevice");
cuCtxSynchronize = (cuCtxSynchronizePtr)nvcuda.bindSymbol("cuCtxSynchronize");
CUresult status = cuInit(0);
if (status == CUDA_SUCCESS)
{
printf("cuInit succeeded.\n");
}
m_deviceCount = 0;
cuDeviceGetCount(&m_deviceCount);
printf("%d devices found.\n", m_deviceCount);
if (m_deviceCount > 0)
{
m_devices = new CudaDevice[m_deviceCount];
uint flags = CU_CTX_SCHED_AUTO;
if (m_deviceCount > 1) flags = CU_CTX_SCHED_YIELD;
for (int i = 0; i < m_deviceCount; i++)
{
cuDeviceGet(&m_devices[i].device, i);
cuCtxCreate(&m_devices[i].context, flags, m_devices[i].device);
cuCtxDestroy(m_devices[i].context);
}
}
}
~CudaContext()
{
printf("~CudaContext()\n");
if (m_deviceCount > 0)
{
for (int i = 0; i < m_deviceCount; i++)
{
cuCtxDestroy(m_devices[i].context);
}
delete [] m_devices;
}
}
public:
cuInitPtr cuInit;
cuDeviceGetPtr cuDeviceGet;
cuDeviceGetCountPtr cuDeviceGetCount;
cuDeviceGetNamePtr cuDeviceGetName;
cuDeviceComputeCapabilityPtr cuDeviceComputeCapability;
cuDeviceTotalMemPtr cuDeviceTotalMem;
cuDeviceGetPropertiesPtr cuDeviceGetProperties;
cuDeviceGetAttributePtr cuDeviceGetAttribute;
cuCtxCreatePtr cuCtxCreate;
cuCtxDestroyPtr cuCtxDestroy;
cuCtxAttachPtr cuCtxAttach;
cuCtxDetachPtr cuCtxDetach;
cuCtxPushCurrentPtr cuCtxPushCurrent;
cuCtxPopCurrentPtr cuCtxPopCurrent;
cuCtxGetDevicePtr cuCtxGetDevice;
cuCtxSynchronizePtr cuCtxSynchronize;
int m_deviceCount;
CudaDevice * m_devices;
};
int main(void)
{
CudaContext ctx;
// cuInit(0);
return 0;
}

View File

@ -1,58 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_BC1);
nvtt::OutputOptions outputOptions;
outputOptions.setFileName("output.dds");
nvtt::Context context;
nvtt::TexImage image = context.createTexImage();
image.load("kodim01.png");
context.outputHeader(image, image.countMipmaps(), compressionOptions, outputOptions);
float gamma = 2.2;
image.toLinear(gamma);
while (image.buildNextMipmap(nvtt::MipmapFilter_Box))
{
nvtt::TexImage tmpImage = image;
tmpImage.toGamma(gamma);
context.compress(tmpImage, compressionOptions, outputOptions);
// tmpImage.compress(compressionOptions, outputOptions);
}
return EXIT_SUCCESS;
}

View File

@ -1,344 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvtt/nvtt.h>
#include "cmdline.h"
extern "C" {
#include <libavcodec/avcodec.h>
//#include <libavformat/avformat.h>
}
// http://ffmpeg.mplayerhq.hu/general.html
// http://cekirdek.pardus.org.tr/~ismail/ffmpeg-docs/apiexample_8c-source.html
using namespace nv;
static float s_quality = 0.5f;
// Builds a planar YCbCr 4:2:0 picture from `image`.
//
// The three planes live in one malloc'ed buffer of (w*h*3)/2 bytes:
// plane 0 is w*h luma bytes, planes 1 and 2 are the Cb and Cr planes with a
// row stride of w/2. picture->data[0] points at the start of that buffer, so
// the caller releases the pixels with free(picture->data[0]) and the frame
// itself with av_free.
//
// Exits the process with status 1 if either allocation fails, matching the
// error handling used by the encode/decode helpers in this file.
static AVFrame * createPicture(const Image & image)
{
    const uint w = image.width();
    const uint h = image.height();
    const uint size = w * h;
    const uint bufferSize = (size * 3) / 2;

    AVFrame * picture = avcodec_alloc_frame();
    uint8_t * buffer = (uint8_t *)malloc(bufferSize);

    // malloc(0) may legitimately return NULL, so only a non-empty request
    // counts as a failure.
    if (picture == NULL || (buffer == NULL && bufferSize != 0))
    {
        printf("Out of memory allocating picture.\n");
        exit(1);
    }

    picture->data[0] = buffer;
    picture->data[1] = buffer + size;
    picture->data[2] = buffer + size + size / 4;
    picture->linesize[0] = w;
    picture->linesize[1] = w / 2;
    picture->linesize[2] = w / 2;

    if (bufferSize != 0)
    {
        memset(buffer, 0, bufferSize);
    }

    // Convert image to YCbCr 4:2:0

    const float scale = 1 / 255.0f;

    // Y
    for (uint y = 0; y < h; y++)
    {
        for (uint x = 0; x < w; x++)
        {
            Color32 c = image.pixel(x, y);

            float R = scale * c.r;
            float G = scale * c.g;
            float B = scale * c.b;

            //float Y = 0.299f * R + 0.587f * G + 0.114f * B;
            float Y = 16 + (65.481f * R + 128.553f * G + 24.966f * B);

            picture->data[0][y * picture->linesize[0] + x] = (uint8)clamp(Y, 0.0f, 255.0f);
        }
    }

    // Cb and Cr: one sample per 2x2 pixel block, taken from the block average.
    for (uint y = 0; y < h/2; y++)
    {
        for (uint x = 0; x < w/2; x++)
        {
            Color32 c0 = image.pixel(2*x+0, 2*y+0);
            Color32 c1 = image.pixel(2*x+1, 2*y+0);
            Color32 c2 = image.pixel(2*x+0, 2*y+1);
            Color32 c3 = image.pixel(2*x+1, 2*y+1);

            float R = scale * 0.25f * (c0.r + c1.r + c2.r + c3.r);
            float G = scale * 0.25f * (c0.g + c1.g + c2.g + c3.g);
            float B = scale * 0.25f * (c0.b + c1.b + c2.b + c3.b);

            //float Pb = - 0.168736f * R - 0.331264f * G + 0.5f * B;
            //float Pr = + 0.5f * R - 0.418688f * G - 0.081312f * B;

            float Cb = 128 + (-37.797f * R - 74.203f * G + 112.0f * B);
            // 93.786 is deliberately left as the double literal it has always
            // been, so the rounding of Cr does not change.
            float Cr = 128 + (112.0f * R - 93.786 * G - 18.214f * B);

            picture->data[1][y * picture->linesize[1] + x] = (uint8)clamp(Cb, 0.0f, 255.0f);
            picture->data[2][y * picture->linesize[2] + x] = (uint8)clamp(Cr, 0.0f, 255.0f);
        }
    }

    return picture;
}
// Writes an 8-bit greyscale plane to `filename` as a binary PGM (P5) file.
//
// buf      - first byte of the plane.
// wrap     - distance in bytes between the starts of consecutive rows.
// xsize    - number of bytes (pixels) written per row.
// ysize    - number of rows.
// filename - output path. If it cannot be opened, an error is printed to
//            stderr and nothing is written.
static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char * filename)
{
    // P5 stores raw bytes after the header, so the stream has to be binary;
    // a text-mode stream may translate newline bytes on some platforms.
    FILE * f = fopen(filename, "wb");
    if (f == NULL)
    {
        fprintf(stderr, "Error opening '%s' for writing.\n", filename);
        return;
    }

    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);

    for (int i = 0; i < ysize; i++)
    {
        fwrite(buf + i * wrap, 1, xsize, f);
    }

    fclose(f);
}
// Dumps the three planes of `picture` to test_y.pgm, test_u.pgm and
// test_v.pgm. Plane 0 is written at w x h, planes 1 and 2 at (w/2) x (h/2).
static void savePicture(const AVFrame * picture, int w, int h)
{
    // @@ Combine planes.
    static const char * const planeFiles[3] = { "test_y.pgm", "test_u.pgm", "test_v.pgm" };

    for (int plane = 0; plane < 3; plane++)
    {
        const bool fullSize = (plane == 0);
        const int planeW = fullSize ? w : w/2;
        const int planeH = fullSize ? h : h/2;

        pgm_save(picture->data[plane], picture->linesize[plane], planeW, planeH, planeFiles[plane]);
    }
}
// Returns -10 times the base-10 logarithm of d, computed from natural logs.
static double psnr(double d) {
    const double scaledLog = -10.0 * log(d);
    return scaledLog / log(10.0);
}
// Encodes `image` as a single intra frame with the codec `format` and stores
// the resulting bitstream, followed by a sequence end code, in `frame`.
//
// image  - source picture; converted to YCbCr 4:2:0 by createPicture.
// format - libavcodec codec id of the encoder to use.
// frame  - receives the encoded bytes; any previous content is replaced.
//
// Prints the compression ratio and the PSNR reported by the encoder.
// Exits the process with status 1 if the encoder is missing, cannot be
// opened, or produces no output.
static void encodeFrame(const Image & image, CodecID format, Array<uint8> & frame)
{
    AVFrame * picture = createPicture(image);

    AVCodec * encoder = avcodec_find_encoder(format);
    if (encoder == NULL)
    {
        printf("MPEG encoder not found.\n");
        exit(1);
    }

    AVCodecContext * encoder_context = avcodec_alloc_context();

    //encoder_context->me_method = 0;
    encoder_context->width = image.width();
    encoder_context->height = image.height();
    encoder_context->pix_fmt = PIX_FMT_YUV420P;
    //encoder_context->pix_fmt = PIX_FMT_YUV422P;
    //encoder_context->pix_fmt = PIX_FMT_YUVJ420P;

    // Required parameter. 25 fps? Assigned member by member because the C
    // compound literal "(AVRational){1,25}" is not standard C++.
    encoder_context->time_base.num = 1;
    encoder_context->time_base.den = 25;

    encoder_context->bit_rate = 400000; // Quality?
    //encoder_context->bit_rate = 200000; // Default
    //encoder_context->bit_rate_tolerance = 20000;

    //encoder_context->qmin = ?;
    //encoder_context->qmax = ?;
    //encoder_context->qcompress = ?;
    //encoder_context->qblur = ?;

    encoder_context->flags |= CODEC_FLAG_PSNR;

    encoder_context->qcompress = s_quality;
    //encoder_context->qblur = 1.0f;
    //encoder_context->global_quality = FF_QP2LAMBDA * 0;
    //encoder_context->max_qdiff = 3;

    // Intra frames only
    encoder_context->gop_size = 0;

    if (avcodec_open(encoder_context, encoder) < 0)
    {
        printf("MPEG encoder initialization failed.\n");
        exit(1);
    }

    frame.resize(1024 * 1024, 0); // resize and initialize to 0.

    int out_size = avcodec_encode_video(encoder_context, frame.mutableBuffer(), frame.size(), picture);

    // A negative value is an error code and zero means no bytes were emitted.
    // Either one would corrupt the resize below or divide by zero in the
    // ratio printed further down.
    if (out_size <= 0)
    {
        printf("MPEG encoding failed.\n");
        exit(1);
    }

    frame.resize(out_size);

    // Append sequence end code.
    frame.append(0x00);
    frame.append(0x00);
    frame.append(0x01);
    frame.append(0xb7);

    int in_size = image.width() * image.height() * 3;
    printf("Image size %d -> %d (1:%d)\n", in_size, out_size, in_size/out_size);

    printf("PSNR = %4.2f\n", psnr(encoder_context->coded_frame->error[0]/(encoder_context->width*encoder_context->height*255.0*255.0)));

    avcodec_close(encoder_context);
    av_free(encoder_context);

    // createPicture allocates the pixel planes with malloc and stores the
    // base pointer in data[0]; av_free(picture) releases only the frame
    // structure, so the planes are released here.
    free(picture->data[0]);
    av_free(picture);
}
static void decodeFrame(const Array<uint8> & frame, CodecID format)
{
AVCodec * decoder = avcodec_find_decoder(format);
if (decoder == NULL) {
printf("MPEG decoder not found.\n");
exit(1);
}
AVCodecContext * decoder_context = avcodec_alloc_context();
AVFrame * picture = avcodec_alloc_frame();
if (decoder->capabilities & CODEC_CAP_TRUNCATED)
decoder_context->flags |= CODEC_FLAG_TRUNCATED; /* we do not send complete frames */
if (avcodec_open(decoder_context, decoder) < 0) {
printf("MPEG decoder initialization failed.\n");
exit(1);
}
//memset(picture->data[0], 0, in_size / 2);
int got_picture = 0;
int len = avcodec_decode_video(decoder_context, picture, &got_picture, frame.buffer(), frame.size());
printf("decoded %d bytes\n", len);
if (len < 0) {
printf("Error while decoding frame.\n");
exit(1);
}
if (!got_picture) {
printf("Did not get any picture.\n");
exit(1);
}
//nvDebugCheck(outbuf_size == len);
//nvDebugCheck(got_picture == true);
savePicture(picture, decoder_context->width, decoder_context->height);
avcodec_close(decoder_context);
av_free(decoder_context);
av_free(picture);
}
// Command-line entry point: loads an image, encodes it as a single MPEG-2
// intra frame and writes the bitstream to the output file.
//
// usage: [-q|--quality <value>] infile [outfile]
// Without an outfile the input name with its extension replaced by ".mpeg"
// is used. Argument parsing stops at the input file name.
int main(int argc, char *argv[])
{
    MyAssertHandler assertHandler;
    MyMessageHandler messageHandler;

    nv::Path input;
    nv::Path output;

    // Parse arguments.
    for (int i = 1; i < argc; i++)
    {
        const bool isQualityFlag = (strcmp(argv[i], "-q") == 0) || (strcmp(argv[i], "--quality") == 0);
        const bool valueFollows = (i+1 < argc) && (argv[i+1][0] != '-');

        if (isQualityFlag)
        {
            if (valueFollows)
            {
                i++;
                s_quality = atof(argv[i]);
            }
            continue;
        }

        // Any other option is ignored.
        if (argv[i][0] == '-')
        {
            continue;
        }

        input = argv[i];

        if (valueFollows)
        {
            output = argv[i+1];
        }
        else
        {
            output.copy(input.str());
            output.stripExtension();
            output.append(".mpeg");
        }

        // Nothing after the file names is parsed.
        break;
    }

    printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007-2008\n\n");

    if (input.isNull())
    {
        printf("usage: nvmpegcompress [options] infile [outfile]\n\n");
        return 1;
    }

    // Load image.
    Image image;
    const bool loaded = image.load(input);
    if (!loaded)
    {
        fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
        return 1;
    }

    // Initialize codecs.
    avcodec_init();
    avcodec_register_all();

    //CodecID format = CODEC_ID_MPEG1VIDEO;
    CodecID format = CODEC_ID_MPEG2VIDEO;
    //CodecID format = CODEC_ID_MJPEG;
    //CodecID format = CODEC_ID_THEORA;
    //CodecID format = CODEC_ID_H264;

    // Encode frame.
    Array<uint8> frame;
    encodeFrame(image, format, frame);

    // Save resulting I-frame.
    StdOutputStream outputStream(output.str());
    if (outputStream.isError())
    {
        printf("Error opening '%s' for writing.\n", output.str());
        return 1;
    }

    outputStream.serialize(frame.mutableBuffer(), frame.size());

    //decodeFrame(frame, format);

    // @@ Compare image against original, and compute RMS.

    return 0;
}

View File

@ -1,529 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <nvimage/Image.h>
#include <nvimage/ImageIO.h>
#include <nvimage/BlockDXT.h>
#include <nvimage/ColorBlock.h>
#include <nvcore/Ptr.h>
#include <nvcore/Debug.h>
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvcore/TextWriter.h>
#include <nvcore/FileSystem.h>
#include <nvcore/Timer.h>
#include <stdlib.h> // free
#include <string.h> // memcpy
using namespace nv;
// File name tables for the test image sets. The entries are bare file names;
// main changes the working directory to the -path argument before loading
// them, so they are resolved relative to that directory.

// Kodak image set
static const char * s_kodakImageSet[] = {
"kodim01.png",
"kodim02.png",
"kodim03.png",
"kodim04.png",
"kodim05.png",
"kodim06.png",
"kodim07.png",
"kodim08.png",
"kodim09.png",
"kodim10.png",
"kodim11.png",
"kodim12.png",
"kodim13.png",
"kodim14.png",
"kodim15.png",
"kodim16.png",
"kodim17.png",
"kodim18.png",
"kodim19.png",
"kodim20.png",
"kodim21.png",
"kodim22.png",
"kodim23.png",
"kodim24.png",
};
// Waterloo image set
static const char * s_waterlooImageSet[] = {
"clegg.png",
"frymire.png",
"lena.png",
"monarch.png",
"peppers.png",
"sail.png",
"serrano.png",
"tulips.png",
};
// Epic image set
static const char * s_epicImageSet[] = {
"Bradley1.png",
"Gradient.png",
"MoreRocks.png",
"Wall.png",
"Rainbow.png",
"Text.png",
};
// Farbrausch
static const char * s_farbrauschImageSet[] = {
"t.2d.pn02.bmp",
"t.aircondition.01.bmp",
"t.bricks.02.bmp",
"t.bricks.05.bmp",
"t.concrete.cracked.01.bmp",
"t.envi.colored02.bmp",
"t.envi.colored03.bmp",
"t.font.01.bmp",
"t.sewers.01.bmp",
"t.train.03.bmp",
"t.yello.01.bmp",
};
// Lugaru
static const char * s_lugaruImageSet[] = {
"lugaru-blood.png",
"lugaru-bush.png",
"lugaru-cursor.png",
"lugaru-hawk.png",
};
// Quake3
static const char * s_quake3ImageSet[] = {
"q3-blocks15cgeomtrn.tga",
"q3-blocks17bloody.tga",
"q3-dark_tin2.tga",
"q3-fan_grate.tga",
"q3-fan.tga",
"q3-metal2_2.tga",
"q3-panel_glo.tga",
"q3-proto_fence.tga",
"q3-wires02.tga",
};
// Describes one test image set: which files to compress and with which format.
struct ImageSet
{
// Array of image file names (one of the s_*ImageSet tables).
const char ** fileNames;
// Number of entries in fileNames.
int fileCount;
// Compression format used for every image of the set; main passes it to
// CompressionOptions::setFormat and to MyOutputHandler::decompress.
nvtt::Format format;
};
// All available image sets. main indexes this table with the value of the
// -set command-line argument, so the order here defines the set numbers
// (0 = Kodak ... 5 = Quake3).
// NOTE(review): the -help text only lists sets 0 to 3.
static ImageSet s_imageSets[] = {
{s_kodakImageSet, sizeof(s_kodakImageSet)/sizeof(s_kodakImageSet[0]), nvtt::Format_DXT1},
{s_waterlooImageSet, sizeof(s_waterlooImageSet)/sizeof(s_waterlooImageSet[0]), nvtt::Format_DXT1},
{s_epicImageSet, sizeof(s_epicImageSet)/sizeof(s_epicImageSet[0]), nvtt::Format_DXT1},
{s_farbrauschImageSet, sizeof(s_farbrauschImageSet)/sizeof(s_farbrauschImageSet[0]), nvtt::Format_DXT1},
{s_lugaruImageSet, sizeof(s_lugaruImageSet)/sizeof(s_lugaruImageSet[0]), nvtt::Format_DXT5},
{s_quake3ImageSet, sizeof(s_quake3ImageSet)/sizeof(s_quake3ImageSet[0]), nvtt::Format_DXT5},
};
// Number of entries in s_imageSets.
const int s_imageSetCount = sizeof(s_imageSets)/sizeof(s_imageSets[0]);
// Selects which block decoding routine MyOutputHandler::decompress calls.
// main casts the integer given after -dec directly to this enum, so the
// enumerator values are part of the command-line interface (0 and 1).
enum Decoder
{
// Uses decodeBlock.
Decoder_Reference,
// Uses decodeBlockNV5x.
Decoder_NVIDIA,
};
struct MyOutputHandler : public nvtt::OutputHandler
{
MyOutputHandler() : m_data(NULL), m_ptr(NULL) {}
~MyOutputHandler()
{
free(m_data);
}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
m_size = size;
m_width = width;
m_height = height;
free(m_data);
m_data = (unsigned char *)malloc(size);
m_ptr = m_data;
}
virtual bool writeData(const void * data, int size)
{
memcpy(m_ptr, data, size);
m_ptr += size;
return true;
}
Image * decompress(nvtt::Format format, Decoder decoder)
{
int bw = (m_width + 3) / 4;
int bh = (m_height + 3) / 4;
AutoPtr<Image> img( new Image() );
img->allocate(m_width, m_height);
if (format == nvtt::Format_BC1)
{
BlockDXT1 * block = (BlockDXT1 *)m_data;
for (int y = 0; y < bh; y++)
{
for (int x = 0; x < bw; x++)
{
ColorBlock colors;
if (decoder == Decoder_Reference) {
block->decodeBlock(&colors);
}
else if (decoder == Decoder_NVIDIA) {
block->decodeBlockNV5x(&colors);
}
for (int yy = 0; yy < 4; yy++)
{
for (int xx = 0; xx < 4; xx++)
{
Color32 c = colors.color(xx, yy);
if (x * 4 + xx < m_width && y * 4 + yy < m_height)
{
img->pixel(x * 4 + xx, y * 4 + yy) = c;
}
}
}
block++;
}
}
}
else if (format == nvtt::Format_BC3)
{
BlockDXT5 * block = (BlockDXT5 *)m_data;
for (int y = 0; y < bh; y++)
{
for (int x = 0; x < bw; x++)
{
ColorBlock colors;
if (decoder == Decoder_Reference) {
block->decodeBlock(&colors);
}
else if (decoder == Decoder_NVIDIA) {
block->decodeBlockNV5x(&colors);
}
for (int yy = 0; yy < 4; yy++)
{
for (int xx = 0; xx < 4; xx++)
{
Color32 c = colors.color(xx, yy);
if (x * 4 + xx < m_width && y * 4 + yy < m_height)
{
img->pixel(x * 4 + xx, y * 4 + yy) = c;
}
}
}
block++;
}
}
}
return img.release();
}
int m_size;
int m_width;
int m_height;
unsigned char * m_data;
unsigned char * m_ptr;
};
float rmsError(const Image * a, const Image * b)
{
nvCheck(a != NULL);
nvCheck(b != NULL);
nvCheck(a->width() == b->width());
nvCheck(a->height() == b->height());
double mse = 0;
const uint count = a->width() * a->height();
for (uint i = 0; i < count; i++)
{
Color32 c0 = a->pixel(i);
Color32 c1 = b->pixel(i);
int r = c0.r - c1.r;
int g = c0.g - c1.g;
int b = c0.b - c1.b;
int a = c0.a - c1.a;
mse += double(r * r * c0.a) / 255;
mse += double(g * g * c0.a) / 255;
mse += double(b * b * c0.a) / 255;
}
return float(sqrt(mse / count));
}
int main(int argc, char *argv[])
{
const uint version = nvtt::version();
const uint major = version / 100;
const uint minor = version % 100;
printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007 - 2008\n\n", major, minor);
int set = 0;
bool fast = false;
bool nocuda = false;
bool showHelp = false;
Decoder decoder = Decoder_Reference;
const char * basePath = "";
const char * outPath = "output";
const char * regressPath = NULL;
// Parse arguments.
for (int i = 1; i < argc; i++)
{
if (strcmp("-set", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
set = atoi(argv[i+1]);
i++;
}
}
else if (strcmp("-dec", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
decoder = (Decoder)atoi(argv[i+1]);
i++;
}
}
else if (strcmp("-fast", argv[i]) == 0)
{
fast = true;
}
else if (strcmp("-nocuda", argv[i]) == 0)
{
nocuda = true;
}
else if (strcmp("-help", argv[i]) == 0)
{
showHelp = true;
}
else if (strcmp("-path", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
basePath = argv[i+1];
i++;
}
}
else if (strcmp("-out", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
outPath = argv[i+1];
i++;
}
}
else if (strcmp("-regress", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
regressPath = argv[i+1];
i++;
}
}
}
if (showHelp)
{
printf("usage: nvtestsuite [options]\n\n");
printf("Input options:\n");
printf(" -path <path> \tInput image path.\n");
printf(" -regress <path>\tRegression directory.\n");
printf(" -set [0:2] \tImage set.\n");
printf(" 0: \tKodak.\n");
printf(" 1: \tWaterloo.\n");
printf(" 2: \tEpic.\n");
printf(" 3: \tFarbrausch.\n");
printf(" -dec x \tDecompressor.\n");
printf(" 0: \tReference.\n");
printf(" 1: \tNVIDIA.\n");
printf("Compression options:\n");
printf(" -fast \tFast compression.\n");
printf(" -nocuda \tDo not use cuda compressor.\n");
printf("Output options:\n");
printf(" -out <path> \tOutput directory.\n");
return 1;
}
nvtt::InputOptions inputOptions;
inputOptions.setMipmapGeneration(false);
inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_BC1);
if (fast)
{
compressionOptions.setQuality(nvtt::Quality_Fastest);
}
else
{
compressionOptions.setQuality(nvtt::Quality_Production);
}
//compressionOptions.setExternalCompressor("ati");
//compressionOptions.setExternalCompressor("squish");
//compressionOptions.setExternalCompressor("d3dx");
//compressionOptions.setExternalCompressor("stb");
compressionOptions.setFormat(s_imageSets[set].format);
nvtt::OutputOptions outputOptions;
outputOptions.setOutputHeader(false);
MyOutputHandler outputHandler;
outputOptions.setOutputHandler(&outputHandler);
nvtt::Context context;
context.enableCudaAcceleration(!nocuda);
FileSystem::changeDirectory(basePath);
FileSystem::createDirectory(outPath);
Path csvFileName;
csvFileName.format("%s/result.csv", outPath);
StdOutputStream csvStream(csvFileName);
TextWriter csvWriter(&csvStream);
float totalTime = 0;
float totalRMSE = 0;
int failedTests = 0;
float totalDiff = 0;
const char ** fileNames = s_imageSets[set].fileNames;
int fileCount = s_imageSets[set].fileCount;
Timer timer;
for (int i = 0; i < fileCount; i++)
{
AutoPtr<Image> img( new Image() );
if (!img->load(fileNames[i]))
{
printf("Input image '%s' not found.\n", fileNames[i]);
return EXIT_FAILURE;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, img->width(), img->height());
inputOptions.setMipmapData(img->pixels(), img->width(), img->height());
printf("Compressing: \t'%s'\n", fileNames[i]);
timer.start();
context.process(inputOptions, compressionOptions, outputOptions);
timer.stop();
printf(" Time: \t%.3f sec\n", float(timer.elapsed()) / 1000);
totalTime += float(timer.elapsed()) / 1000;
AutoPtr<Image> img_out( outputHandler.decompress(s_imageSets[set].format, decoder) );
Path outputFileName;
outputFileName.format("%s/%s", outPath, fileNames[i]);
outputFileName.stripExtension();
outputFileName.append(".png");
if (!ImageIO::save(outputFileName, img_out.ptr()))
{
printf("Error saving file '%s'.\n", outputFileName.str());
}
float rmse = rmsError(img.ptr(), img_out.ptr());
totalRMSE += rmse;
printf(" RMSE: \t%.4f\n", rmse);
// Output csv file
csvWriter << "\"" << fileNames[i] << "\"," << rmse << "\n";
if (regressPath != NULL)
{
Path regressFileName;
regressFileName.format("%s/%s", regressPath, fileNames[i]);
regressFileName.stripExtension();
regressFileName.append(".png");
AutoPtr<Image> img_reg( new Image() );
if (!img_reg->load(regressFileName.str()))
{
printf("Regression image '%s' not found.\n", regressFileName.str());
return EXIT_FAILURE;
}
float rmse_reg = rmsError(img.ptr(), img_reg.ptr());
float diff = rmse_reg - rmse;
totalDiff += diff;
const char * text = "PASSED";
if (equal(diff, 0)) text = "PASSED";
else if (diff < 0) {
text = "FAILED";
failedTests++;
}
printf(" Diff: \t%.4f (%s)\n", diff, text);
}
fflush(stdout);
}
totalRMSE /= fileCount;
totalDiff /= fileCount;
printf("Total Results:\n");
printf(" Total Time: \t%.3f sec\n", totalTime);
printf(" Average RMSE:\t%.4f\n", totalRMSE);
if (regressPath != NULL)
{
printf("Regression Results:\n");
printf(" Diff: %.4f\n", totalDiff);
printf(" %d/%d tests failed.\n", failedTests, fileCount);
}
return EXIT_SUCCESS;
}

View File

@ -1,63 +0,0 @@
# Command-line tools. Every tool shares cmdline.h; only nvcompress links
# against nvtt, the others need nvcore, nvmath and nvimage only.
ADD_EXECUTABLE(nvcompress compress.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvdecompress decompress.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage)
ADD_EXECUTABLE(nvddsinfo ddsinfo.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage)
ADD_EXECUTABLE(nvimgdiff imgdiff.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage)
ADD_EXECUTABLE(nvassemble assemble.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
# Note: the nvzoom executable is built from resize.cpp.
ADD_EXECUTABLE(nvzoom resize.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
ADD_EXECUTABLE(nv-gnome-thumbnailer thumbnailer.cpp cmdline.h)
TARGET_LINK_LIBRARIES(nv-gnome-thumbnailer nvcore nvmath nvimage)
# All of the tools above are installed into bin.
INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom nv-gnome-thumbnailer DESTINATION bin)
# Use gconftool-2 to install gnome thumbnailer
# The schema is generated at configure time; the gconftool-2 commands are
# wrapped in INSTALL(CODE ...) so that they run at install time.
FIND_PROGRAM(GCONFTOOL2 gconftool-2)
IF(GCONFTOOL2)
CONFIGURE_FILE(nvtt-thumbnailer.schema.in ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema)
INSTALL(CODE "MESSAGE(STATUS \"Installing thumbnailer schema\")")
#gconftool-2 --get-default-source
INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --get-default-source OUTPUT_VARIABLE GCONF_CONFIG_SOURCE OUTPUT_STRIP_TRAILING_WHITESPACE)")
# \${...} is escaped so that the variable is expanded by the install script, not at configure time.
INSTALL(CODE "set(ENV{GCONF_CONFIG_SOURCE} \"\${GCONF_CONFIG_SOURCE}\")")
INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${GCONFTOOL2} --makefile-install-rule ${CMAKE_CURRENT_BINARY_DIR}/nvtt-thumbnailer.schema)")
ENDIF(GCONFTOOL2)
# UI tools
# nvcompressui is only built when Qt4 was found; it is not part of the INSTALL rule above.
IF(QT4_FOUND) # AND NOT MSVC)
SET(QT_USE_QTOPENGL TRUE)
# The binary dir is added to the include path alongside the Qt headers.
INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
SET(SRCS
ui/main.cpp
ui/configdialog.h
ui/configdialog.cpp)
# Note: this replaces any previous value of LIBS in this directory scope.
SET(LIBS
nvtt
${QT_QTCORE_LIBRARY}
${QT_QTGUI_LIBRARY}
${QT_QTOPENGL_LIBRARY})
QT4_WRAP_UI(UICS ui/configdialog.ui)
QT4_WRAP_CPP(MOCS ui/configdialog.h)
#QT4_ADD_RESOURCES(RCCS ui/configdialog.rc)
ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS})
TARGET_LINK_LIBRARIES(nvcompressui ${LIBS})
ENDIF(QT4_FOUND) # AND NOT MSVC)

View File

@ -21,20 +21,20 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "cmdline.h"
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvimage/Image.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvtt/nvtt.h>
#include <nvimage/Image.h> // @@ It might be a good idea to use FreeImage directly instead of ImageIO.
#include <nvimage/ImageIO.h>
#include <nvimage/FloatImage.h>
#include <nvimage/DirectDrawSurface.h>
#include "cmdline.h"
#include <nvcore/Ptr.h>
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvcore/FileSystem.h>
#include <nvcore/Timer.h>
#include <time.h> // clock
//#define WINDOWS_LEAN_AND_MEAN
//#include <windows.h> // TIMER
struct MyOutputHandler : public nvtt::OutputHandler
@ -141,17 +141,12 @@ int main(int argc, char *argv[])
bool noMipmaps = false;
bool fast = false;
bool nocuda = false;
bool silent = false;
bool bc1n = false;
nvtt::Format format = nvtt::Format_BC1;
bool premultiplyAlpha = false;
nvtt::MipmapFilter mipmapFilter = nvtt::MipmapFilter_Box;
bool loadAsFloat = false;
const char * externalCompressor = NULL;
bool silent = false;
bool dds10 = false;
nv::Path input;
nv::Path output;
@ -186,23 +181,6 @@ int main(int argc, char *argv[])
{
noMipmaps = true;
}
else if (strcmp("-premula", argv[i]) == 0)
{
premultiplyAlpha = true;
}
else if (strcmp("-mipfilter", argv[i]) == 0)
{
if (i+1 == argc) break;
i++;
if (strcmp("box", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Box;
else if (strcmp("triangle", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Triangle;
else if (strcmp("kaiser", argv[i]) == 0) mipmapFilter = nvtt::MipmapFilter_Kaiser;
}
else if (strcmp("-float", argv[i]) == 0)
{
loadAsFloat = true;
}
// Compression options.
else if (strcmp("-fast", argv[i]) == 0)
@ -260,15 +238,11 @@ int main(int argc, char *argv[])
}
}
// Output options
// Misc options
else if (strcmp("-silent", argv[i]) == 0)
{
silent = true;
}
else if (strcmp("-dds10", argv[i]) == 0)
{
dds10 = true;
}
else if (argv[i][0] != '-')
{
@ -300,16 +274,13 @@ int main(int argc, char *argv[])
printf("usage: nvcompress [options] infile [outfile]\n\n");
printf("Input options:\n");
printf(" -color \tThe input image is a color map (default).\n");
printf(" -alpha \tThe input image has an alpha channel used for transparency.\n");
printf(" -normal \tThe input image is a normal map.\n");
printf(" -tonormal \tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n");
printf(" -repeat \tRepeat wrapping mode.\n");
printf(" -nomips \tDisable mipmap generation.\n");
printf(" -premula \tPremultiply alpha into color channel.\n");
printf(" -mipfilter \tMipmap filter. One of the following: box, triangle, kaiser.\n");
printf(" -float \tLoad as floating point image.\n\n");
printf(" -color \tThe input image is a color map (default).\n");
printf(" -alpha \tThe input image has an alpha channel used for transparency.\n");
printf(" -normal \tThe input image is a normal map.\n");
printf(" -tonormal\tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n");
printf(" -repeat \tRepeat wrapping mode.\n");
printf(" -nomips \tDisable mipmap generation.\n\n");
printf("Compression options:\n");
printf(" -fast \tFast compression.\n");
@ -324,19 +295,10 @@ int main(int argc, char *argv[])
printf(" -bc4 \tBC4 format (ATI1)\n");
printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");
printf("Output options:\n");
printf(" -silent \tDo not output progress messages\n");
printf(" -dds10 \tUse DirectX 10 DDS format\n\n");
return EXIT_FAILURE;
}
// Make sure input file exists.
if (!nv::FileSystem::exists(input.str()))
{
fprintf(stderr, "The file '%s' does not exist.\n", input.str());
return 1;
}
// @@ Make sure input file exists.
// Set input options.
nvtt::InputOptions inputOptions;
@ -378,7 +340,7 @@ int main(int argc, char *argv[])
{
for (uint m = 0; m < mipmapCount; m++)
{
dds.mipmap(&mipmap, f, m); // @@ Load as float.
dds.mipmap(&mipmap, f, m);
inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), 1, f, m);
}
@ -386,42 +348,16 @@ int main(int argc, char *argv[])
}
else
{
if (nv::strCaseCmp(input.extension(), ".exr") == 0)
// Regular image.
nv::Image image;
if (!image.load(input))
{
loadAsFloat = true;
}
if (loadAsFloat)
{
nv::AutoPtr<nv::FloatImage> image(nv::ImageIO::loadFloat(input));
if (image == NULL)
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return EXIT_FAILURE;
}
inputOptions.setFormat(nvtt::InputFormat_RGBA_32F);
inputOptions.setTextureLayout(nvtt::TextureType_2D, image->width(), image->height());
for (uint i = 0; i < image->componentNum(); i++)
{
inputOptions.setMipmapChannelData(image->channel(i), i, image->width(), image->height());
}
}
else
{
// Regular image.
nv::Image image;
if (!image.load(input))
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return 1;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
inputOptions.setMipmapData(image.pixels(), image.width(), image.height());
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return EXIT_FAILURE;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
inputOptions.setMipmapData(image.pixels(), image.width(), image.height());
}
if (wrapRepeat)
@ -460,31 +396,8 @@ int main(int argc, char *argv[])
inputOptions.setMipmapGeneration(false);
}
if (premultiplyAlpha)
{
inputOptions.setPremultiplyAlpha(true);
inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied);
}
inputOptions.setMipmapFilter(mipmapFilter);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(format);
if (format == nvtt::Format_BC2) {
// Dither alpha when using BC2.
compressionOptions.setQuantization(false, true, false);
}
if (format == nvtt::Format_RGBA)
{
// @@ Edit this to choose the desired pixel format:
// compressionOptions.setPixelType(nvtt::PixelType_Float);
// compressionOptions.setPixelFormat(16, 16, 16, 16);
// compressionOptions.setPixelType(nvtt::PixelType_UnsignedNorm);
// compressionOptions.setPixelFormat(16, 0, 0, 0);
}
if (fast)
{
compressionOptions.setQuality(nvtt::Quality_Fastest);
@ -515,11 +428,11 @@ int main(int argc, char *argv[])
return EXIT_FAILURE;
}
nvtt::Context context;
context.enableCudaAcceleration(!nocuda);
nvtt::Compressor compressor;
compressor.enableCudaAcceleration(!nocuda);
printf("CUDA acceleration ");
if (context.isCudaAccelerationEnabled())
if (compressor.isCudaAccelerationEnabled())
{
printf("ENABLED\n\n");
}
@ -528,7 +441,7 @@ int main(int argc, char *argv[])
printf("DISABLED\n\n");
}
outputHandler.setTotal(context.estimateSize(inputOptions, compressionOptions));
outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions));
outputHandler.setDisplayProgress(!silent);
nvtt::OutputOptions outputOptions;
@ -536,25 +449,19 @@ int main(int argc, char *argv[])
outputOptions.setOutputHandler(&outputHandler);
outputOptions.setErrorHandler(&errorHandler);
if (dds10)
{
outputOptions.setContainer(nvtt::Container_DDS10);
}
// printf("Press ENTER.\n");
// fflush(stdout);
// getchar();
Timer timer;
timer.start();
clock_t start = clock();
if (!context.process(inputOptions, compressionOptions, outputOptions))
if (!compressor.process(inputOptions, compressionOptions, outputOptions))
{
return EXIT_FAILURE;
}
timer.stop();
printf("\rtime taken: %.3f seconds\n", timer.elapsed());
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return EXIT_SUCCESS;
}

View File

@ -12,6 +12,9 @@
<property name="windowTitle" >
<string>NVIDIA Texture Tools</string>
</property>
<property name="windowIcon" >
<iconset/>
</property>
<property name="sizeGripEnabled" >
<bool>true</bool>
</property>

View File

@ -31,161 +31,41 @@
#include "cmdline.h"
#include <time.h> // clock
int main(int argc, char *argv[])
{
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
bool forcenormal = false;
bool mipmaps = false;
bool faces = false;
bool savePNG = false;
nv::Path input;
nv::Path output;
// Parse arguments.
for (int i = 1; i < argc; i++)
{
if (strcmp("-forcenormal", argv[i]) == 0)
{
forcenormal = true;
}
else if (strcmp("-mipmaps", argv[i]) == 0)
{
mipmaps = true;
}
else if (strcmp("-faces", argv[i]) == 0)
{
faces = true;
}
else if (strcmp("-format", argv[i]) == 0)
{
if (i+1 == argc) break;
i++;
#ifdef HAVE_PNG
if (strcmp("png", argv[i]) == 0) savePNG = true;
else
#endif
if (strcmp("tga", argv[i]) == 0) savePNG = false;
else
{
fprintf(stderr, "Unsupported output format '%s', defaulting to 'tga'.\n", argv[i]);
savePNG = false;
}
}
else if (argv[i][0] != '-')
{
input = argv[i];
if (i+1 < argc && argv[i+1][0] != '-')
{
output = argv[i+1];
}
else
{
output.copy(input.str());
}
break;
}
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
if (input.isNull())
if (argc != 2)
{
printf("usage: nvdecompress [options] infile [outfile]\n\n");
printf("Note: the .tga or .png extension is forced on outfile\n\n");
printf("Input options:\n");
printf(" -forcenormal \tThe input image is a normal map.\n");
printf(" -mipmaps \tDecompress all mipmaps.\n");
printf(" -faces \tDecompress all faces.\n");
printf(" -format <format>\tOutput format ('tga' or 'png').\n");
return 1;
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
printf("usage: nvdecompress 'ddsfile'\n\n");
return 1;
}
// Load surface.
nv::DirectDrawSurface dds(input);
nv::DirectDrawSurface dds(argv[1]);
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
return 1;
}
if (!dds.isSupported() || dds.isTexture3D())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
printf("The file '%s' is not a valid DDS file.\n", argv[1]);
return 1;
}
uint faceCount;
if (dds.isTexture2D())
{
faceCount = 1;
}
else
{
nvCheck(dds.isTextureCube());
faceCount = 6;
nv::Path name(argv[1]);
name.stripExtension();
name.append(".tga");
nv::StdOutputStream stream(name.str());
if (stream.isError()) {
printf("Error opening '%s' for writting\n", name.str());
return 1;
}
uint mipmapCount = dds.mipmapCount();
clock_t start = clock();
// apply arguments
if (forcenormal)
{
dds.setNormalFlag(true);
}
if (!faces)
{
faceCount = 1;
}
if (!mipmaps)
{
mipmapCount = 1;
}
// @@ TODO: Add command line options to output mipmaps, cubemap faces, etc.
nv::Image img;
dds.mipmap(&img, 0, 0); // get first image
nv::ImageIO::saveTGA(stream, &img);
nv::Image mipmap;
nv::Path name;
// strip extension, we force the tga extension
output.stripExtension();
// extract faces and mipmaps
for (uint f = 0; f < faceCount; f++)
{
for (uint m = 0; m < mipmapCount; m++)
{
dds.mipmap(&mipmap, f, m);
// set output filename, if we are doing faces and/or mipmaps
name.copy(output);
if (faces) name.appendFormat("_face%d", f);
if (mipmaps) name.appendFormat("_mipmap%d", m);
name.append(savePNG ? ".png" : ".tga");
nv::StdOutputStream stream(name.str());
if (stream.isError()) {
fprintf(stderr, "Error opening '%s' for writting\n", name.str());
return 1;
}
nv::ImageIO::save(name, stream, &mipmap);
}
}
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return 0;
}

View File

@ -1,26 +0,0 @@
<!--
GConf schema file declaring two keys under
/desktop/gnome/thumbnailers/image@x-dds, both owned by nvtt-thumbnailer:
  enable  : bool, default true
  command : string, default is the nv-gnome-thumbnailer command line
NOTE(review): @CMAKE_INSTALL_PREFIX@ looks like a build-time placeholder,
presumably substituted by CMake when this file is configured; confirm.
-->
<gconfschemafile>
<schemalist>
<!-- Key that switches the DDS thumbnailer on or off (on by default). -->
<schema>
<key>/schemas/desktop/gnome/thumbnailers/image@x-dds/enable</key>
<applyto>/desktop/gnome/thumbnailers/image@x-dds/enable</applyto>
<owner>nvtt-thumbnailer</owner>
<type>bool</type>
<default>true</default>
<locale name="C">
<short></short>
<long></long>
</locale>
</schema>
<!-- Key holding the command line used to produce a thumbnail. -->
<!-- NOTE(review): %s, %i and %o are presumably replaced by the desktop with
     the requested size, the input file and the output file; verify against
     the GNOME thumbnailer documentation. -->
<schema>
<key>/schemas/desktop/gnome/thumbnailers/image@x-dds/command</key>
<applyto>/desktop/gnome/thumbnailers/image@x-dds/command</applyto>
<owner>nvtt-thumbnailer</owner>
<type>string</type>
<default>@CMAKE_INSTALL_PREFIX@/bin/nv-gnome-thumbnailer -s %s %i %o</default>
<locale name="C">
<short></short>
<long></long>
</locale>
</schema>
</schemalist>
</gconfschemafile>

View File

@ -176,7 +176,7 @@ int main(int argc, char *argv[])
result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output);
nv::ImageIO::save(output, stream, result.ptr());
nv::ImageIO::saveTGA(stream, result.ptr()); // @@ Add generic save function. Add support for png too.
return 0;
}

View File

@ -1,158 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Ptr.h>
#include <nvcore/StrLib.h>
#include <nvcore/StdStream.h>
#include <nvcore/Containers.h>
#include <nvimage/Image.h>
#include <nvimage/ImageIO.h>
#include <nvimage/FloatImage.h>
#include <nvimage/Filter.h>
#include <nvimage/DirectDrawSurface.h>
#include <nvmath/Color.h>
#include <nvmath/Vector.h>
#include <math.h>
#include "cmdline.h"
// Load the file 'fileName' into 'image'.
//
// Files whose extension is ".dds" (case-insensitive) are opened as a
// DirectDrawSurface and only face 0 / mipmap 0 is extracted. Every other
// file is handed to nv::Image::load.
//
// Returns true on success. On failure a diagnostic is written to stderr and
// false is returned.
static bool loadImage(nv::Image & image, const char * fileName)
{
    const bool isDDS = (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0);

    if (!isDDS)
    {
        // Regular image.
        if (image.load(fileName))
        {
            return true;
        }

        fprintf(stderr, "The file '%s' is not a supported image type.\n", fileName);
        return false;
    }

    nv::DirectDrawSurface dds(fileName);
    if (dds.isValid())
    {
        // Only the top-level image of the first face is needed.
        dds.mipmap(&image, 0, 0);
        return true;
    }

    fprintf(stderr, "The file '%s' is not a valid DDS file.\n", fileName);
    return false;
}
int main(int argc, char *argv[])
{
//MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
float gamma = 2.2f;
nv::Path input;
nv::Path output;
int size = 128;
// Parse arguments.
for (int i = 1; i < argc; i++)
{
// Input options.
if (strcmp("-s", argv[i]) == 0)
{
if (i+1 < argc && argv[i+1][0] != '-') {
size = (int)atoi(argv[i+1]);
i++;
}
}
else if (argv[i][0] != '-')
{
input = argv[i];
if (i+1 < argc && argv[i+1][0] != '-') {
output = argv[i+1];
}
else
{
fprintf(stderr, "No output filename.\n");
return 1;
}
break;
}
}
if (input.isNull() || output.isNull())
{
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
printf("usage: nv-gnome-thumbnailer [options] input output\n\n");
printf("Options:\n");
printf(" -s size\tThumbnail size (default = 128)\n");
return 1;
}
nv::Image image;
if (!loadImage(image, input)) return 1;
nv::ImageIO::ImageMetaData metaData;
metaData.tagMap.add("Thumb::Image::Width", nv::StringBuilder().number (image.width()));
metaData.tagMap.add("Thumb::Image::Height", nv::StringBuilder().number (image.height()));
if ((image.width() > size) || (image.height() > size))
{
nv::FloatImage fimage(&image);
fimage.toLinear(0, 3, gamma);
uint thumbW, thumbH;
if (image.width() > image.height())
{
thumbW = size;
thumbH = uint ((float (image.height()) / float (image.width())) * size);
}
else
{
thumbW = uint ((float (image.width()) / float (image.height())) * size);
thumbH = size;
}
nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp));
nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output);
nv::ImageIO::save(output, stream, result.ptr(), &metaData);
}
else
{
nv::StdOutputStream stream(output);
nv::ImageIO::save(output, stream, &image, &metaData);
}
return 0;
}