rename 2.0 tag to 2.0.0

This commit is contained in:
castano
2008-02-28 07:55:19 +00:00
parent 7933aeea09
commit a1b655d0d5
26 changed files with 1760 additions and 2984 deletions

View File

@ -1,12 +1,3 @@
NVIDIA Texture Tools version 2.1.0
* CTX1 CUDA compressor.
* DXT1n CUDA compressor.
NVIDIA Texture Tools version 2.0.1
* Fix memory leaks.
* Pre-allocate device memory for CUDA compressor.
* Add single color compressor.
NVIDIA Texture Tools version 2.0.0
* Fixed PSNR formula in nvimgdiff.
* Added support for arbitrary RGB formats.

View File

@ -2,7 +2,7 @@
--------------------------------------------------------------------------------
NVIDIA Texture Tools
README.txt
Version 2.1
Version 2.0.0
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------

View File

@ -1 +1 @@
2.1.0
2.0.0

0
gnuwin32/bin/libpng12.dll Executable file → Normal file
View File

BIN
gnuwin32/lib/libpng.a Normal file

Binary file not shown.

BIN
gnuwin32/lib/libz.a Normal file

Binary file not shown.

View File

@ -327,10 +327,6 @@
RelativePath="..\..\..\src\nvcore\nvcore.h"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Ptr.h"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\StrLib.h"
>

View File

@ -3,18 +3,18 @@ Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}"
@ -34,16 +34,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
ProjectSection(ProjectDependencies) = postProject
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
@ -55,18 +55,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stress", "stress\stress.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}"
ProjectSection(ProjectDependencies) = postProject
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU
@ -319,22 +314,6 @@ Global
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -53,8 +53,8 @@ END
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,1,0,0
PRODUCTVERSION 2,1,0,0
FILEVERSION 2,0,0,0
PRODUCTVERSION 2,0,0,0
FILEFLAGSMASK 0x17L
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 1, 0, 0"
VALUE "FileVersion", "2, 0, 0, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 1, 0, 0"
VALUE "ProductVersion", "2, 0, 0, 0"
END
END
BLOCK "VarFileInfo"

View File

@ -1,201 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="stress"
ProjectGUID="{317B694E-B5C1-42A6-956F-FC12B69175A6}"
RootNamespace="stress"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvtt\tests\stress.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -110,19 +110,6 @@ void ColorBlock::splatY()
}
}
/// Returns true if the block has a single color.
bool ColorBlock::isSingleColor() const
{
for(int i = 1; i < 16; i++)
{
if (m_color[0] != m_color[i])
{
return false;
}
}
return true;
}
/// Count number of unique colors in this color block.
uint ColorBlock::countUniqueColors() const

View File

@ -24,7 +24,6 @@ namespace nv
void splatX();
void splatY();
bool isSingleColor() const;
uint countUniqueColors() const;
Color32 averageColor() const;
bool hasAlpha() const;

View File

@ -79,9 +79,6 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)

View File

@ -69,14 +69,7 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1(rgba, &block);
}
QuickCompress::compressDXT1(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -221,16 +214,10 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
rgba.init(image, x, y);
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1(rgba.color(0), &block);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}
// Compress color.
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));

View File

@ -56,7 +56,7 @@ namespace
static int blockSize(Format format)
{
if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) {
if (format == Format_DXT1 || format == Format_DXT1a) {
return 8;
}
else if (format == Format_DXT3) {
@ -71,9 +71,6 @@ namespace
else if (format == Format_BC5) {
return 16;
}
else if (format == Format_CTX1) {
return 8;
}
return 0;
}
@ -208,19 +205,16 @@ namespace nvtt
Compressor::Compressor() : m(*new Compressor::Private())
{
// CUDA initialization.
m.cudaSupported = cuda::isHardwarePresent();
m.cudaEnabled = m.cudaSupported;
if (m.cudaEnabled)
{
m.cuda = new CudaCompressor();
}
// @@ Do CUDA initialization here.
}
Compressor::~Compressor()
{
delete &m;
// @@ Free CUDA resources here.
}
@ -231,11 +225,6 @@ void Compressor::enableCudaAcceleration(bool enable)
{
m.cudaEnabled = enable;
}
if (m.cudaEnabled && m.cuda == NULL)
{
m.cuda = new CudaCompressor();
}
}
/// Check if CUDA acceleration is enabled.
@ -336,7 +325,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
{
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
@ -357,10 +346,6 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setFourCC('A', 'T', 'I', '2');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
else if (compressionOptions.format == Format_CTX1) {
header.setFourCC('C', 'T', 'X', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
}
// Swap bytes if necessary.
@ -685,7 +670,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1(image, outputOptions, compressionOptions);
cudaCompressDXT1(image, outputOptions, compressionOptions);
}
else
{
@ -712,18 +697,6 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
}
}
}
else if (compressionOptions.format == Format_DXT1n)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1n(image, outputOptions, compressionOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
@ -735,7 +708,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT3(image, outputOptions, compressionOptions);
cudaCompressDXT3(image, outputOptions, compressionOptions);
}
else
{
@ -754,7 +727,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT5(image, outputOptions, compressionOptions);
cudaCompressDXT5(image, outputOptions, compressionOptions);
}
else
{
@ -781,18 +754,6 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
{
compressBC5(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_CTX1)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressCTX1(image, outputOptions, compressionOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
return true;
}

View File

@ -24,10 +24,6 @@
#ifndef NV_TT_COMPRESSOR_H
#define NV_TT_COMPRESSOR_H
#include <nvcore/Ptr.h>
#include <nvtt/cuda/CudaCompressDXT.h>
#include "nvtt.h"
namespace nv
@ -67,9 +63,6 @@ namespace nvtt
bool cudaSupported;
bool cudaEnabled;
nv::AutoPtr<nv::CudaCompressor> cuda;
};
} // nvtt namespace

View File

@ -353,18 +353,12 @@ static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.g = OMatch5[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.g = OMatch5[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)

View File

@ -48,12 +48,7 @@ void initTables()
};
*/
#if __CUDACC__
__constant__ unsigned short
#else
const static uint8
#endif
OMatch5[256][2] =
const static uint8 OMatch5[256][2] =
{
{0x00, 0x00},
{0x00, 0x00},
@ -313,12 +308,7 @@ OMatch5[256][2] =
{0x1F, 0x1F},
};
#if __CUDACC__
__constant__ unsigned short
#else
const static uint8
#endif
OMatch6[256][2] =
const static uint8 OMatch6[256][2] =
{
{0x00, 0x00},
{0x00, 0x01},

View File

@ -122,7 +122,7 @@ static void doPrecomputation()
*/
const static uint s_bitmapTable[992] =
const static uint bitmaps[992] =
{
0x80000000,
0x40000000,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -31,24 +31,11 @@ namespace nv
{
class Image;
class CudaCompressor
{
public:
CudaCompressor();
~CudaCompressor();
void cudaCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void cudaCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void cudaCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressCTX1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
private:
uint * m_bitmapTable;
uint * m_data;
uint * m_result;
};
void cudaCompressDXT1_2(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
} // nv namespace

View File

@ -1,389 +1,221 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// Math functions and operators to be used with vector types.
#ifndef CUDAMATH_H
#define CUDAMATH_H
#include <float.h>
inline __device__ __host__ float3 operator *(float3 a, float3 b)
{
return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
}
inline __device__ __host__ float3 operator *(float f, float3 v)
{
return make_float3(v.x*f, v.y*f, v.z*f);
}
inline __device__ __host__ float3 operator *(float3 v, float f)
{
return make_float3(v.x*f, v.y*f, v.z*f);
}
inline __device__ __host__ float3 operator +(float3 a, float3 b)
{
return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
}
inline __device__ __host__ void operator +=(float3 & b, float3 a)
{
b.x += a.x;
b.y += a.y;
b.z += a.z;
}
inline __device__ __host__ float3 operator -(float3 a, float3 b)
{
return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
}
inline __device__ __host__ void operator -=(float3 & b, float3 a)
{
b.x -= a.x;
b.y -= a.y;
b.z -= a.z;
}
inline __device__ __host__ float3 operator /(float3 v, float f)
{
float inv = 1.0f / f;
return v * inv;
}
inline __device__ __host__ void operator /=(float3 & b, float f)
{
float inv = 1.0f / f;
b.x *= inv;
b.y *= inv;
b.z *= inv;
}
inline __device__ __host__ bool operator ==(float3 a, float3 b)
{
return a.x == b.x && a.y == b.y && a.z == b.z;
}
// float2 operators
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
return make_float2(a.x*b.x, a.y*b.y);
}
inline __device__ __host__ float2 operator *(float f, float2 v)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator *(float2 v, float f)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator +(float2 a, float2 b)
{
return make_float2(a.x+b.x, a.y+b.y);
}
inline __device__ __host__ void operator +=(float2 & b, float2 a)
{
b.x += a.x;
b.y += a.y;
}
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
return make_float2(a.x-b.x, a.y-b.y);
}
inline __device__ __host__ void operator -=(float2 & b, float2 a)
{
b.x -= a.x;
b.y -= a.y;
}
inline __device__ __host__ float2 operator /(float2 v, float f)
{
float inv = 1.0f / f;
return v * inv;
}
inline __device__ __host__ void operator /=(float2 & b, float f)
{
float inv = 1.0f / f;
b.x *= inv;
b.y *= inv;
}
inline __device__ __host__ float dot(float2 a, float2 b)
{
return a.x * b.x + a.y * b.y;
}
inline __device__ __host__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
inline __device__ __host__ float dot(float4 a, float4 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
inline __device__ __host__ float clamp(float f, float a, float b)
{
return max(a, min(f, b));
}
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
inline __device__ __host__ float3 normalize(float3 v)
{
float len = 1.0f / sqrtf(dot(v, v));
return make_float3(v.x * len, v.y * len, v.z * len);
}
// Use power method to find the first eigenvector.
// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html
inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{
// 8 iterations seems to be more than enough.
float3 v = make_float3(1.0f, 1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
float m = max(max(x, y), z);
float iv = 1.0f / m;
if (m == 0.0f) iv = 0.0f;
v = make_float3(x*iv, y*iv, z*iv);
}
return v;
}
inline __device__ bool singleColor(const float3 * colors)
{
#if __DEVICE_EMULATION__
bool sameColor = false;
for (int i = 0; i < 16; i++)
{
sameColor &= (colors[idx] == colors[0]);
}
return sameColor;
#else
__shared__ int sameColor[16];
const int idx = threadIdx.x;
sameColor[idx] = (colors[idx] == colors[0]);
sameColor[idx] &= sameColor[idx^8];
sameColor[idx] &= sameColor[idx^4];
sameColor[idx] &= sameColor[idx^2];
sameColor[idx] &= sameColor[idx^1];
return sameColor[0];
#endif
}
inline __device__ void colorSums(const float3 * colors, float3 * sums)
{
#if __DEVICE_EMULATION__
float3 color_sum = make_float3(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// Math functions and operators to be used with vector types.
#ifndef CUDAMATH_H
#define CUDAMATH_H
#include <float.h>
inline __device__ __host__ float3 operator *(float3 a, float3 b)
{
return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
}
inline __device__ __host__ float3 operator *(float f, float3 v)
{
return make_float3(v.x*f, v.y*f, v.z*f);
}
inline __device__ __host__ float3 operator *(float3 v, float f)
{
return make_float3(v.x*f, v.y*f, v.z*f);
}
inline __device__ __host__ float3 operator +(float3 a, float3 b)
{
return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
}
inline __device__ __host__ void operator +=(float3 & b, float3 a)
{
b.x += a.x;
b.y += a.y;
b.z += a.z;
}
inline __device__ __host__ float3 operator -(float3 a, float3 b)
{
return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
}
inline __device__ __host__ void operator -=(float3 & b, float3 a)
{
b.x -= a.x;
b.y -= a.y;
b.z -= a.z;
}
inline __device__ __host__ float3 operator /(float3 v, float f)
{
float inv = 1.0f / f;
return v * inv;
}
inline __device__ __host__ void operator /=(float3 & b, float f)
{
float inv = 1.0f / f;
b.x *= inv;
b.y *= inv;
b.z *= inv;
}
inline __device__ __host__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
inline __device__ __host__ float dot(float4 a, float4 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
inline __device__ __host__ float clamp(float f, float a, float b)
{
return max(a, min(f, b));
}
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
inline __device__ __host__ float3 normalize(float3 v)
{
float len = 1.0f / sqrtf(dot(v, v));
return make_float3(v.x * len, v.y * len, v.z * len);
}
// Use power method to find the first eigenvector.
// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html
inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{
// 8 iterations seems to be more than enough.
float3 v = make_float3(1.0f, 1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
float m = max(max(x, y), z);
float iv = 1.0f / m;
#if __DEVICE_EMULATION__
if (m == 0.0f) iv = 0.0f;
#endif
v = make_float3(x*iv, y*iv, z*iv);
}
return v;
}
inline __device__ void colorSums(const float3 * colors, float3 * sums)
{
#if __DEVICE_EMULATION__
float3 color_sum = make_float3(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, float3 colorMetric)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[6] = {0, 0, 0, 0, 0, 0};
for (int i = 0; i < 16; i++)
{
float3 a = (colors[i] - color_sum * (1.0f / 16.0f)) * colorMetric;
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[2] += a.x * a.z;
covariance[3] += a.y * a.y;
covariance[4] += a.y * a.z;
covariance[5] += a.z * a.z;
}
#else
const int idx = threadIdx.x;
float3 diff = (colors[idx] - color_sum * (1.0f / 16.0f)) * colorMetric;
// @@ Eliminate two-way bank conflicts here.
// @@ It seems that doing that and unrolling the reduction doesn't help...
__shared__ float covariance[16*6];
covariance[6 * idx + 0] = diff.x * diff.x; // 0, 6, 12, 2, 8, 14, 4, 10, 0
covariance[6 * idx + 1] = diff.x * diff.y;
covariance[6 * idx + 2] = diff.x * diff.z;
covariance[6 * idx + 3] = diff.y * diff.y;
covariance[6 * idx + 4] = diff.y * diff.z;
covariance[6 * idx + 5] = diff.z * diff.z;
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[6 * idx + 0] += covariance[6 * (idx+d) + 0];
covariance[6 * idx + 1] += covariance[6 * (idx+d) + 1];
covariance[6 * idx + 2] += covariance[6 * (idx+d) + 2];
covariance[6 * idx + 3] += covariance[6 * (idx+d) + 3];
covariance[6 * idx + 4] += covariance[6 * (idx+d) + 4];
covariance[6 * idx + 5] += covariance[6 * (idx+d) + 5];
}
}
#endif
// Compute first eigen vector.
return firstEigenVector(covariance);
}
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
{
// @@ 8 iterations is probably more than enough.
float2 v = make_float2(1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1];
float y = v.x * matrix[1] + v.y * matrix[2];
float m = max(x, y);
float iv = 1.0f / m;
if (m == 0.0f) iv = 0.0f;
v = make_float2(x*iv, y*iv);
}
return v;
}
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
float2 color_sum = make_float2(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[3] = {0, 0, 0};
for (int i = 0; i < 16; i++)
{
float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[3] += a.y * a.y;
}
#else
const int idx = threadIdx.x;
float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
__shared__ float covariance[16*3];
covariance[3 * idx + 0] = diff.x * diff.x;
covariance[3 * idx + 1] = diff.x * diff.y;
covariance[3 * idx + 2] = diff.y * diff.y;
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
}
}
#endif
// Compute first eigen vector.
return firstEigenVector2D(covariance);
}
#endif // CUDAMATH_H
#endif
}
inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, float3 colorMetric)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[6] = {0, 0, 0, 0, 0, 0};
for (int i = 0; i < 16; i++)
{
float3 a = (colors[i] - color_sum * (1.0f / 16.0f)) * colorMetric;
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[2] += a.x * a.z;
covariance[3] += a.y * a.y;
covariance[4] += a.y * a.z;
covariance[5] += a.z * a.z;
}
#else
const int idx = threadIdx.x;
float3 diff = (colors[idx] - color_sum * (1.0f / 16.0f)) * colorMetric;
// @@ Eliminate two-way bank conflicts here.
// @@ It seems that doing that and unrolling the reduction doesn't help...
__shared__ float covariance[16*6];
covariance[6 * idx + 0] = diff.x * diff.x; // 0, 6, 12, 2, 8, 14, 4, 10, 0
covariance[6 * idx + 1] = diff.x * diff.y;
covariance[6 * idx + 2] = diff.x * diff.z;
covariance[6 * idx + 3] = diff.y * diff.y;
covariance[6 * idx + 4] = diff.y * diff.z;
covariance[6 * idx + 5] = diff.z * diff.z;
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[6 * idx + 0] += covariance[6 * (idx+d) + 0];
covariance[6 * idx + 1] += covariance[6 * (idx+d) + 1];
covariance[6 * idx + 2] += covariance[6 * (idx+d) + 2];
covariance[6 * idx + 3] += covariance[6 * (idx+d) + 3];
covariance[6 * idx + 4] += covariance[6 * (idx+d) + 4];
covariance[6 * idx + 5] += covariance[6 * (idx+d) + 5];
}
}
#endif
// Compute first eigen vector.
return firstEigenVector(covariance);
}
#endif // CUDAMATH_H

View File

@ -75,9 +75,6 @@ namespace nvtt
Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2
Format_DXT1n,
Format_CTX1,
};
/// Quality modes.

View File

@ -1,221 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <stdio.h> // printf
#include <stdlib.h> // rand
#include <time.h> // clock
#include <string.h> // memcpy, memcmp
#include <assert.h>
#define FRAME_COUNT 1000
#define WIDTH 2048
#define HEIGHT 2048
#define INPUT_SIZE (WIDTH*HEIGHT)
#define OUTPUT_SIZE (WIDTH*HEIGHT/16*2)
static int s_input[INPUT_SIZE];
static int s_reference[OUTPUT_SIZE];
static int s_output[OUTPUT_SIZE];
static int s_frame = 0;
struct MyOutputHandler : public nvtt::OutputHandler
{
MyOutputHandler() : m_ptr(NULL) {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
assert(size == sizeof(int) * OUTPUT_SIZE);
assert(width == WIDTH);
assert(height == HEIGHT);
assert(depth == 1);
assert(face == 0);
assert(miplevel == 0);
m_ptr = (unsigned char *)s_output;
if (s_frame == 1)
{
// Save first result as reference.
memcpy(s_reference, s_output, sizeof(int) * OUTPUT_SIZE);
}
else if (s_frame > 1)
{
// Compare against reference.
if (memcmp(s_output, s_reference, sizeof(int) * OUTPUT_SIZE) != 0)
{
printf("Compressed image different to original.\n");
exit(EXIT_FAILURE);
}
}
}
virtual bool writeData(const void * data, int size)
{
memcpy(m_ptr, data, size);
m_ptr += size;
return true;
}
unsigned char * m_ptr;
};
void precomp()
{
unsigned int bitmaps[1024];
int num = 0;
printf("{\n");
printf("\t%8X,\n", 0);
bitmaps[0] = 0;
num = 1;
for (int a = 1; a <= 15; a++)
{
for (int b = a; b <= 15; b++)
{
for (int c = b; c <= 15; c++)
{
int indices[16];
int i = 0;
for(; i < a; i++) {
indices[i] = 0;
}
for(; i < a+b; i++) {
indices[i] = 2;
}
for(; i < a+b+c; i++) {
indices[i] = 3;
}
for(; i < 16; i++) {
indices[i] = 1;
}
unsigned int bm = 0;
for(i = 0; i < 16; i++) {
bm |= indices[i] << (i * 2);
}
printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c);
bitmaps[num] = bm;
num++;
}
}
}
printf("}\n");
printf("// num = %d\n", num);
/*
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < 16; ++m )
{
indices[m] = 3;
}
int kmax = ( j == 0 ) ? 15 : 16;
for( int k = kmax; k >= j; --k )
{
// last cluster [k,n) is at the end
if( k < 16 )
{
indices[k] = 1;
}
uint bitmap = 0;
bool hasThree = false;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
}
bitmaps[num] = bitmap;
num++;
}
}
}
*/
}
int main(int argc, char *argv[])
{
//precomp();
nvtt::InputOptions inputOptions;
inputOptions.setTextureLayout(nvtt::TextureType_2D, WIDTH, HEIGHT);
for (int i = 0; i < INPUT_SIZE; i++)
{
s_input[i] = rand();
}
inputOptions.setMipmapData(s_input, WIDTH, HEIGHT);
inputOptions.setMipmapGeneration(false);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_DXT1);
// compressionOptions.setFormat(nvtt::Format_DXT1n);
// compressionOptions.setFormat(nvtt::Format_CTX1);
nvtt::OutputOptions outputOptions;
outputOptions.setOutputHeader(false);
MyOutputHandler outputHandler;
outputOptions.setOutputHandler(&outputHandler);
nvtt::Compressor compressor;
for (s_frame = 0; s_frame < FRAME_COUNT; s_frame++)
{
clock_t start = clock();
printf("compressing frame %d:\n", s_frame);
compressor.process(inputOptions, compressionOptions, outputOptions);
clock_t end = clock();
printf("time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
}
return EXIT_SUCCESS;
}

View File

@ -130,13 +130,10 @@ struct NormalError
void done()
{
if (samples)
{
ade /= samples;
mse /= samples * 3;
rmse = sqrt(mse);
psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
}
ade /= samples;
mse /= samples * 3;
rmse = sqrt(mse);
psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
}
void print()