rename 2.0 tag to 2.0.0

2.0.0
castano 16 years ago
parent 7933aeea09
commit a1b655d0d5

@ -1,12 +1,3 @@
NVIDIA Texture Tools version 2.1.0
* CTX1 CUDA compressor.
* DXT1n CUDA compressor.
NVIDIA Texture Tools version 2.0.1
* Fix memory leaks.
* Pre-allocate device memory for CUDA compressor.
* Add single color compressor.
NVIDIA Texture Tools version 2.0.0 NVIDIA Texture Tools version 2.0.0
* Fixed PSNR formula in nvimgdiff. * Fixed PSNR formula in nvimgdiff.
* Added support for arbitrary RGB formats. * Added support for arbitrary RGB formats.

@ -2,7 +2,7 @@
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
NVIDIA Texture Tools NVIDIA Texture Tools
README.txt README.txt
Version 2.1 Version 2.0.0
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------

@ -1 +1 @@
2.1.0 2.0.0

Binary file not shown.

Binary file not shown.

@ -327,10 +327,6 @@
RelativePath="..\..\..\src\nvcore\nvcore.h" RelativePath="..\..\..\src\nvcore\nvcore.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvcore\Ptr.h"
>
</File>
<File <File
RelativePath="..\..\..\src\nvcore\StrLib.h" RelativePath="..\..\..\src\nvcore\StrLib.h"
> >

@ -3,18 +3,18 @@ Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005 # Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38} {CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}"
@ -34,16 +34,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
@ -55,18 +55,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}" Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}"
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stress", "stress\stress.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}"
ProjectSection(ProjectDependencies) = postProject
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
EndProjectSection
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU
@ -319,22 +314,6 @@ Global
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

@ -53,8 +53,8 @@ END
// //
VS_VERSION_INFO VERSIONINFO VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,1,0,0 FILEVERSION 2,0,0,0
PRODUCTVERSION 2,1,0,0 PRODUCTVERSION 2,0,0,0
FILEFLAGSMASK 0x17L FILEFLAGSMASK 0x17L
#ifdef _DEBUG #ifdef _DEBUG
FILEFLAGS 0x1L FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN BEGIN
VALUE "CompanyName", "NVIDIA Corporation" VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library" VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 1, 0, 0" VALUE "FileVersion", "2, 0, 0, 0"
VALUE "InternalName", "nvtt" VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007" VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll" VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library" VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 1, 0, 0" VALUE "ProductVersion", "2, 0, 0, 0"
END END
END END
BLOCK "VarFileInfo" BLOCK "VarFileInfo"

@ -1,201 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="stress"
ProjectGUID="{317B694E-B5C1-42A6-956F-FC12B69175A6}"
RootNamespace="stress"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvtt\tests\stress.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

@ -110,19 +110,6 @@ void ColorBlock::splatY()
} }
} }
/// Returns true if the block has a single color.
bool ColorBlock::isSingleColor() const
{
for(int i = 1; i < 16; i++)
{
if (m_color[0] != m_color[i])
{
return false;
}
}
return true;
}
/// Count number of unique colors in this color block. /// Count number of unique colors in this color block.
uint ColorBlock::countUniqueColors() const uint ColorBlock::countUniqueColors() const

@ -24,7 +24,6 @@ namespace nv
void splatX(); void splatX();
void splatY(); void splatY();
bool isSingleColor() const;
uint countUniqueColors() const; uint countUniqueColors() const;
Color32 averageColor() const; Color32 averageColor() const;
bool hasAlpha() const; bool hasAlpha() const;

@ -79,9 +79,6 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h) ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage) TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h) ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage) TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)

@ -69,14 +69,7 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(image, x, y);
if (rgba.isSingleColor()) QuickCompress::compressDXT1(rgba, &block);
{
QuickCompress::compressDXT1(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1(rgba, &block);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -221,16 +214,10 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
rgba.init(image, x, y); rgba.init(image, x, y);
if (rgba.isSingleColor()) // Compress color.
{ squish::ColourSet colours((uint8 *)rgba.colors(), 0);
QuickCompress::compressDXT1(rgba.color(0), &block); fit.SetColourSet(&colours, squish::kDxt1);
} fit.Compress(&block);
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));

@ -56,7 +56,7 @@ namespace
static int blockSize(Format format) static int blockSize(Format format)
{ {
if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) { if (format == Format_DXT1 || format == Format_DXT1a) {
return 8; return 8;
} }
else if (format == Format_DXT3) { else if (format == Format_DXT3) {
@ -71,9 +71,6 @@ namespace
else if (format == Format_BC5) { else if (format == Format_BC5) {
return 16; return 16;
} }
else if (format == Format_CTX1) {
return 8;
}
return 0; return 0;
} }
@ -208,19 +205,16 @@ namespace nvtt
Compressor::Compressor() : m(*new Compressor::Private()) Compressor::Compressor() : m(*new Compressor::Private())
{ {
// CUDA initialization.
m.cudaSupported = cuda::isHardwarePresent(); m.cudaSupported = cuda::isHardwarePresent();
m.cudaEnabled = m.cudaSupported; m.cudaEnabled = m.cudaSupported;
if (m.cudaEnabled) // @@ Do CUDA initialization here.
{
m.cuda = new CudaCompressor();
}
} }
Compressor::~Compressor() Compressor::~Compressor()
{ {
delete &m; // @@ Free CUDA resources here.
} }
@ -231,11 +225,6 @@ void Compressor::enableCudaAcceleration(bool enable)
{ {
m.cudaEnabled = enable; m.cudaEnabled = enable;
} }
if (m.cudaEnabled && m.cuda == NULL)
{
m.cuda = new CudaCompressor();
}
} }
/// Check if CUDA acceleration is enabled. /// Check if CUDA acceleration is enabled.
@ -336,7 +325,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
{ {
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format)); header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1'); header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true); if (inputOptions.isNormalMap) header.setNormalFlag(true);
} }
@ -357,10 +346,6 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setFourCC('A', 'T', 'I', '2'); header.setFourCC('A', 'T', 'I', '2');
if (inputOptions.isNormalMap) header.setNormalFlag(true); if (inputOptions.isNormalMap) header.setNormalFlag(true);
} }
else if (compressionOptions.format == Format_CTX1) {
header.setFourCC('C', 'T', 'X', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
} }
// Swap bytes if necessary. // Swap bytes if necessary.
@ -685,7 +670,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
cuda->compressDXT1(image, outputOptions, compressionOptions); cudaCompressDXT1(image, outputOptions, compressionOptions);
} }
else else
{ {
@ -712,18 +697,6 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
} }
} }
} }
else if (compressionOptions.format == Format_DXT1n)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1n(image, outputOptions, compressionOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
else if (compressionOptions.format == Format_DXT3) else if (compressionOptions.format == Format_DXT3)
{ {
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
@ -735,7 +708,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
cuda->compressDXT3(image, outputOptions, compressionOptions); cudaCompressDXT3(image, outputOptions, compressionOptions);
} }
else else
{ {
@ -754,7 +727,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
cuda->compressDXT5(image, outputOptions, compressionOptions); cudaCompressDXT5(image, outputOptions, compressionOptions);
} }
else else
{ {
@ -781,18 +754,6 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
{ {
compressBC5(image, outputOptions, compressionOptions); compressBC5(image, outputOptions, compressionOptions);
} }
else if (compressionOptions.format == Format_CTX1)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressCTX1(image, outputOptions, compressionOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
return true; return true;
} }

@ -24,10 +24,6 @@
#ifndef NV_TT_COMPRESSOR_H #ifndef NV_TT_COMPRESSOR_H
#define NV_TT_COMPRESSOR_H #define NV_TT_COMPRESSOR_H
#include <nvcore/Ptr.h>
#include <nvtt/cuda/CudaCompressDXT.h>
#include "nvtt.h" #include "nvtt.h"
namespace nv namespace nv
@ -67,9 +63,6 @@ namespace nvtt
bool cudaSupported; bool cudaSupported;
bool cudaEnabled; bool cudaEnabled;
nv::AutoPtr<nv::CudaCompressor> cuda;
}; };
} // nvtt namespace } // nvtt namespace

@ -353,18 +353,12 @@ static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock) void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{ {
dxtBlock->col0.r = OMatch5[c.r][0]; dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0]; dxtBlock->col0.g = OMatch5[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0]; dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1]; dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1]; dxtBlock->col1.g = OMatch5[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1]; dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa; dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
} }
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)

@ -48,12 +48,7 @@ void initTables()
}; };
*/ */
#if __CUDACC__ const static uint8 OMatch5[256][2] =
__constant__ unsigned short
#else
const static uint8
#endif
OMatch5[256][2] =
{ {
{0x00, 0x00}, {0x00, 0x00},
{0x00, 0x00}, {0x00, 0x00},
@ -313,12 +308,7 @@ OMatch5[256][2] =
{0x1F, 0x1F}, {0x1F, 0x1F},
}; };
#if __CUDACC__ const static uint8 OMatch6[256][2] =
__constant__ unsigned short
#else
const static uint8
#endif
OMatch6[256][2] =
{ {
{0x00, 0x00}, {0x00, 0x00},
{0x00, 0x01}, {0x00, 0x01},

@ -122,7 +122,7 @@ static void doPrecomputation()
*/ */
const static uint s_bitmapTable[992] = const static uint bitmaps[992] =
{ {
0x80000000, 0x80000000,
0x40000000, 0x40000000,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -31,24 +31,11 @@ namespace nv
{ {
class Image; class Image;
class CudaCompressor void cudaCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
{ void cudaCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
public: void cudaCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
CudaCompressor();
~CudaCompressor(); void cudaCompressDXT1_2(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressCTX1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
private:
uint * m_bitmapTable;
uint * m_data;
uint * m_result;
};
} // nv namespace } // nv namespace

@ -1,389 +1,221 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com> // Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
// //
// Permission is hereby granted, free of charge, to any person // Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation // obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without // files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, // restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell // copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the // copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following // Software is furnished to do so, subject to the following
// conditions: // conditions:
// //
// The above copyright notice and this permission notice shall be // The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software. // included in all copies or substantial portions of the Software.
// //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// Math functions and operators to be used with vector types. // Math functions and operators to be used with vector types.
#ifndef CUDAMATH_H #ifndef CUDAMATH_H
#define CUDAMATH_H #define CUDAMATH_H
#include <float.h> #include <float.h>
inline __device__ __host__ float3 operator *(float3 a, float3 b) inline __device__ __host__ float3 operator *(float3 a, float3 b)
{ {
return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
} }
inline __device__ __host__ float3 operator *(float f, float3 v) inline __device__ __host__ float3 operator *(float f, float3 v)
{ {
return make_float3(v.x*f, v.y*f, v.z*f); return make_float3(v.x*f, v.y*f, v.z*f);
} }
inline __device__ __host__ float3 operator *(float3 v, float f) inline __device__ __host__ float3 operator *(float3 v, float f)
{ {
return make_float3(v.x*f, v.y*f, v.z*f); return make_float3(v.x*f, v.y*f, v.z*f);
} }
inline __device__ __host__ float3 operator +(float3 a, float3 b) inline __device__ __host__ float3 operator +(float3 a, float3 b)
{ {
return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
} }
inline __device__ __host__ void operator +=(float3 & b, float3 a) inline __device__ __host__ void operator +=(float3 & b, float3 a)
{ {
b.x += a.x; b.x += a.x;
b.y += a.y; b.y += a.y;
b.z += a.z; b.z += a.z;
} }
inline __device__ __host__ float3 operator -(float3 a, float3 b) inline __device__ __host__ float3 operator -(float3 a, float3 b)
{ {
return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
} }
inline __device__ __host__ void operator -=(float3 & b, float3 a) inline __device__ __host__ void operator -=(float3 & b, float3 a)
{ {
b.x -= a.x; b.x -= a.x;
b.y -= a.y; b.y -= a.y;
b.z -= a.z; b.z -= a.z;
} }
inline __device__ __host__ float3 operator /(float3 v, float f) inline __device__ __host__ float3 operator /(float3 v, float f)
{ {
float inv = 1.0f / f; float inv = 1.0f / f;
return v * inv; return v * inv;
} }
inline __device__ __host__ void operator /=(float3 & b, float f) inline __device__ __host__ void operator /=(float3 & b, float f)
{ {
float inv = 1.0f / f; float inv = 1.0f / f;
b.x *= inv; b.x *= inv;
b.y *= inv; b.y *= inv;
b.z *= inv; b.z *= inv;
} }
inline __device__ __host__ bool operator ==(float3 a, float3 b)
{ inline __device__ __host__ float dot(float3 a, float3 b)
return a.x == b.x && a.y == b.y && a.z == b.z; {
} return a.x * b.x + a.y * b.y + a.z * b.z;
}
// float2 operators inline __device__ __host__ float dot(float4 a, float4 b)
inline __device__ __host__ float2 operator *(float2 a, float2 b) {
{ return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
return make_float2(a.x*b.x, a.y*b.y); }
}
inline __device__ __host__ float clamp(float f, float a, float b)
inline __device__ __host__ float2 operator *(float f, float2 v) {
{ return max(a, min(f, b));
return make_float2(v.x*f, v.y*f); }
}
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
inline __device__ __host__ float2 operator *(float2 v, float f) {
{ return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
return make_float2(v.x*f, v.y*f); }
}
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
inline __device__ __host__ float2 operator +(float2 a, float2 b) {
{ return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
return make_float2(a.x+b.x, a.y+b.y); }
}
inline __device__ __host__ void operator +=(float2 & b, float2 a) inline __device__ __host__ float3 normalize(float3 v)
{ {
b.x += a.x; float len = 1.0f / sqrtf(dot(v, v));
b.y += a.y; return make_float3(v.x * len, v.y * len, v.z * len);
} }
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
return make_float2(a.x-b.x, a.y-b.y);
} // Use power method to find the first eigenvector.
// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html
inline __device__ __host__ void operator -=(float2 & b, float2 a) inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{ {
b.x -= a.x; // 8 iterations seems to be more than enough.
b.y -= a.y;
} float3 v = make_float3(1.0f, 1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
inline __device__ __host__ float2 operator /(float2 v, float f) float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
{ float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
float inv = 1.0f / f; float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
return v * inv; float m = max(max(x, y), z);
} float iv = 1.0f / m;
#if __DEVICE_EMULATION__
inline __device__ __host__ void operator /=(float2 & b, float f) if (m == 0.0f) iv = 0.0f;
{ #endif
float inv = 1.0f / f; v = make_float3(x*iv, y*iv, z*iv);
b.x *= inv; }
b.y *= inv;
} return v;
}
inline __device__ __host__ float dot(float2 a, float2 b) inline __device__ void colorSums(const float3 * colors, float3 * sums)
{ {
return a.x * b.x + a.y * b.y; #if __DEVICE_EMULATION__
} float3 color_sum = make_float3(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
inline __device__ __host__ float dot(float3 a, float3 b) {
{ color_sum += colors[i];
return a.x * b.x + a.y * b.y + a.z * b.z; }
}
for (int i = 0; i < 16; i++)
inline __device__ __host__ float dot(float4 a, float4 b) {
{ sums[i] = color_sum;
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; }
} #else
inline __device__ __host__ float clamp(float f, float a, float b) const int idx = threadIdx.x;
{
return max(a, min(f, b));
}
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
inline __device__ __host__ float3 normalize(float3 v)
{
float len = 1.0f / sqrtf(dot(v, v));
return make_float3(v.x * len, v.y * len, v.z * len);
}
// Use power method to find the first eigenvector.
// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html
inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{
// 8 iterations seems to be more than enough.
float3 v = make_float3(1.0f, 1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
float m = max(max(x, y), z);
float iv = 1.0f / m;
if (m == 0.0f) iv = 0.0f;
v = make_float3(x*iv, y*iv, z*iv);
}
return v;
}
inline __device__ bool singleColor(const float3 * colors)
{
#if __DEVICE_EMULATION__
bool sameColor = false;
for (int i = 0; i < 16; i++)
{
sameColor &= (colors[idx] == colors[0]);
}
return sameColor;
#else
__shared__ int sameColor[16];
const int idx = threadIdx.x;
sameColor[idx] = (colors[idx] == colors[0]);
sameColor[idx] &= sameColor[idx^8];
sameColor[idx] &= sameColor[idx^4];
sameColor[idx] &= sameColor[idx^2];
sameColor[idx] &= sameColor[idx^1];
return sameColor[0];
#endif
}
inline __device__ void colorSums(const float3 * colors, float3 * sums)
{
#if __DEVICE_EMULATION__
float3 color_sum = make_float3(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx]; sums[idx] = colors[idx];
sums[idx] += sums[idx^8]; sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4]; sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2]; sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1]; sums[idx] += sums[idx^1];
#endif #endif
} }
inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, float3 colorMetric) inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, float3 colorMetric)
{ {
// Compute covariance matrix of the given colors. // Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__ #if __DEVICE_EMULATION__
float covariance[6] = {0, 0, 0, 0, 0, 0}; float covariance[6] = {0, 0, 0, 0, 0, 0};
for (int i = 0; i < 16; i++) for (int i = 0; i < 16; i++)
{ {
float3 a = (colors[i] - color_sum * (1.0f / 16.0f)) * colorMetric; float3 a = (colors[i] - color_sum * (1.0f / 16.0f)) * colorMetric;
covariance[0] += a.x * a.x; covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y; covariance[1] += a.x * a.y;
covariance[2] += a.x * a.z; covariance[2] += a.x * a.z;
covariance[3] += a.y * a.y; covariance[3] += a.y * a.y;
covariance[4] += a.y * a.z; covariance[4] += a.y * a.z;
covariance[5] += a.z * a.z; covariance[5] += a.z * a.z;
} }
#else #else
const int idx = threadIdx.x; const int idx = threadIdx.x;
float3 diff = (colors[idx] - color_sum * (1.0f / 16.0f)) * colorMetric; float3 diff = (colors[idx] - color_sum * (1.0f / 16.0f)) * colorMetric;
// @@ Eliminate two-way bank conflicts here. // @@ Eliminate two-way bank conflicts here.
// @@ It seems that doing that and unrolling the reduction doesn't help... // @@ It seems that doing that and unrolling the reduction doesn't help...
__shared__ float covariance[16*6]; __shared__ float covariance[16*6];
covariance[6 * idx + 0] = diff.x * diff.x; // 0, 6, 12, 2, 8, 14, 4, 10, 0 covariance[6 * idx + 0] = diff.x * diff.x; // 0, 6, 12, 2, 8, 14, 4, 10, 0
covariance[6 * idx + 1] = diff.x * diff.y; covariance[6 * idx + 1] = diff.x * diff.y;
covariance[6 * idx + 2] = diff.x * diff.z; covariance[6 * idx + 2] = diff.x * diff.z;
covariance[6 * idx + 3] = diff.y * diff.y; covariance[6 * idx + 3] = diff.y * diff.y;
covariance[6 * idx + 4] = diff.y * diff.z; covariance[6 * idx + 4] = diff.y * diff.z;
covariance[6 * idx + 5] = diff.z * diff.z; covariance[6 * idx + 5] = diff.z * diff.z;
for(int d = 8; d > 0; d >>= 1) for(int d = 8; d > 0; d >>= 1)
{ {
if (idx < d) if (idx < d)
{ {
covariance[6 * idx + 0] += covariance[6 * (idx+d) + 0]; covariance[6 * idx + 0] += covariance[6 * (idx+d) + 0];
covariance[6 * idx + 1] += covariance[6 * (idx+d) + 1]; covariance[6 * idx + 1] += covariance[6 * (idx+d) + 1];
covariance[6 * idx + 2] += covariance[6 * (idx+d) + 2]; covariance[6 * idx + 2] += covariance[6 * (idx+d) + 2];
covariance[6 * idx + 3] += covariance[6 * (idx+d) + 3]; covariance[6 * idx + 3] += covariance[6 * (idx+d) + 3];
covariance[6 * idx + 4] += covariance[6 * (idx+d) + 4]; covariance[6 * idx + 4] += covariance[6 * (idx+d) + 4];
covariance[6 * idx + 5] += covariance[6 * (idx+d) + 5]; covariance[6 * idx + 5] += covariance[6 * (idx+d) + 5];
} }
} }
#endif #endif
// Compute first eigen vector. // Compute first eigen vector.
return firstEigenVector(covariance); return firstEigenVector(covariance);
} }
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] ) #endif // CUDAMATH_H
{
// @@ 8 iterations is probably more than enough.
float2 v = make_float2(1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1];
float y = v.x * matrix[1] + v.y * matrix[2];
float m = max(x, y);
float iv = 1.0f / m;
if (m == 0.0f) iv = 0.0f;
v = make_float2(x*iv, y*iv);
}
return v;
}
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
float2 color_sum = make_float2(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[3] = {0, 0, 0};
for (int i = 0; i < 16; i++)
{
float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[3] += a.y * a.y;
}
#else
const int idx = threadIdx.x;
float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
__shared__ float covariance[16*3];
covariance[3 * idx + 0] = diff.x * diff.x;
covariance[3 * idx + 1] = diff.x * diff.y;
covariance[3 * idx + 2] = diff.y * diff.y;
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
}
}
#endif
// Compute first eigen vector.
return firstEigenVector2D(covariance);
}
#endif // CUDAMATH_H

@ -75,9 +75,6 @@ namespace nvtt
Format_BC3n = Format_DXT5n, Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1 Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2 Format_BC5, // 3DC, ATI2
Format_DXT1n,
Format_CTX1,
}; };
/// Quality modes. /// Quality modes.

@ -1,221 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <stdio.h> // printf
#include <stdlib.h> // rand
#include <time.h> // clock
#include <string.h> // memcpy, memcmp
#include <assert.h>
#define FRAME_COUNT 1000
#define WIDTH 2048
#define HEIGHT 2048
#define INPUT_SIZE (WIDTH*HEIGHT)
#define OUTPUT_SIZE (WIDTH*HEIGHT/16*2)
static int s_input[INPUT_SIZE];
static int s_reference[OUTPUT_SIZE];
static int s_output[OUTPUT_SIZE];
static int s_frame = 0;
struct MyOutputHandler : public nvtt::OutputHandler
{
MyOutputHandler() : m_ptr(NULL) {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
assert(size == sizeof(int) * OUTPUT_SIZE);
assert(width == WIDTH);
assert(height == HEIGHT);
assert(depth == 1);
assert(face == 0);
assert(miplevel == 0);
m_ptr = (unsigned char *)s_output;
if (s_frame == 1)
{
// Save first result as reference.
memcpy(s_reference, s_output, sizeof(int) * OUTPUT_SIZE);
}
else if (s_frame > 1)
{
// Compare against reference.
if (memcmp(s_output, s_reference, sizeof(int) * OUTPUT_SIZE) != 0)
{
printf("Compressed image different to original.\n");
exit(EXIT_FAILURE);
}
}
}
virtual bool writeData(const void * data, int size)
{
memcpy(m_ptr, data, size);
m_ptr += size;
return true;
}
unsigned char * m_ptr;
};
void precomp()
{
unsigned int bitmaps[1024];
int num = 0;
printf("{\n");
printf("\t%8X,\n", 0);
bitmaps[0] = 0;
num = 1;
for (int a = 1; a <= 15; a++)
{
for (int b = a; b <= 15; b++)
{
for (int c = b; c <= 15; c++)
{
int indices[16];
int i = 0;
for(; i < a; i++) {
indices[i] = 0;
}
for(; i < a+b; i++) {
indices[i] = 2;
}
for(; i < a+b+c; i++) {
indices[i] = 3;
}
for(; i < 16; i++) {
indices[i] = 1;
}
unsigned int bm = 0;
for(i = 0; i < 16; i++) {
bm |= indices[i] << (i * 2);
}
printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c);
bitmaps[num] = bm;
num++;
}
}
}
printf("}\n");
printf("// num = %d\n", num);
/*
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < 16; ++m )
{
indices[m] = 3;
}
int kmax = ( j == 0 ) ? 15 : 16;
for( int k = kmax; k >= j; --k )
{
// last cluster [k,n) is at the end
if( k < 16 )
{
indices[k] = 1;
}
uint bitmap = 0;
bool hasThree = false;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
}
bitmaps[num] = bitmap;
num++;
}
}
}
*/
}
int main(int argc, char *argv[])
{
//precomp();
nvtt::InputOptions inputOptions;
inputOptions.setTextureLayout(nvtt::TextureType_2D, WIDTH, HEIGHT);
for (int i = 0; i < INPUT_SIZE; i++)
{
s_input[i] = rand();
}
inputOptions.setMipmapData(s_input, WIDTH, HEIGHT);
inputOptions.setMipmapGeneration(false);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_DXT1);
// compressionOptions.setFormat(nvtt::Format_DXT1n);
// compressionOptions.setFormat(nvtt::Format_CTX1);
nvtt::OutputOptions outputOptions;
outputOptions.setOutputHeader(false);
MyOutputHandler outputHandler;
outputOptions.setOutputHandler(&outputHandler);
nvtt::Compressor compressor;
for (s_frame = 0; s_frame < FRAME_COUNT; s_frame++)
{
clock_t start = clock();
printf("compressing frame %d:\n", s_frame);
compressor.process(inputOptions, compressionOptions, outputOptions);
clock_t end = clock();
printf("time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
}
return EXIT_SUCCESS;
}

@ -130,13 +130,10 @@ struct NormalError
void done() void done()
{ {
if (samples) ade /= samples;
{ mse /= samples * 3;
ade /= samples; rmse = sqrt(mse);
mse /= samples * 3; psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
rmse = sqrt(mse);
psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
}
} }
void print() void print()

Loading…
Cancel
Save