DXT1a single color compressor. Fixes issue 131.

Init single color tables at startup.
pull/216/head
castano 14 years ago
parent da548fd03a
commit ac7c017c35

@ -6,6 +6,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj",
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
@ -80,6 +81,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "imperativeapi", "imperative
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc6h", "bc6h\bc6h.vcproj", "{C33787E3-5564-4834-9FE3-A9020455A669}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Mixed Platforms = Debug (no cuda)|Mixed Platforms
@ -426,6 +429,30 @@ Global
{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|Win32.Build.0 = Release|Win32
{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|x64.ActiveCfg = Release|x64
{7DCF280E-702B-49F3-84A7-AE7E146384D6}.Release|x64.Build.0 = Release|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug (no cuda)|x64.Build.0 = Debug|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|Mixed Platforms.Build.0 = Debug|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|Win32.ActiveCfg = Debug|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|Win32.Build.0 = Debug|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|x64.ActiveCfg = Debug|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Debug|x64.Build.0 = Debug|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|Mixed Platforms.Build.0 = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|Win32.Build.0 = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|x64.ActiveCfg = Release|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Release (no cuda)|x64.Build.0 = Release|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Mixed Platforms.ActiveCfg = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Mixed Platforms.Build.0 = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Win32.ActiveCfg = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Win32.Build.0 = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.ActiveCfg = Release|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

@ -770,6 +770,10 @@
RelativePath="..\..\..\src\nvtt\CompressorDX11.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\CompressorDX9.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\CompressorDXT.cpp"
>
@ -886,6 +890,10 @@
RelativePath="..\..\..\src\nvtt\QuickCompressDXT.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\SingleColorLookup.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\TexImage.cpp"
>
@ -916,6 +924,10 @@
RelativePath="..\..\..\src\nvtt\CompressorDX11.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\CompressorDX9.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\CompressorDXT.h"
>

@ -281,86 +281,58 @@
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
<File
RelativePath="..\..\..\src\nvtt\squish\colourblock.cpp"
>
<File
RelativePath="..\..\..\src\nvtt\squish\clusterfit.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourblock.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourfit.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourset.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\fastclusterfit.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\maths.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\weightedclusterfit.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourblock.h"
>
<File
RelativePath="..\..\..\src\nvtt\squish\clusterfit.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourblock.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourfit.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourset.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\fastclusterfit.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\maths.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\simd.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\simd_sse.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\simd_ve.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\weightedclusterfit.h"
>
</File>
</Filter>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourfit.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourfit.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourset.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\colourset.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\maths.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\maths.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\simd.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\simd_sse.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\simd_ve.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\weightedclusterfit.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\squish\weightedclusterfit.h"
>
</File>
</Files>
<Globals>
</Globals>

@ -29,7 +29,6 @@
// squish
#include "squish/colourset.h"
#include "squish/fastclusterfit.h"
#include "squish/weightedclusterfit.h"
#include "nvtt.h"
@ -130,21 +129,18 @@ void NormalCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alph
void NormalCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
bool anyAlpha = false;
bool allAlpha = true;
uint alphaMask = 0;
for (uint i = 0; i < 16; i++)
{
if (rgba.color(i).a < 128) anyAlpha = true;
else allAlpha = false;
if (rgba.color(i).a < 128) alphaMask |= (3 << (i * 2)); // Set two bits for each color.
}
const bool isSingleColor = rgba.isSingleColor();
if ((!anyAlpha && isSingleColor || allAlpha))
if (isSingleColor)
{
BlockDXT1 * block = new(output) BlockDXT1;
OptimalCompress::compressDXT1a(rgba.color(0), block);
OptimalCompress::compressDXT1a(rgba.color(0), alphaMask, block);
}
else
{

@ -266,18 +266,27 @@ void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
}
}
void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
void OptimalCompress::compressDXT1a(Color32 c, uint alphaMask, BlockDXT1 * dxtBlock)
{
if (rgba.a < 128)
{
dxtBlock->col0.u = 0;
dxtBlock->col1.u = 0;
dxtBlock->indices = 0xFFFFFFFF;
}
else
{
compressDXT1(rgba, dxtBlock);
}
if (alphaMask == 0) {
compressDXT1(c, dxtBlock);
}
else {
dxtBlock->col0.r = OMatchAlpha5[c.r][0];
dxtBlock->col0.g = OMatchAlpha6[c.g][0];
dxtBlock->col0.b = OMatchAlpha5[c.b][0];
dxtBlock->col1.r = OMatchAlpha5[c.r][1];
dxtBlock->col1.g = OMatchAlpha6[c.g][1];
dxtBlock->col1.b = OMatchAlpha5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa; // 0b1010..1010
if (dxtBlock->col0.u > dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
}
dxtBlock->indices |= alphaMask;
}
}
void OptimalCompress::compressDXT1G(uint8 g, BlockDXT1 * dxtBlock)

@ -40,7 +40,7 @@ namespace nv
namespace OptimalCompress
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, uint alphaMask, BlockDXT1 * dxtBlock);
void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);

@ -0,0 +1,90 @@
#include "SingleColorLookup.h"
#include "nvcore/Debug.h"
#include <stdlib.h> // abs
// Globals
//
// Single color lookup tables, indexed by an 8-bit channel value (0..255).
// They are filled at runtime by initSingleColorLookup() through PrepareOptTable():
//   entry[i][0] receives the 'max' endpoint index and entry[i][1] the 'min'
//   endpoint index that gave the lowest error for value i.
// The 5 / 6 suffix selects the 32-entry or 64-entry expansion table used to
// build them. The Alpha variants are built with alpha_mode = true, where the
// error is measured against the midpoint (maxe + mine) / 2 instead of Lerp13().
uint8 OMatch5[256][2];
uint8 OMatch6[256][2];
uint8 OMatchAlpha5[256][2];
uint8 OMatchAlpha6[256][2];
// Multiplies a by b, adds a rounding bias of 128, then scales the product back
// down using shifts only: (t + (t >> 8)) >> 8.
// NOTE(review): this looks like the usual shift-based stand-in for a rounded
// division by 255 - confirm before relying on that reading.
static int Mul8Bit(int a, int b)
{
    const int biased = a * b + 128;
    const int correction = biased >> 8;
    return (biased + correction) >> 8;
}
// Returns the value one third of the way from a towards b, i.e. a weighted
// twice and b weighted once.
//
// Two variants are selected at compile time:
//  - DXT_USE_ROUNDING_BIAS defined: a + Mul8Bit(b - a, 0x55).
//  - otherwise: plain integer arithmetic, (2 * a + b) / 3.
static inline int Lerp13(int a, int b)
{
#if defined(DXT_USE_ROUNDING_BIAS)
    // Variant with rounding bias.
    const int delta = b - a;
    return a + Mul8Bit(delta, 0x55);
#else
    // Variant without rounding bias.
    // The "/ 3" can be replaced by "* 0xaaab) >> 17" when the compiler does not
    // perform that strength reduction itself and the extra speed is needed.
    const int weighted = 2 * a + b;
    return weighted / 3;
#endif
}
// Builds a 256-entry lookup table of endpoint index pairs.
//
// For every target value 0..255 all (min, max) index pairs in [0, size) are
// tried and the pair with the lowest cost is stored:
//   table[target * 2 + 0] = max index
//   table[target * 2 + 1] = min index
// The first pair reaching the lowest cost wins (strict '<' comparison).
//
//   table      - output, 256 * 2 entries.
//   expand     - maps an endpoint index to its expanded value; 'size' entries.
//   size       - number of endpoint indices to search.
//   alpha_mode - when true the reconstructed value is the midpoint
//                (maxe + mine) / 2, otherwise it is Lerp13(maxe, mine).
static void PrepareOptTable(uint8 * table, const uint8 * expand, int size, bool alpha_mode)
{
    for (int target = 0; target < 256; ++target)
    {
        uint8 * const entry = table + target * 2;
        int lowestCost = 256 * 100;

        for (int loIdx = 0; loIdx < size; ++loIdx)
        {
            const int loValue = expand[loIdx];

            for (int hiIdx = 0; hiIdx < size; ++hiIdx)
            {
                const int hiValue = expand[hiIdx];

                const int reconstructed = alpha_mode
                    ? (hiValue + loValue) / 2
                    : Lerp13(hiValue, loValue);

                // Costs are kept in hundredths so the penalty below stays in integers.
                //
                // DX10 spec says that interpolation must be within 3% of "correct" result,
                // add this as error term. (normally we'd expect a random distribution of
                // +-1.5% error, but nowhere in the spec does it say that the error has to be
                // unbiased - better safe than sorry).
                const int cost = abs(reconstructed - target) * 100 + abs(hiIdx - loIdx) * 3;

                if (cost < lowestCost)
                {
                    entry[0] = hiIdx;
                    entry[1] = loIdx;
                    lowestCost = cost;
                }
            }
        }
    }
}
// Registers the table initialization so it runs at startup.
// NOTE(review): NV_AT_STARTUP is defined outside this file (presumably in
// nvcore) - confirm its ordering guarantees relative to other static
// initializers that may read the tables.
NV_AT_STARTUP(initSingleColorLookup());

// Fills the four single color lookup tables (OMatch5, OMatch6, OMatchAlpha5,
// OMatchAlpha6).
//
// Two local expansion tables are built first: they widen a 5-bit (resp. 6-bit)
// index to 8 bits by shifting it to the top and copying its high bits into the
// freed low bits. Each lookup table is then produced by PrepareOptTable() with
// the matching expansion table; the Alpha tables use alpha_mode = true.
void initSingleColorLookup()
{
    uint8 expand5[32];
    uint8 expand6[64];

    for (int i = 0; i < 32; i++) {
        expand5[i] = (i << 3) | (i >> 2);
    }

    for (int i = 0; i < 64; i++) {
        expand6[i] = (i << 2) | (i >> 4);
    }

    PrepareOptTable(&OMatch5[0][0], expand5, 32, false);
    PrepareOptTable(&OMatch6[0][0], expand6, 64, false);
    PrepareOptTable(&OMatchAlpha5[0][0], expand5, 32, true);
    PrepareOptTable(&OMatchAlpha6[0][0], expand6, 64, true);
}

@ -1,588 +1,9 @@
/*
typedef unsigned char uint8;
#include "nvcore/nvcore.h" // uint8
static int Mul8Bit(int a, int b)
{
int t = a * b + 128;
return (t + (t >> 8)) >> 8;
}
static inline int Lerp13(int fm, int to)
{
return (fm * 2 + to) / 3;
}
static void PrepareOptTable(uint8 * Table, const uint8 * expand, int size)
{
for (int i = 0; i < 256; i++)
{
float bestErr = 256;
for (int min = 0; min < size; min++)
{
for (int max = 0; max < size; max++)
{
int mine = expand[min];
int maxe = expand[max];
float err = abs(maxe + Mul8Bit(mine-maxe, 0x55) - i);
err += 0.03f * abs(max - min);
if (err < bestErr)
{
Table[i*2+0] = max;
Table[i*2+1] = min;
bestErr = err;
}
}
}
}
}
void initTables()
{
uint8 Expand5[32];
uint8 Expand6[64];
for(sInt i=0;i<32;i++)
Expand5[i] = (i<<3)|(i>>2);
for(sInt i=0;i<64;i++)
Expand6[i] = (i<<2)|(i>>4);
PrepareOptTable(OMatch5, Expand5, 32)
PrepareOptTable(OMatch6, Expand6, 64)
};
*/
#if __CUDACC__
__constant__ unsigned short
#else
const static uint8
#endif
OMatch5[256][2] =
{
{0x00, 0x00},
{0x00, 0x00},
{0x00, 0x01},
{0x00, 0x01},
{0x01, 0x00},
{0x01, 0x00},
{0x01, 0x00},
{0x01, 0x01},
{0x01, 0x01},
{0x01, 0x01},
{0x01, 0x02},
{0x00, 0x04},
{0x02, 0x01},
{0x02, 0x01},
{0x02, 0x01},
{0x02, 0x02},
{0x02, 0x02},
{0x02, 0x02},
{0x02, 0x03},
{0x01, 0x05},
{0x03, 0x02},
{0x03, 0x02},
{0x04, 0x00},
{0x03, 0x03},
{0x03, 0x03},
{0x03, 0x03},
{0x03, 0x04},
{0x03, 0x04},
{0x03, 0x04},
{0x03, 0x05},
{0x04, 0x03},
{0x04, 0x03},
{0x05, 0x02},
{0x04, 0x04},
{0x04, 0x04},
{0x04, 0x05},
{0x04, 0x05},
{0x05, 0x04},
{0x05, 0x04},
{0x05, 0x04},
{0x06, 0x03},
{0x05, 0x05},
{0x05, 0x05},
{0x05, 0x06},
{0x04, 0x08},
{0x06, 0x05},
{0x06, 0x05},
{0x06, 0x05},
{0x06, 0x06},
{0x06, 0x06},
{0x06, 0x06},
{0x06, 0x07},
{0x05, 0x09},
{0x07, 0x06},
{0x07, 0x06},
{0x08, 0x04},
{0x07, 0x07},
{0x07, 0x07},
{0x07, 0x07},
{0x07, 0x08},
{0x07, 0x08},
{0x07, 0x08},
{0x07, 0x09},
{0x08, 0x07},
{0x08, 0x07},
{0x09, 0x06},
{0x08, 0x08},
{0x08, 0x08},
{0x08, 0x09},
{0x08, 0x09},
{0x09, 0x08},
{0x09, 0x08},
{0x09, 0x08},
{0x0A, 0x07},
{0x09, 0x09},
{0x09, 0x09},
{0x09, 0x0A},
{0x08, 0x0C},
{0x0A, 0x09},
{0x0A, 0x09},
{0x0A, 0x09},
{0x0A, 0x0A},
{0x0A, 0x0A},
{0x0A, 0x0A},
{0x0A, 0x0B},
{0x09, 0x0D},
{0x0B, 0x0A},
{0x0B, 0x0A},
{0x0C, 0x08},
{0x0B, 0x0B},
{0x0B, 0x0B},
{0x0B, 0x0B},
{0x0B, 0x0C},
{0x0B, 0x0C},
{0x0B, 0x0C},
{0x0B, 0x0D},
{0x0C, 0x0B},
{0x0C, 0x0B},
{0x0D, 0x0A},
{0x0C, 0x0C},
{0x0C, 0x0C},
{0x0C, 0x0D},
{0x0C, 0x0D},
{0x0D, 0x0C},
{0x0D, 0x0C},
{0x0D, 0x0C},
{0x0E, 0x0B},
{0x0D, 0x0D},
{0x0D, 0x0D},
{0x0D, 0x0E},
{0x0C, 0x10},
{0x0E, 0x0D},
{0x0E, 0x0D},
{0x0E, 0x0D},
{0x0E, 0x0E},
{0x0E, 0x0E},
{0x0E, 0x0E},
{0x0E, 0x0F},
{0x0D, 0x11},
{0x0F, 0x0E},
{0x0F, 0x0E},
{0x10, 0x0C},
{0x0F, 0x0F},
{0x0F, 0x0F},
{0x0F, 0x0F},
{0x0F, 0x10},
{0x0F, 0x10},
{0x0F, 0x10},
{0x0F, 0x11},
{0x10, 0x0F},
{0x10, 0x0F},
{0x11, 0x0E},
{0x10, 0x10},
{0x10, 0x10},
{0x10, 0x11},
{0x10, 0x11},
{0x11, 0x10},
{0x11, 0x10},
{0x11, 0x10},
{0x12, 0x0F},
{0x11, 0x11},
{0x11, 0x11},
{0x11, 0x12},
{0x10, 0x14},
{0x12, 0x11},
{0x12, 0x11},
{0x12, 0x11},
{0x12, 0x12},
{0x12, 0x12},
{0x12, 0x12},
{0x12, 0x13},
{0x11, 0x15},
{0x13, 0x12},
{0x13, 0x12},
{0x14, 0x10},
{0x13, 0x13},
{0x13, 0x13},
{0x13, 0x13},
{0x13, 0x14},
{0x13, 0x14},
{0x13, 0x14},
{0x13, 0x15},
{0x14, 0x13},
{0x14, 0x13},
{0x15, 0x12},
{0x14, 0x14},
{0x14, 0x14},
{0x14, 0x15},
{0x14, 0x15},
{0x15, 0x14},
{0x15, 0x14},
{0x15, 0x14},
{0x16, 0x13},
{0x15, 0x15},
{0x15, 0x15},
{0x15, 0x16},
{0x14, 0x18},
{0x16, 0x15},
{0x16, 0x15},
{0x16, 0x15},
{0x16, 0x16},
{0x16, 0x16},
{0x16, 0x16},
{0x16, 0x17},
{0x15, 0x19},
{0x17, 0x16},
{0x17, 0x16},
{0x18, 0x14},
{0x17, 0x17},
{0x17, 0x17},
{0x17, 0x17},
{0x17, 0x18},
{0x17, 0x18},
{0x17, 0x18},
{0x17, 0x19},
{0x18, 0x17},
{0x18, 0x17},
{0x19, 0x16},
{0x18, 0x18},
{0x18, 0x18},
{0x18, 0x19},
{0x18, 0x19},
{0x19, 0x18},
{0x19, 0x18},
{0x19, 0x18},
{0x1A, 0x17},
{0x19, 0x19},
{0x19, 0x19},
{0x19, 0x1A},
{0x18, 0x1C},
{0x1A, 0x19},
{0x1A, 0x19},
{0x1A, 0x19},
{0x1A, 0x1A},
{0x1A, 0x1A},
{0x1A, 0x1A},
{0x1A, 0x1B},
{0x19, 0x1D},
{0x1B, 0x1A},
{0x1B, 0x1A},
{0x1C, 0x18},
{0x1B, 0x1B},
{0x1B, 0x1B},
{0x1B, 0x1B},
{0x1B, 0x1C},
{0x1B, 0x1C},
{0x1B, 0x1C},
{0x1B, 0x1D},
{0x1C, 0x1B},
{0x1C, 0x1B},
{0x1D, 0x1A},
{0x1C, 0x1C},
{0x1C, 0x1C},
{0x1C, 0x1D},
{0x1C, 0x1D},
{0x1D, 0x1C},
{0x1D, 0x1C},
{0x1D, 0x1C},
{0x1E, 0x1B},
{0x1D, 0x1D},
{0x1D, 0x1D},
{0x1D, 0x1E},
{0x1D, 0x1E},
{0x1E, 0x1D},
{0x1E, 0x1D},
{0x1E, 0x1D},
{0x1E, 0x1E},
{0x1E, 0x1E},
{0x1E, 0x1E},
{0x1E, 0x1F},
{0x1E, 0x1F},
{0x1F, 0x1E},
{0x1F, 0x1E},
{0x1F, 0x1E},
{0x1F, 0x1F},
{0x1F, 0x1F},
};
#if __CUDACC__
__constant__ unsigned short
#else
const static uint8
#endif
OMatch6[256][2] =
{
{0x00, 0x00},
{0x00, 0x01},
{0x01, 0x00},
{0x01, 0x01},
{0x01, 0x01},
{0x01, 0x02},
{0x02, 0x01},
{0x02, 0x02},
{0x02, 0x02},
{0x02, 0x03},
{0x03, 0x02},
{0x03, 0x03},
{0x03, 0x03},
{0x03, 0x04},
{0x04, 0x03},
{0x04, 0x04},
{0x04, 0x04},
{0x04, 0x05},
{0x05, 0x04},
{0x05, 0x05},
{0x05, 0x05},
{0x05, 0x06},
{0x06, 0x05},
{0x00, 0x11},
{0x06, 0x06},
{0x06, 0x07},
{0x07, 0x06},
{0x02, 0x10},
{0x07, 0x07},
{0x07, 0x08},
{0x08, 0x07},
{0x03, 0x11},
{0x08, 0x08},
{0x08, 0x09},
{0x09, 0x08},
{0x05, 0x10},
{0x09, 0x09},
{0x09, 0x0A},
{0x0A, 0x09},
{0x06, 0x11},
{0x0A, 0x0A},
{0x0A, 0x0B},
{0x0B, 0x0A},
{0x08, 0x10},
{0x0B, 0x0B},
{0x0B, 0x0C},
{0x0C, 0x0B},
{0x09, 0x11},
{0x0C, 0x0C},
{0x0C, 0x0D},
{0x0D, 0x0C},
{0x0B, 0x10},
{0x0D, 0x0D},
{0x0D, 0x0E},
{0x0E, 0x0D},
{0x0C, 0x11},
{0x0E, 0x0E},
{0x0E, 0x0F},
{0x0F, 0x0E},
{0x0E, 0x10},
{0x0F, 0x0F},
{0x0F, 0x10},
{0x10, 0x0E},
{0x10, 0x0F},
{0x11, 0x0E},
{0x10, 0x10},
{0x10, 0x11},
{0x11, 0x10},
{0x12, 0x0F},
{0x11, 0x11},
{0x11, 0x12},
{0x12, 0x11},
{0x14, 0x0E},
{0x12, 0x12},
{0x12, 0x13},
{0x13, 0x12},
{0x15, 0x0F},
{0x13, 0x13},
{0x13, 0x14},
{0x14, 0x13},
{0x17, 0x0E},
{0x14, 0x14},
{0x14, 0x15},
{0x15, 0x14},
{0x18, 0x0F},
{0x15, 0x15},
{0x15, 0x16},
{0x16, 0x15},
{0x1A, 0x0E},
{0x16, 0x16},
{0x16, 0x17},
{0x17, 0x16},
{0x1B, 0x0F},
{0x17, 0x17},
{0x17, 0x18},
{0x18, 0x17},
{0x13, 0x21},
{0x18, 0x18},
{0x18, 0x19},
{0x19, 0x18},
{0x15, 0x20},
{0x19, 0x19},
{0x19, 0x1A},
{0x1A, 0x19},
{0x16, 0x21},
{0x1A, 0x1A},
{0x1A, 0x1B},
{0x1B, 0x1A},
{0x18, 0x20},
{0x1B, 0x1B},
{0x1B, 0x1C},
{0x1C, 0x1B},
{0x19, 0x21},
{0x1C, 0x1C},
{0x1C, 0x1D},
{0x1D, 0x1C},
{0x1B, 0x20},
{0x1D, 0x1D},
{0x1D, 0x1E},
{0x1E, 0x1D},
{0x1C, 0x21},
{0x1E, 0x1E},
{0x1E, 0x1F},
{0x1F, 0x1E},
{0x1E, 0x20},
{0x1F, 0x1F},
{0x1F, 0x20},
{0x20, 0x1E},
{0x20, 0x1F},
{0x21, 0x1E},
{0x20, 0x20},
{0x20, 0x21},
{0x21, 0x20},
{0x22, 0x1F},
{0x21, 0x21},
{0x21, 0x22},
{0x22, 0x21},
{0x24, 0x1E},
{0x22, 0x22},
{0x22, 0x23},
{0x23, 0x22},
{0x25, 0x1F},
{0x23, 0x23},
{0x23, 0x24},
{0x24, 0x23},
{0x27, 0x1E},
{0x24, 0x24},
{0x24, 0x25},
{0x25, 0x24},
{0x28, 0x1F},
{0x25, 0x25},
{0x25, 0x26},
{0x26, 0x25},
{0x2A, 0x1E},
{0x26, 0x26},
{0x26, 0x27},
{0x27, 0x26},
{0x2B, 0x1F},
{0x27, 0x27},
{0x27, 0x28},
{0x28, 0x27},
{0x23, 0x31},
{0x28, 0x28},
{0x28, 0x29},
{0x29, 0x28},
{0x25, 0x30},
{0x29, 0x29},
{0x29, 0x2A},
{0x2A, 0x29},
{0x26, 0x31},
{0x2A, 0x2A},
{0x2A, 0x2B},
{0x2B, 0x2A},
{0x28, 0x30},
{0x2B, 0x2B},
{0x2B, 0x2C},
{0x2C, 0x2B},
{0x29, 0x31},
{0x2C, 0x2C},
{0x2C, 0x2D},
{0x2D, 0x2C},
{0x2B, 0x30},
{0x2D, 0x2D},
{0x2D, 0x2E},
{0x2E, 0x2D},
{0x2C, 0x31},
{0x2E, 0x2E},
{0x2E, 0x2F},
{0x2F, 0x2E},
{0x2E, 0x30},
{0x2F, 0x2F},
{0x2F, 0x30},
{0x30, 0x2E},
{0x30, 0x2F},
{0x31, 0x2E},
{0x30, 0x30},
{0x30, 0x31},
{0x31, 0x30},
{0x32, 0x2F},
{0x31, 0x31},
{0x31, 0x32},
{0x32, 0x31},
{0x34, 0x2E},
{0x32, 0x32},
{0x32, 0x33},
{0x33, 0x32},
{0x35, 0x2F},
{0x33, 0x33},
{0x33, 0x34},
{0x34, 0x33},
{0x37, 0x2E},
{0x34, 0x34},
{0x34, 0x35},
{0x35, 0x34},
{0x38, 0x2F},
{0x35, 0x35},
{0x35, 0x36},
{0x36, 0x35},
{0x3A, 0x2E},
{0x36, 0x36},
{0x36, 0x37},
{0x37, 0x36},
{0x3B, 0x2F},
{0x37, 0x37},
{0x37, 0x38},
{0x38, 0x37},
{0x3D, 0x2E},
{0x38, 0x38},
{0x38, 0x39},
{0x39, 0x38},
{0x3E, 0x2F},
{0x39, 0x39},
{0x39, 0x3A},
{0x3A, 0x39},
{0x3A, 0x3A},
{0x3A, 0x3A},
{0x3A, 0x3B},
{0x3B, 0x3A},
{0x3B, 0x3B},
{0x3B, 0x3B},
{0x3B, 0x3C},
{0x3C, 0x3B},
{0x3C, 0x3C},
{0x3C, 0x3C},
{0x3C, 0x3D},
{0x3D, 0x3C},
{0x3D, 0x3D},
{0x3D, 0x3D},
{0x3D, 0x3E},
{0x3E, 0x3D},
{0x3E, 0x3E},
{0x3E, 0x3E},
{0x3E, 0x3F},
{0x3F, 0x3E},
{0x3F, 0x3F},
{0x3F, 0x3F},
};
extern uint8 OMatch5[256][2];
extern uint8 OMatch6[256][2];
extern uint8 OMatchAlpha5[256][2];
extern uint8 OMatchAlpha6[256][2];
void initSingleColorLookup();

File diff suppressed because it is too large Load Diff

@ -26,7 +26,6 @@
#include "CudaMath.h"
#include "../SingleColorLookup.h"
#define NUM_THREADS 64 // Number of threads per block.
@ -48,6 +47,9 @@ __device__ inline void swap(T & a, T & b)
b = tmp;
}
__constant__ uchar OMatch5[256][2];
__constant__ uchar OMatch6[256][2];
__constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f };
__constant__ float3 kColorMetricSqr = { 1.0f, 1.0f, 1.0f };

@ -24,31 +24,24 @@
#include "CudaCompressorDXT.h"
#include "CudaUtils.h"
#include <nvcore/Debug.h>
#include <nvmath/Color.h>
#include <nvimage/Image.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include <nvtt/CompressionOptions.h>
#include <nvtt/OutputOptions.h>
#include <nvtt/QuickCompressDXT.h>
#include <nvtt/OptimalCompressDXT.h>
#include "nvcore/Debug.h"
#include "nvmath/Color.h"
#include "nvimage/Image.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include "nvtt/CompressionOptions.h"
#include "nvtt/OutputOptions.h"
#include "nvtt/QuickCompressDXT.h"
#include "nvtt/OptimalCompressDXT.h"
#if defined HAVE_CUDA
#include <cuda_runtime_api.h>
#endif
#include <time.h>
#include <stdio.h>
using namespace nv;
using namespace nvtt;
#if defined HAVE_CUDA
#include <cuda_runtime_api.h>
#define MAX_BLOCKS 8192U // 32768, 65535
#define MAX_BLOCKS 8192U // 32768, 65535 // @@ Limit number of blocks on slow devices to prevent hitting the watchdog timer.
extern "C" void setupCompressKernel(const float weights[3]);
extern "C" void bindTextureToArray(cudaArray * d_data);
@ -62,31 +55,13 @@ extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint
#include "BitmapTable.h"
/*
// Convert linear image to block linear.
static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
{
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
for(uint by = 0; by < h; by++) {
for(uint bx = 0; bx < w; bx++) {
const uint bw = min(image->width() - bx * 4, 4U);
const uint bh = min(image->height() - by * 4, 4U);
for (uint i = 0; i < 16; i++) {
const int x = (i % 4) % bw;
const int y = (i / 4) % bh;
blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
}
}
}
}
*/
#include "nvtt/SingleColorLookup.h"
#endif
using namespace nv;
using namespace nvtt;
CudaContext::CudaContext() :
bitmapTable(NULL),
@ -111,6 +86,11 @@ CudaContext::CudaContext() :
// Allocate scratch buffers.
cudaMalloc((void**) &data, MAX_BLOCKS * 64U);
cudaMalloc((void**) &result, MAX_BLOCKS * 8U);
// Init single color lookup constant tables.
cudaMemcpyToSymbol("OMatch5", OMatch5, sizeof(OMatch5), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol("OMatch6", OMatch6, sizeof(OMatch6), 0, cudaMemcpyHostToDevice);
#endif
}
@ -300,6 +280,30 @@ void CudaCompressorDXT5::compressBlocks(uint first, uint count, uint w, uint h,
#if 0
/*
// Convert linear image to block linear.
static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
{
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
for(uint by = 0; by < h; by++) {
for(uint bx = 0; bx < w; bx++) {
const uint bw = min(image->width() - bx * 4, 4U);
const uint bh = min(image->height() - by * 4, 4U);
for (uint i = 0; i < 16; i++) {
const int x = (i % 4) % bw;
const int y = (i / 4) % bh;
blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
}
}
}
}
*/
/// Compress image using CUDA.
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{

@ -383,8 +383,7 @@ Vec4 ClusterFit::SolveLeastSquares( Vec4& start, Vec4& end ) const
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
// Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); // IC: use approximate grid fitting.
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
Vec4 const onethird = VEC4_CONST( 1.0f/3.0f );
Vec4 const twothirds = VEC4_CONST( 2.0f/3.0f );
a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
@ -459,8 +458,7 @@ float ClusterFit::SolveLeastSquares( Vec3& start, Vec3& end ) const
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
//Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
Vec3 const gridrcp(0.03227752766457f, 0.01583151765563f, 0.03227752766457f); // IC: use approximate grid fitting.
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;

Loading…
Cancel
Save