Add single color checks to CUDA compressors.

Use optimized bitmap table for CTX compressor.
This commit is contained in:
castano
2008-05-06 19:52:27 +00:00
parent 246f2a409a
commit d855d0461b
7 changed files with 863 additions and 20 deletions

View File

@ -159,7 +159,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
}
}
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16])
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
@ -189,6 +189,8 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
*sameColor = (axis == make_float3(0, 0, 0));
dps[idx] = dot(rawColors[idx], axis);
#if __DEVICE_EMULATION__
@ -205,7 +207,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
}
}
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16])
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16], int * sameColor)
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
@ -229,6 +231,8 @@ __device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sum
colorSums(colors, sums);
float2 axis = bestFitLine(colors, sums[0]);
*sameColor = (axis == make_float2(0, 0));
dps[idx] = dot(colors[idx], axis);
#if __DEVICE_EMULATION__
@ -861,7 +865,7 @@ __device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, c
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
if (pidx >= 704) break;
ushort start, end;
uint permutation = permutations[pidx];
@ -1024,6 +1028,41 @@ __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
}
}
__device__ void saveSingleColorBlockDXT1(float2 color, uint2 * result)
{
const int bid = blockIdx.x;
int r = color.x * 255;
int g = color.y * 255;
ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5);
ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5);
if (color0 < color1)
{
result[bid].x = (color0 << 16) | color1;
result[bid].y = 0xffffffff;
}
else
{
result[bid].x = (color1 << 16) | color0;
result[bid].y = 0xaaaaaaaa;
}
}
__device__ void saveSingleColorBlockCTX1(float2 color, uint2 * result)
{
const int bid = blockIdx.x;
int r = color.x * 255;
int g = color.y * 255;
ushort color0 = (r << 8) | (g);
result[bid].x = (color0 << 16) | color0;
result[bid].y = 0x00000000;
}
////////////////////////////////////////////////////////////////////////////////
// Compress color block
@ -1069,11 +1108,18 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
__shared__ float3 sums[16];
__shared__ float weights[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, weights, xrefs);
loadColorBlock(image, colors, sums, weights, xrefs, &sameColor);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
@ -1097,11 +1143,18 @@ __global__ void compressNormalDXT1(const uint * permutations, const uint * image
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs, &sameColor);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
@ -1124,11 +1177,18 @@ __global__ void compressCTX1(const uint * permutations, const uint * image, uint
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs);
loadColorBlock(image, colors, sums, xrefs, &sameColor);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockCTX1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;