Add single color checks to CUDA compressors.
Use optimized bitmap table for CTX compressor.
This commit is contained in:
@ -159,7 +159,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16])
|
||||
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
const int idx = threadIdx.x;
|
||||
@ -189,6 +189,8 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
colorSums(colors, sums);
|
||||
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
|
||||
|
||||
*sameColor = (axis == make_float3(0, 0, 0));
|
||||
|
||||
dps[idx] = dot(rawColors[idx], axis);
|
||||
|
||||
#if __DEVICE_EMULATION__
|
||||
@ -205,7 +207,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16])
|
||||
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16], int * sameColor)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
const int idx = threadIdx.x;
|
||||
@ -229,6 +231,8 @@ __device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sum
|
||||
colorSums(colors, sums);
|
||||
float2 axis = bestFitLine(colors, sums[0]);
|
||||
|
||||
*sameColor = (axis == make_float2(0, 0));
|
||||
|
||||
dps[idx] = dot(colors[idx], axis);
|
||||
|
||||
#if __DEVICE_EMULATION__
|
||||
@ -861,7 +865,7 @@ __device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, c
|
||||
for(int i = 0; i < 16; i++)
|
||||
{
|
||||
int pidx = idx + NUM_THREADS * i;
|
||||
if (pidx >= 992) break;
|
||||
if (pidx >= 704) break;
|
||||
|
||||
ushort start, end;
|
||||
uint permutation = permutations[pidx];
|
||||
@ -1024,6 +1028,41 @@ __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void saveSingleColorBlockDXT1(float2 color, uint2 * result)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
|
||||
int r = color.x * 255;
|
||||
int g = color.y * 255;
|
||||
|
||||
ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5);
|
||||
ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5);
|
||||
|
||||
if (color0 < color1)
|
||||
{
|
||||
result[bid].x = (color0 << 16) | color1;
|
||||
result[bid].y = 0xffffffff;
|
||||
}
|
||||
else
|
||||
{
|
||||
result[bid].x = (color1 << 16) | color0;
|
||||
result[bid].y = 0xaaaaaaaa;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void saveSingleColorBlockCTX1(float2 color, uint2 * result)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
|
||||
int r = color.x * 255;
|
||||
int g = color.y * 255;
|
||||
|
||||
ushort color0 = (r << 8) | (g);
|
||||
|
||||
result[bid].x = (color0 << 16) | color0;
|
||||
result[bid].y = 0x00000000;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Compress color block
|
||||
@ -1069,11 +1108,18 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
|
||||
__shared__ float3 sums[16];
|
||||
__shared__ float weights[16];
|
||||
__shared__ int xrefs[16];
|
||||
__shared__ int sameColor;
|
||||
|
||||
loadColorBlock(image, colors, sums, weights, xrefs);
|
||||
loadColorBlock(image, colors, sums, weights, xrefs, &sameColor);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (sameColor)
|
||||
{
|
||||
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
|
||||
return;
|
||||
}
|
||||
|
||||
ushort bestStart, bestEnd;
|
||||
uint bestPermutation;
|
||||
|
||||
@ -1097,11 +1143,18 @@ __global__ void compressNormalDXT1(const uint * permutations, const uint * image
|
||||
__shared__ float2 colors[16];
|
||||
__shared__ float2 sums[16];
|
||||
__shared__ int xrefs[16];
|
||||
|
||||
loadColorBlock(image, colors, sums, xrefs);
|
||||
__shared__ int sameColor;
|
||||
|
||||
loadColorBlock(image, colors, sums, xrefs, &sameColor);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (sameColor)
|
||||
{
|
||||
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
|
||||
return;
|
||||
}
|
||||
|
||||
ushort bestStart, bestEnd;
|
||||
uint bestPermutation;
|
||||
|
||||
@ -1124,11 +1177,18 @@ __global__ void compressCTX1(const uint * permutations, const uint * image, uint
|
||||
__shared__ float2 colors[16];
|
||||
__shared__ float2 sums[16];
|
||||
__shared__ int xrefs[16];
|
||||
__shared__ int sameColor;
|
||||
|
||||
loadColorBlock(image, colors, sums, xrefs);
|
||||
loadColorBlock(image, colors, sums, xrefs, &sameColor);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (sameColor)
|
||||
{
|
||||
if (threadIdx.x == 0) saveSingleColorBlockCTX1(colors[0], result);
|
||||
return;
|
||||
}
|
||||
|
||||
ushort bestStart, bestEnd;
|
||||
uint bestPermutation;
|
||||
|
||||
|
Reference in New Issue
Block a user