Apply singlecolorpatch.diff from Amir Ebrahimi.

Add single color compressor to CUDA implementation.
This commit is contained in:
castano
2008-02-16 09:53:07 +00:00
parent eb36ef86a7
commit b41752aa84
8 changed files with 110 additions and 19 deletions

View File

@ -28,6 +28,8 @@
#include "CudaMath.h"
#include "../SingleColorLookup.h"
#define NUM_THREADS 64 // Number of threads per block.
#if __DEVICE_EMULATION__
@ -117,7 +119,7 @@ __device__ void sortColors(const float * values, int * cmp)
////////////////////////////////////////////////////////////////////////////////
// Load color block to shared mem
////////////////////////////////////////////////////////////////////////////////
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16])
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
@ -128,7 +130,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
{
// Read color and copy to shared mem.
uint c = image[(bid) * 16 + idx];
colors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f);
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
@ -137,11 +139,13 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
*sameColor = (axis == make_float3(0, 0, 0));
dps[idx] = dot(colors[idx], axis);
#if __DEVICE_EMULATION__
@ -997,6 +1001,20 @@ __device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xr
saveBlockDXT1(start, end, permutation, xrefs, result);
}
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
{
const int bid = blockIdx.x;
int r = color.x * 255;
int g = color.y * 255;
int b = color.z * 255;
ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0];
ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1];
result[bid].x = (color1 << 16) | color0;
result[bid].y = 0xaaaaaaaa;
}
////////////////////////////////////////////////////////////////////////////////
@ -1007,9 +1025,16 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
__shared__ float3 colors[16];
__shared__ float3 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs);
loadColorBlock(image, colors, sums, xrefs, &sameColor);
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
__syncthreads();
ushort bestStart, bestEnd;
@ -1269,7 +1294,7 @@ __global__ void compressDXT5(const uint * permutations, const uint * image, uint
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, weights, xrefs);
__syncthreads();
compressAlpha(weights, result);

View File

@ -82,6 +82,12 @@ inline __device__ __host__ void operator /=(float3 & b, float f)
b.z *= inv;
}
inline __device__ __host__ bool operator ==(float3 a, float3 b)
{
return a.x == b.x && a.y == b.y && a.z == b.z;
}
// float2 operators
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
@ -187,15 +193,37 @@ inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
float m = max(max(x, y), z);
float iv = 1.0f / m;
#if __DEVICE_EMULATION__
if (m == 0.0f) iv = 0.0f;
#endif
v = make_float3(x*iv, y*iv, z*iv);
}
return v;
}
inline __device__ bool singleColor(const float3 * colors)
{
#if __DEVICE_EMULATION__
bool sameColor = false;
for (int i = 0; i < 16; i++)
{
sameColor &= (colors[idx] == colors[0]);
}
return sameColor;
#else
__shared__ int sameColor[16];
const int idx = threadIdx.x;
sameColor[idx] = (colors[idx] == colors[0]);
sameColor[idx] &= sameColor[idx^8];
sameColor[idx] &= sameColor[idx^4];
sameColor[idx] &= sameColor[idx^2];
sameColor[idx] &= sameColor[idx^1];
return sameColor[0];
#endif
}
inline __device__ void colorSums(const float3 * colors, float3 * sums)
{
#if __DEVICE_EMULATION__
@ -284,9 +312,7 @@ inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
float y = v.x * matrix[1] + v.y * matrix[2];
float m = max(x, y);
float iv = 1.0f / m;
#if __DEVICE_EMULATION__
if (m == 0.0f) iv = 0.0f;
#endif
v = make_float2(x*iv, y*iv);
}