Apply singlecolorpatch.diff from Amir Ebrahimi.
Add single color compressor to CUDA implementation.
This commit is contained in:
parent
eb36ef86a7
commit
b41752aa84
@ -110,6 +110,19 @@ void ColorBlock::splatY()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if the block has a single color.
|
||||||
|
bool ColorBlock::isSingleColor() const
|
||||||
|
{
|
||||||
|
for(int i = 1; i < 16; i++)
|
||||||
|
{
|
||||||
|
if (m_color[0] != m_color[i])
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// Count number of unique colors in this color block.
|
/// Count number of unique colors in this color block.
|
||||||
uint ColorBlock::countUniqueColors() const
|
uint ColorBlock::countUniqueColors() const
|
||||||
|
@ -24,6 +24,7 @@ namespace nv
|
|||||||
void splatX();
|
void splatX();
|
||||||
void splatY();
|
void splatY();
|
||||||
|
|
||||||
|
bool isSingleColor() const;
|
||||||
uint countUniqueColors() const;
|
uint countUniqueColors() const;
|
||||||
Color32 averageColor() const;
|
Color32 averageColor() const;
|
||||||
bool hasAlpha() const;
|
bool hasAlpha() const;
|
||||||
|
@ -79,6 +79,9 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
|
|||||||
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
|
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
|
||||||
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
|
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
|
||||||
|
|
||||||
|
ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
|
||||||
|
TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
|
||||||
|
|
||||||
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
|
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
|
||||||
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
|
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
|
||||||
|
|
||||||
|
@ -69,7 +69,14 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
|||||||
for (uint x = 0; x < w; x += 4) {
|
for (uint x = 0; x < w; x += 4) {
|
||||||
rgba.init(image, x, y);
|
rgba.init(image, x, y);
|
||||||
|
|
||||||
QuickCompress::compressDXT1(rgba, &block);
|
if (rgba.isSingleColor())
|
||||||
|
{
|
||||||
|
QuickCompress::compressDXT1(rgba.color(0), &block);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
QuickCompress::compressDXT1(rgba, &block);
|
||||||
|
}
|
||||||
|
|
||||||
if (outputOptions.outputHandler != NULL) {
|
if (outputOptions.outputHandler != NULL) {
|
||||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||||
@ -214,10 +221,16 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
|
|||||||
|
|
||||||
rgba.init(image, x, y);
|
rgba.init(image, x, y);
|
||||||
|
|
||||||
// Compress color.
|
if (rgba.isSingleColor())
|
||||||
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
|
{
|
||||||
fit.SetColourSet(&colours, squish::kDxt1);
|
QuickCompress::compressDXT1(rgba.color(0), &block);
|
||||||
fit.Compress(&block);
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
|
||||||
|
fit.SetColourSet(&colours, squish::kDxt1);
|
||||||
|
fit.Compress(&block);
|
||||||
|
}
|
||||||
|
|
||||||
if (outputOptions.outputHandler != NULL) {
|
if (outputOptions.outputHandler != NULL) {
|
||||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||||
|
@ -48,7 +48,12 @@ void initTables()
|
|||||||
};
|
};
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const static uint8 OMatch5[256][2] =
|
#if __CUDACC__
|
||||||
|
__constant__ unsigned short
|
||||||
|
#else
|
||||||
|
const static uint8
|
||||||
|
#endif
|
||||||
|
OMatch5[256][2] =
|
||||||
{
|
{
|
||||||
{0x00, 0x00},
|
{0x00, 0x00},
|
||||||
{0x00, 0x00},
|
{0x00, 0x00},
|
||||||
@ -308,7 +313,12 @@ const static uint8 OMatch5[256][2] =
|
|||||||
{0x1F, 0x1F},
|
{0x1F, 0x1F},
|
||||||
};
|
};
|
||||||
|
|
||||||
const static uint8 OMatch6[256][2] =
|
#if __CUDACC__
|
||||||
|
__constant__ unsigned short
|
||||||
|
#else
|
||||||
|
const static uint8
|
||||||
|
#endif
|
||||||
|
OMatch6[256][2] =
|
||||||
{
|
{
|
||||||
{0x00, 0x00},
|
{0x00, 0x00},
|
||||||
{0x00, 0x01},
|
{0x00, 0x01},
|
||||||
|
@ -28,6 +28,8 @@
|
|||||||
|
|
||||||
#include "CudaMath.h"
|
#include "CudaMath.h"
|
||||||
|
|
||||||
|
#include "../SingleColorLookup.h"
|
||||||
|
|
||||||
#define NUM_THREADS 64 // Number of threads per block.
|
#define NUM_THREADS 64 // Number of threads per block.
|
||||||
|
|
||||||
#if __DEVICE_EMULATION__
|
#if __DEVICE_EMULATION__
|
||||||
@ -117,7 +119,7 @@ __device__ void sortColors(const float * values, int * cmp)
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Load color block to shared mem
|
// Load color block to shared mem
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16])
|
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
|
||||||
{
|
{
|
||||||
const int bid = blockIdx.x;
|
const int bid = blockIdx.x;
|
||||||
const int idx = threadIdx.x;
|
const int idx = threadIdx.x;
|
||||||
@ -128,7 +130,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
|||||||
{
|
{
|
||||||
// Read color and copy to shared mem.
|
// Read color and copy to shared mem.
|
||||||
uint c = image[(bid) * 16 + idx];
|
uint c = image[(bid) * 16 + idx];
|
||||||
|
|
||||||
colors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f);
|
colors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f);
|
||||||
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
|
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
|
||||||
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
|
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
|
||||||
@ -137,11 +139,13 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
|||||||
#if __DEVICE_EMULATION__
|
#if __DEVICE_EMULATION__
|
||||||
} __debugsync(); if (idx < 16) {
|
} __debugsync(); if (idx < 16) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Sort colors along the best fit line.
|
// Sort colors along the best fit line.
|
||||||
colorSums(colors, sums);
|
colorSums(colors, sums);
|
||||||
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
|
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
|
||||||
|
|
||||||
|
*sameColor = (axis == make_float3(0, 0, 0));
|
||||||
|
|
||||||
dps[idx] = dot(colors[idx], axis);
|
dps[idx] = dot(colors[idx], axis);
|
||||||
|
|
||||||
#if __DEVICE_EMULATION__
|
#if __DEVICE_EMULATION__
|
||||||
@ -997,6 +1001,20 @@ __device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xr
|
|||||||
saveBlockDXT1(start, end, permutation, xrefs, result);
|
saveBlockDXT1(start, end, permutation, xrefs, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Write a DXT1 block that encodes one solid color.
//
// color  - block color; each component is scaled by 255 and used as a row index
//          into the 256-entry OMatch tables, so components are expected to lie
//          in [0, 1]. x is packed into the top 5 bits, y into the middle 6 bits
//          and z into the low 5 bits of each 16-bit endpoint.
// result - output array; the block is written to result[blockIdx.x].
//
// Column 0 / column 1 of the OMatch tables supply the two endpoints, and every
// texel selects the same interpolated palette entry.
// NOTE(review): this assumes the tables were built for the 2/3-1/3 interpolant
// (palette index 2 of four-colour mode) -- confirm against SingleColorLookup.h.
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
{
    const int bid = blockIdx.x;

    // Round to nearest instead of truncating: a component that was produced as
    // k * (1/255) may come back as slightly less than k after the multiply, and
    // truncation would then pick the table row for k - 1.
    const int r = int(color.x * 255.0f + 0.5f);
    const int g = int(color.y * 255.0f + 0.5f);
    const int b = int(color.z * 255.0f + 0.5f);

    // Build the endpoints as unsigned values so the later << 16 cannot shift
    // into the sign bit of a promoted int.
    const uint color0 = (uint(OMatch5[r][0]) << 11) | (uint(OMatch6[g][0]) << 5) | uint(OMatch5[b][0]);
    const uint color1 = (uint(OMatch5[r][1]) << 11) | (uint(OMatch6[g][1]) << 5) | uint(OMatch5[b][1]);

    if (color0 < color1)
    {
        // With the first endpoint smaller than the second a DXT1 block decodes
        // in three-colour mode, where index 2 is the midpoint. Store the
        // endpoints swapped and select index 3, which in four-colour mode is
        // (first + 2 * second) / 3, i.e. the same 2/3 color0 + 1/3 color1 blend.
        result[bid].x = (color0 << 16) | color1;
        result[bid].y = 0xffffffff;
    }
    else
    {
        // color0 > color1: four-colour mode, index 2 = (2 * color0 + color1) / 3.
        // color0 == color1: both endpoints are identical, so index 2 still
        // decodes to that same color.
        result[bid].x = (color1 << 16) | color0;
        result[bid].y = 0xaaaaaaaa;
    }
}
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -1007,9 +1025,16 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
|
|||||||
__shared__ float3 colors[16];
|
__shared__ float3 colors[16];
|
||||||
__shared__ float3 sums[16];
|
__shared__ float3 sums[16];
|
||||||
__shared__ int xrefs[16];
|
__shared__ int xrefs[16];
|
||||||
|
__shared__ int sameColor;
|
||||||
|
|
||||||
loadColorBlock(image, colors, sums, xrefs);
|
loadColorBlock(image, colors, sums, xrefs, &sameColor);
|
||||||
|
|
||||||
|
if (sameColor)
|
||||||
|
{
|
||||||
|
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
ushort bestStart, bestEnd;
|
ushort bestStart, bestEnd;
|
||||||
@ -1269,7 +1294,7 @@ __global__ void compressDXT5(const uint * permutations, const uint * image, uint
|
|||||||
__shared__ int xrefs[16];
|
__shared__ int xrefs[16];
|
||||||
|
|
||||||
loadColorBlock(image, colors, sums, weights, xrefs);
|
loadColorBlock(image, colors, sums, weights, xrefs);
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
compressAlpha(weights, result);
|
compressAlpha(weights, result);
|
||||||
|
@ -82,6 +82,12 @@ inline __device__ __host__ void operator /=(float3 & b, float f)
|
|||||||
b.z *= inv;
|
b.z *= inv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Component-wise exact equality of two float3 values.
// Returns true only when x, y and z all compare equal.
inline __device__ __host__ bool operator ==(float3 a, float3 b)
{
    if (!(a.x == b.x))
    {
        return false;
    }
    if (!(a.y == b.y))
    {
        return false;
    }
    return a.z == b.z;
}
|
||||||
|
|
||||||
|
|
||||||
// float2 operators
|
// float2 operators
|
||||||
inline __device__ __host__ float2 operator *(float2 a, float2 b)
|
inline __device__ __host__ float2 operator *(float2 a, float2 b)
|
||||||
{
|
{
|
||||||
@ -187,15 +193,37 @@ inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
|
|||||||
float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
|
float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
|
||||||
float m = max(max(x, y), z);
|
float m = max(max(x, y), z);
|
||||||
float iv = 1.0f / m;
|
float iv = 1.0f / m;
|
||||||
#if __DEVICE_EMULATION__
|
|
||||||
if (m == 0.0f) iv = 0.0f;
|
if (m == 0.0f) iv = 0.0f;
|
||||||
#endif
|
|
||||||
v = make_float3(x*iv, y*iv, z*iv);
|
v = make_float3(x*iv, y*iv, z*iv);
|
||||||
}
|
}
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when all 16 entries of `colors` compare equal to colors[0].
//
// Each calling thread scans the 16 entries itself, so the result does not
// depend on threadIdx.x, no scratch shared memory is needed and no barrier is
// required inside the function. The same code is used for device and
// emulation builds.
//
// This replaces a version whose emulation branch started the accumulator at
// false (so it could never report true) and indexed with an undeclared `idx`,
// and whose device branch indexed a 16-entry shared array with an unguarded
// threadIdx.x and chained shared-memory updates without synchronization.
//
// NOTE(review): if `colors` is filled cooperatively by the thread block, the
// caller presumably has to synchronize before calling -- confirm at call sites.
inline __device__ bool singleColor(const float3 * colors)
{
    // Must start true: it is AND-ed with every comparison below.
    bool sameColor = true;

    for (int i = 1; i < 16; i++)
    {
        sameColor &= (colors[i] == colors[0]);
    }

    return sameColor;
}
|
||||||
|
|
||||||
inline __device__ void colorSums(const float3 * colors, float3 * sums)
|
inline __device__ void colorSums(const float3 * colors, float3 * sums)
|
||||||
{
|
{
|
||||||
#if __DEVICE_EMULATION__
|
#if __DEVICE_EMULATION__
|
||||||
@ -284,9 +312,7 @@ inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
|
|||||||
float y = v.x * matrix[1] + v.y * matrix[2];
|
float y = v.x * matrix[1] + v.y * matrix[2];
|
||||||
float m = max(x, y);
|
float m = max(x, y);
|
||||||
float iv = 1.0f / m;
|
float iv = 1.0f / m;
|
||||||
#if __DEVICE_EMULATION__
|
|
||||||
if (m == 0.0f) iv = 0.0f;
|
if (m == 0.0f) iv = 0.0f;
|
||||||
#endif
|
|
||||||
v = make_float2(x*iv, y*iv);
|
v = make_float2(x*iv, y*iv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ struct MyOutputHandler : public nvtt::OutputHandler
|
|||||||
|
|
||||||
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
|
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
|
||||||
{
|
{
|
||||||
assert(size == OUTPUT_SIZE);
|
assert(size == sizeof(int) * OUTPUT_SIZE);
|
||||||
assert(width == WIDTH);
|
assert(width == WIDTH);
|
||||||
assert(height == HEIGHT);
|
assert(height == HEIGHT);
|
||||||
assert(depth == 1);
|
assert(depth == 1);
|
||||||
@ -177,7 +177,7 @@ void precomp()
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
precomp();
|
//precomp();
|
||||||
|
|
||||||
nvtt::InputOptions inputOptions;
|
nvtt::InputOptions inputOptions;
|
||||||
inputOptions.setTextureLayout(nvtt::TextureType_2D, 1024, 1024);
|
inputOptions.setTextureLayout(nvtt::TextureType_2D, 1024, 1024);
|
||||||
|
Loading…
Reference in New Issue
Block a user